<a href="https://colab.research.google.com/github/DishaJillella/MPI/blob/main/MPI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
# Mount Google Drive into the Colab filesystem under /content/drive.
drive.mount('/content/drive')

# Path to a Drive-hosted copy of UTKFace.
# NOTE(review): later cells reassign `dataset_path` to the kagglehub cache
# directory before reading it, so this value appears to be unused as written.
dataset_path = "/content/drive/MyDrive/UTKFace"  # Adjust this path based on where the dataset is stored


In [None]:
import kagglehub

# Fetch the "jangedoo/utkface-new" dataset through kagglehub. `path` receives
# the value returned by the call and is printed below as the download location.
path = kagglehub.dataset_download("jangedoo/utkface-new")

print("✅ Dataset downloaded at:", path)



✅ Dataset downloaded at: /root/.cache/kagglehub/datasets/jangedoo/utkface-new/versions/1


In [None]:
import os
import pandas as pd
import random

# Directory holding the UTKFace JPEGs inside the kagglehub cache.
dataset_path = "/root/.cache/kagglehub/datasets/jangedoo/utkface-new/versions/1/UTKFace"

# Upper bound on the number of synthetic "missing person" records to create.
MAX_RECORDS = 100
# Fixed seed so that Restart & Run All reproduces the same sample and metadata.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# os.listdir returns entries in arbitrary order; sort them so the seeded
# sample below is reproducible across runs and machines.
image_files = sorted(f for f in os.listdir(dataset_path) if f.endswith(".jpg"))
if not image_files:
    raise FileNotFoundError(f"No .jpg images found in {dataset_path}")

# random.sample raises ValueError when asked for more items than are
# available, so cap the request at the number of images actually present.
num_records = min(MAX_RECORDS, len(image_files))
image_files = random.sample(image_files, num_records)

# Build the table. Every column except "Image Path" is randomly generated
# placeholder metadata; none of it is derived from the image or its filename.
df = pd.DataFrame({
    "Image Path": [os.path.join(dataset_path, f) for f in image_files],
    "Name": [f"Person_{i}" for i in range(1, num_records + 1)],
    "Age": [random.randint(10, 80) for _ in range(num_records)],
    "Gender": [random.choice(["Male", "Female"]) for _ in range(num_records)],
    "Ethnicity": [random.choice(["Asian", "Black", "White", "Hispanic", "Other"]) for _ in range(num_records)],
    "Last Seen Location": [random.choice(["New York", "Los Angeles", "Chicago", "Houston", "Miami"]) for _ in range(num_records)],
    # Day is limited to 1-28 so every generated date is valid in any month.
    "Missing Since": [f"{random.randint(2015, 2023)}-{random.randint(1,12):02d}-{random.randint(1,28):02d}" for _ in range(num_records)]
})

# Persist the table (absolute image paths) for the later cells to reload.
csv_path = "/content/missing_persons_dataset.csv"
df.to_csv(csv_path, index=False)

print(f"✅ Dataset saved at: {csv_path}")
print(df.head())


✅ Dataset saved at: /content/missing_persons_dataset.csv
                                          Image Path      Name  Age  Gender  \
0  /root/.cache/kagglehub/datasets/jangedoo/utkfa...  Person_1   21    Male   
1  /root/.cache/kagglehub/datasets/jangedoo/utkfa...  Person_2   55  Female   
2  /root/.cache/kagglehub/datasets/jangedoo/utkfa...  Person_3   49    Male   
3  /root/.cache/kagglehub/datasets/jangedoo/utkfa...  Person_4   57  Female   
4  /root/.cache/kagglehub/datasets/jangedoo/utkfa...  Person_5   29    Male   

  Ethnicity Last Seen Location Missing Since  
0  Hispanic            Chicago    2017-06-05  
1     Asian        Los Angeles    2022-01-27  
2     Black              Miami    2020-08-07  
3     Asian            Houston    2020-07-28  
4     Black              Miami    2017-09-01  


In [None]:
import pandas as pd

# Reload the CSV written by the previous cell (absolute image paths).
csv_path = "/content/missing_persons_dataset.csv"
df = pd.read_csv(csv_path)

# Keep only the final path component of each entry and prefix it with
# "UTKFace/", giving paths relative to a folder named 'UTKFace'.
df["Image Path"] = [
    "UTKFace/" + absolute_path.split("/")[-1]
    for absolute_path in df["Image Path"]
]

# Write the rewritten table under a new filename; the original CSV is untouched.
df.to_csv("/content/missing_persons_dataset_fixed.csv", index=False)

print("✅ CSV paths updated! Upload `missing_persons_dataset_fixed.csv` to GitHub.")


✅ CSV paths updated! Upload `missing_persons_dataset_fixed.csv` to GitHub.


In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from sklearn.model_selection import train_test_split

# --- Configuration ---------------------------------------------------------
dataset_path = "/root/.cache/kagglehub/datasets/jangedoo/utkface-new/versions/1/UTKFace"
IMAGE_SIZE = (224, 224)  # spatial size fed to MobileNetV2
MAX_IMAGES = 1000        # number of files used for the train/test split

image_files = [f for f in os.listdir(dataset_path) if f.endswith(".jpg")]


def load_scaled_image(img_path):
    """Load one image as a float array scaled for MobileNetV2.

    The image is resized to IMAGE_SIZE and its pixel values are mapped from
    [0, 255] to [-1, 1]. That is the input range the ImageNet-pretrained
    MobileNetV2 weights expect (the same mapping as
    `mobilenet_v2.preprocess_input`); dividing by 255 would feed the frozen
    backbone a range it was not trained on.

    Args:
        img_path: Path of the image file to load.

    Returns:
        Array of shape IMAGE_SIZE + (3,) with values in [-1, 1].
    """
    pixels = img_to_array(load_img(img_path, target_size=IMAGE_SIZE))
    return pixels / 127.5 - 1.0


# --- Build arrays ----------------------------------------------------------
X, y = [], []
for img_file in image_files[:MAX_IMAGES]:
    X.append(load_scaled_image(os.path.join(dataset_path, img_file)))
    # The label is the integer before the first "_" in the filename.
    y.append(int(img_file.split("_")[0]))

X = np.array(X)
y = np.clip(np.array(y), 0, 100)  # cap ages to [0, 100]; this clips, it does not rescale

# --- Split -----------------------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- Model: frozen MobileNetV2 backbone + small regression head --------------
base_model = MobileNetV2(weights="imagenet", include_top=False, input_shape=IMAGE_SIZE + (3,))
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(128, activation="relu")(x)
x = Dense(1, activation="linear")(x)  # Age prediction as regression

model = Model(inputs=base_model.input, outputs=x)

# Freeze the backbone so only the two Dense layers are trained.
for layer in base_model.layers:
    layer.trainable = False

model.compile(optimizer="adam", loss="mse", metrics=["mae"])

# --- Train -----------------------------------------------------------------
# NOTE: the held-out split doubles as validation data, so the "Test" metrics
# printed below are the same numbers as the final epoch's val_* metrics.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=32)

# --- Evaluate --------------------------------------------------------------
loss, mae = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}, Test MAE: {mae}")

# --- Predict on an image outside the train/test pool -------------------------
# The first MAX_IMAGES files were all used above, so take the next file when
# one exists; otherwise fall back to the first file.
holdout_index = MAX_IMAGES if len(image_files) > MAX_IMAGES else 0
test_img_path = os.path.join(dataset_path, image_files[holdout_index])
test_img = np.expand_dims(load_scaled_image(test_img_path), axis=0)

predicted_age = model.predict(test_img)
print(f"Predicted Age: {predicted_age[0][0]}")


Epoch 1/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 313ms/step - loss: 838.5286 - mae: 23.3997 - val_loss: 434.9910 - val_mae: 16.0301
Epoch 2/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 47ms/step - loss: 317.1279 - mae: 13.6400 - val_loss: 362.5496 - val_mae: 15.0205
Epoch 3/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step - loss: 254.3703 - mae: 12.4347 - val_loss: 319.9518 - val_mae: 14.1783
Epoch 4/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - loss: 221.6603 - mae: 11.6149 - val_loss: 290.0763 - val_mae: 13.5983
Epoch 5/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step - loss: 187.5605 - mae: 10.7214 - val_loss: 289.6843 - val_mae: 13.3936
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 297.7456 - mae: 13.7502
Test Loss: 289.684326171875, Test MAE: 13.3935546875
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m

In [None]:
import os
import pandas as pd
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from sklearn.neighbors import NearestNeighbors

# Reload the CSV that stores absolute image paths (not the "_fixed" variant).
csv_path = "/content/missing_persons_dataset.csv"
df = pd.read_csv(csv_path)

# Headless MobileNetV2 with global average pooling, used as a fixed feature
# extractor. The input shape is stated explicitly to match the 224x224 resize
# done in preprocess_image and the backbone built in the training cell.
base_model = MobileNetV2(
    weights="imagenet",
    include_top=False,
    pooling="avg",
    input_shape=(224, 224, 3),
)

def preprocess_image(image_path):
    """Read an image from disk and prepare it for MobileNetV2.

    Args:
        image_path: Path of the image file to load.

    Returns:
        The image as a 224x224 RGB array passed through `preprocess_input`,
        or None when the file does not exist or cannot be decoded (a message
        is printed in either case).
    """
    if not os.path.exists(image_path):
        print(f"❌ Image not found: {image_path}")
        return None
    img = cv2.imread(image_path)
    if img is None:
        print(f"❌ Unable to read image: {image_path}")
        return None
    # OpenCV decodes colour images in BGR channel order, whereas the
    # ImageNet-pretrained network expects RGB; swap channels before scaling.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224))
    return preprocess_input(img)

def extract_features(image_path):
    """Compute a flat feature vector for one image using `base_model`.

    Args:
        image_path: Path of the image file to embed.

    Returns:
        1-D array holding the flattened `base_model` output for the image,
        or None when `preprocess_image` could not load it.
    """
    img = preprocess_image(image_path)
    if img is None:
        return None
    batch = np.expand_dims(img, axis=0)  # add the leading batch dimension
    # verbose=0: this runs once per image, and the default progress bar
    # otherwise floods the cell output with one line per call.
    features = base_model.predict(batch, verbose=0)
    return features.flatten()

# Embed every image listed in the table, pairing each path with its result.
# extract_features returns None for files that are missing or unreadable.
extracted = [(image_path, extract_features(image_path)) for image_path in df["Image Path"]]

# Keep only the successful extractions, preserving table order.
valid_images = [image_path for image_path, vector in extracted if vector is not None]
feature_list = [vector for _, vector in extracted if vector is not None]

# Drop the rows whose image could not be embedded so the rows of `df`
# correspond positionally to the rows of `feature_array`.
df = df[df["Image Path"].isin(valid_images)]

feature_array = np.array(feature_list)

# Single-nearest-neighbour index over the embeddings (Euclidean distance).
knn = NearestNeighbors(n_neighbors=1, metric="euclidean")
knn.fit(feature_array)

def find_missing_person(test_image_path, max_distance=10):
    """Look up the closest indexed person for a query image and print the result.

    Args:
        test_image_path: Path of the query image.
        max_distance: Exclusive upper bound on the Euclidean feature distance
            for the nearest neighbour to count as a match. Defaults to 10,
            the threshold that was previously hard-coded.

    Returns:
        None. The outcome (matching row of `df`, "no match", or a load
        failure) is reported via print.
    """
    test_features = extract_features(test_image_path)
    if test_features is None:
        print("❌ Test image not found or unreadable.")
        return

    # kneighbors takes a 2-D batch; wrap the single query vector in a list.
    distance, index = knn.kneighbors([test_features])

    if distance[0][0] < max_distance:
        # Positional lookup: the neighbour index refers to a row position in df.
        match = df.iloc[index[0][0]]
        print("✅ Match Found!")
        print(match)
    else:
        print("❌ No match found.")




  base_model = MobileNetV2(weights="imagenet", include_top=False, pooling="avg")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms

In [None]:
# Sanity check: the query is the first row of `df`, i.e. an image that is
# itself part of the set the KNN index was fitted on, so a match is expected.
test_image_path = df["Image Path"].iloc[0]  # First image in dataset
find_missing_person(test_image_path)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
✅ Match Found!
Image Path            /root/.cache/kagglehub/datasets/jangedoo/utkfa...
Name                                                           Person_1
Age                                                                  21
Gender                                                             Male
Ethnicity                                                      Hispanic
Last Seen Location                                              Chicago
Missing Since                                                2017-06-05
Name: 0, dtype: object
