In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list the .mat files under the input directory

import os

# Walk the Kaggle input tree and print the full path of every .mat file found
for dirname, _, filenames in os.walk('/kaggle/input'):
    mat_names = (name for name in filenames if name.endswith('.mat'))
    for mat_name in mat_names:
        print(os.path.join(dirname, mat_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os

# Count every file ending in '.jpg' anywhere below the Kaggle input directory
image_count = sum(
    1
    for _, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
    if filename.endswith('.jpg')
)

print(f"Number of images found: {image_count}")


# Checking structure of dataset

In [None]:
import scipy.io

# Read the metadata file into a dict keyed by variable name
mat_file_path = '/kaggle/input/imdb-wiki-faces-dataset/imdb_crop/imdb.mat'  # Update with the correct path
mat_data = scipy.io.loadmat(mat_file_path)

# Show the variables stored in the file, leaving out the dunder
# bookkeeping entries ('__header__', '__version__', ...)
print("Keys in the .mat file:")
for key in (name for name in mat_data if not name.startswith('__')):
    print(key)

# When an 'imdb' entry is present, print it raw to see how it is laid out
if 'imdb' in mat_data:
    print("\nStructure of 'imdb':")
    print(mat_data['imdb'])

# Loading dataset, converting it to CSV

In [None]:
import scipy.io
import pandas as pd
import numpy as np

# Load the .mat file
mat_file_path = '/kaggle/input/imdb-wiki-faces-dataset/imdb_crop/imdb.mat'  # Update with the correct path
mat_data = scipy.io.loadmat(mat_file_path)

# Unwrap the single 'imdb' record; loadmat exposes MATLAB structs as
# structured arrays, so the record's fields can be addressed by name
imdb_data = mat_data['imdb'][0][0]

# Extract relevant fields by name instead of by position, so the code states
# which field it reads and does not depend on the field order inside the struct
image_paths = imdb_data['full_path'][0]  # Full path to the image
genders = imdb_data['gender'][0]  # Gender (1: male, 0: female)

# Creating DataFrames

In [None]:
# Take element 0 of every entry in image_paths and store the results
# as a one-column frame named 'path'
list_image_paths = [entry[0] for entry in image_paths]
df_path = pd.DataFrame(list_image_paths).rename(columns={0: 'path'})

df_path

In [None]:
# One-column frame holding the gender value of every image
df_gender = pd.DataFrame(genders).rename(columns={0: 'gender'})
df_gender

In [None]:
def extract_dob(path):
    """Return the date-of-birth token embedded in an image path.

    The path is cut at underscores and the second-to-last piece is
    returned unchanged, e.g. '..._1899-5-10_1968.jpg' -> '1899-5-10'.

    Raises:
        IndexError: if the path contains no underscore at all.
    """
    # Only the last two underscore-separated pieces matter, so split from the right
    trailing_pieces = path.rsplit('_', 2)
    return trailing_pieces[-2]

# Run extract_dob over every path and keep the result as a frame with a single 'dob' column
list_dob = (
    df_path['path']
    .apply(extract_dob)
    .to_frame(name='dob')
)
df_dob = pd.DataFrame(data=list_dob)

df_dob

In [None]:
def extract_photo_taken(image_path):
    """Return, as an int, the year that ends the image file name.

    The text after the last underscore is taken and everything from its
    first dot onwards is dropped, e.g. '..._1968.jpg' -> 1968.

    Raises:
        ValueError: if that text is not an integer literal.
    """
    last_piece = image_path.rsplit('_', 1)[-1]
    year_text = last_piece.split('.', 1)[0]
    return int(year_text)

# Parse the capture year out of every path and keep it as a one-column frame
df_photo_taken = (
    df_path['path']
    .apply(extract_photo_taken)
    .to_frame(name='photo_taken')
)
df_photo_taken

In [None]:
# The birth year is the part of 'dob' before the first '-' (this assumes a 'YYYY-MM-DD'-like layout)
df_dob['dob_year'] = df_dob['dob'].apply(
    lambda dob_text: int(dob_text.split('-', 1)[0])
)

# Age is the difference between the two years only; birth month and day are not taken into account
df_age = (
    df_photo_taken['photo_taken'] - df_dob['dob_year']
).to_frame(name='age')

# Show the resulting frame
df_age

In [None]:
# Place the per-image frames side by side (they are aligned on their index)
df_combined = pd.concat(
    objs=[df_path, df_gender, df_dob, df_photo_taken, df_age],
    axis='columns',
)
df_combined

In [None]:
# Persist the combined table (without the index column) in the Kaggle working directory
output_csv_path = '/kaggle/working/imdb_processed_data.csv'
df_combined.to_csv(path_or_buf=output_csv_path, index=False)

print(f"Processed CSV file saved to: {output_csv_path}")

In [None]:
from tensorflow.keras.utils import Sequence
import numpy as np
import cv2
import os

class DataGenerator(Sequence):
    """Keras ``Sequence`` that serves (images, ages) batches read from disk on demand.

    Args:
        df: DataFrame with a 'path' column (image path, joined onto
            ``base_dir``) and an 'age' column (the regression target).
        base_dir: Directory prefixed to every value of the 'path' column.
        batch_size: Maximum number of DataFrame rows consumed per batch.
        target_size: Size tuple handed to ``cv2.resize``; OpenCV interprets
            it as (width, height).
    """

    def __init__(self, df, base_dir, batch_size, target_size):
        # Harmless on older Keras; newer releases warn when the base
        # class constructor is not called
        super().__init__()
        self.df = df
        self.base_dir = base_dir
        self.batch_size = batch_size
        self.target_size = target_size
        # Positional row numbers; batches are contiguous slices of this array
        self.indexes = np.arange(len(self.df))

    def __len__(self):
        """Return the number of batches per epoch (the last one may be partial)."""
        return int(np.ceil(len(self.df) / self.batch_size))

    def __getitem__(self, index):
        """Build batch number ``index``.

        Returns:
            A tuple ``(images, ages)`` of NumPy arrays. Rows whose image
            cannot be read are dropped together with their label, so a batch
            can hold fewer than ``batch_size`` samples.
        """
        start = index * self.batch_size
        batch_indexes = self.indexes[start:start + self.batch_size]
        batch_rows = self.df.iloc[batch_indexes]

        batch_images = []
        batch_labels = []
        # Walk paths and ages together so every image keeps ITS OWN label,
        # also when an unreadable image is skipped
        for path, age in zip(batch_rows['path'], batch_rows['age']):
            # Prefix the base_dir to the image path
            full_path = os.path.join(self.base_dir, path)
            image = self.preprocess_image(full_path)
            if image is not None:
                batch_images.append(image)
                batch_labels.append(age)
        return np.array(batch_images), np.array(batch_labels, dtype='float32')

    def preprocess_image(self, image_path):
        """Read one image, resize it to ``target_size`` and scale it to [0, 1].

        Returns:
            A float32 array, or ``None`` (after printing a message) when
            OpenCV cannot read the file.
        """
        image = cv2.imread(image_path)
        if image is None:
            print(f"Failed to read image: {image_path}")
            return None
        image = cv2.resize(image, self.target_size)
        image = image.astype('float32') / 255.0  # Normalize
        return image

In [None]:
# Folder that the values of df_combined['path'] are joined onto
base_dir = '/kaggle/input/imdb-wiki-faces-dataset/imdb_crop/'

# Batching and resizing settings
batch_size = 32  # Adjust based on your memory limits
target_size = (224, 224)

# Generator that feeds the training loop
train_generator = DataGenerator(
    df=df_combined,
    base_dir=base_dir,
    batch_size=batch_size,
    target_size=target_size,
)

In [None]:
pip install keras-applications

In [None]:
pip install keras==2.12.0 tensorflow==2.12.0 keras-vggface==0.6

In [None]:
from keras_vggface.vggface import VGGFace
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

# Build the ResNet-50 flavour of VGGFace without its top layers and report success.
# A later cell rebinds base_model to the VGG16 variant.
base_model = VGGFace(
    model='resnet50',
    include_top=False,
    input_shape=(224, 224, 3),
)
print("VGGFace model loaded successfully!")

In [None]:
from keras_vggface.vggface import VGGFace
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model

# VGG16 flavour of VGGFace, without its top layers, used as the feature extractor
base_model = VGGFace(
    model='vgg16',
    include_top=False,
    input_shape=(224, 224, 3),
)

# Keep every layer of the feature extractor fixed during training
for frozen_layer in base_model.layers:
    frozen_layer.trainable = False

# Regression head: global average pooling -> 1024-unit ReLU layer -> single linear output (age)
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(1024, activation='relu')(x)
predictions = Dense(1, activation='linear')(x)

# Full network from the extractor's input to the age output
model = Model(inputs=base_model.input, outputs=predictions)

# Mean squared error as the loss, mean absolute error reported as a metric
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mae'],
)

In [None]:
# Fit for 10 epochs, each epoch running through every batch of the generator once
history = model.fit(
    x=train_generator,
    epochs=10,
    steps_per_epoch=len(train_generator),
)

In [None]:
# Write the trained model to an HDF5 file in the current working directory
model.save(filepath='Age_Sex_Detection.h5')