<a href="https://colab.research.google.com/github/ShravyaMalogi/PROJECT_drafts/blob/main/Age_Detection/ad3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!ls "/content/drive/MyDrive/projects/datasets/"

age_detection_model.h5	imdb_train_new.csv  imdbwiki.zip
imdb_test_new.csv	imdb_valid_new.csv


In [4]:
import zipfile

bad_files = []

with zipfile.ZipFile('/content/drive/MyDrive/projects/datasets/imdbwiki.zip', 'r') as zipf:
    for file in zipf.filelist:
        try:
            zipf.open(file.filename).read()
        except:
            bad_files.append(file.filename)

print("🛑 Corrupted files found:")
for bf in bad_files:
    print(bf)

🛑 Corrupted files found:
imdb-clean-1024/imdb-clean-1024/22/nm0266422_rm3838610432_1974-9-19_2005.jpg
imdb-clean-1024/imdb-clean-1024/24/nm1861624_rm3706501120_1982-11-15_2010.jpg
imdb-clean-1024/imdb-clean-1024/25/nm1553725_rm2946674688_1985-3-15_2011.jpg
imdb-clean-1024/imdb-clean-1024/41/nm0266441_rm2134412800_1961-5-13_1994.jpg
imdb-clean-1024/imdb-clean-1024/43/nm0001743_rm2994522112_1948-1-29_1984.jpg
imdb-clean-1024/imdb-clean-1024/73/nm0001173_rm2329378816_1968-3-12_2008.jpg


In [5]:
import zipfile
import os

# Path to the zip file on your Google Drive
zip_path = '/content/drive/MyDrive/projects/datasets/imdbwiki.zip'  # change this
extract_path = '/content/imdb-wiki_data'

# List of corrupted files inside the ZIP to skip
corrupted_files = bad_files

# Extract all except corrupted files
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    for file in zip_ref.namelist():
        if file not in corrupted_files:
            try:
                zip_ref.extract(file, extract_path)
            except Exception as e:
                print(f"Error extracting {file}: {e}")
        else:
            print(f"Skipping corrupted file: {file}")


Skipping corrupted file: imdb-clean-1024/imdb-clean-1024/22/nm0266422_rm3838610432_1974-9-19_2005.jpg
Skipping corrupted file: imdb-clean-1024/imdb-clean-1024/24/nm1861624_rm3706501120_1982-11-15_2010.jpg
Skipping corrupted file: imdb-clean-1024/imdb-clean-1024/25/nm1553725_rm2946674688_1985-3-15_2011.jpg
Skipping corrupted file: imdb-clean-1024/imdb-clean-1024/41/nm0266441_rm2134412800_1961-5-13_1994.jpg
Skipping corrupted file: imdb-clean-1024/imdb-clean-1024/43/nm0001743_rm2994522112_1948-1-29_1984.jpg
Skipping corrupted file: imdb-clean-1024/imdb-clean-1024/73/nm0001173_rm2329378816_1968-3-12_2008.jpg


# CODE

In [None]:
# Step 1: Imports and Setup
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import mixed_precision
import pandas as pd
import os

# Step 2: Set mixed precision for speed (T4 GPU)
mixed_precision.set_global_policy('mixed_float16')

# Step 3: Load CSVs (adjust paths if needed)
train_df = pd.read_csv('/content/imdb-wiki_data/imdb_train_new_1024.csv')
val_df = pd.read_csv('/content/imdb-wiki_data/imdb_valid_new_1024.csv')
test_df = pd.read_csv('/content/imdb-wiki_data/imdb_test_new_1024.csv')

# Normalize age (keep range small for MSE loss)
train_df['age'] = train_df['age'] / 100.0
val_df['age'] = val_df['age'] / 100.0
test_df['age'] = test_df['age'] / 100.0

# Step 4: Define parameters
image_size = 224  # Try 160 for faster training
batch_size = 32   # Increase if no OOM (64 max on T4 usually)
epochs = 15

# Step 5: Data generators
datagen = ImageDataGenerator(rescale=1./255)

train_gen = datagen.flow_from_dataframe(
    dataframe=train_df,
    directory='/content/imdb-wiki_data/imdb-clean-1024/imdb-clean-1024',
    x_col='filename',
    y_col='age',
    target_size=(image_size, image_size),
    batch_size=batch_size,
    class_mode='raw'
)

val_gen = datagen.flow_from_dataframe(
    dataframe=val_df,
    directory='/content/imdb-wiki_data/imdb-clean-1024/imdb-clean-1024',
    x_col='filename',
    y_col='age',
    target_size=(image_size, image_size),
    batch_size=batch_size,
    class_mode='raw'
)

# Step 6: Build model
base_model = MobileNetV2(include_top=False, weights='imagenet', input_shape=(image_size, image_size, 3))
base_model.trainable = False  # Freeze for speed

model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(1, dtype='float32')  # force output to float32 (important for mixed precision)
])

# Step 7: Compile
optimizer = Adam(learning_rate=1e-4)
model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

# Step 8: Callbacks
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2)
checkpoint = ModelCheckpoint("best_model.h5", save_best_only=True, monitor="val_loss")

# Step 9: Train
model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=epochs,
    callbacks=[early_stop, reduce_lr, checkpoint]
)

# Step 10: Prediction
# You can multiply by 100 to get the real age back
# predicted_age = model.predict(some_image_batch)[0] * 100

Found 183884 validated image filenames.




Found 45972 validated image filenames.


  self._warn_if_super_not_called()


Epoch 1/15
[1m5747/5747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step - loss: 0.0411 - mae: 0.1360



[1m5747/5747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m756s[0m 129ms/step - loss: 0.0411 - mae: 0.1360 - val_loss: 0.0121 - val_mae: 0.0855 - learning_rate: 1.0000e-04
Epoch 2/15
[1m5747/5747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step - loss: 0.0129 - mae: 0.0885



[1m5747/5747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m654s[0m 114ms/step - loss: 0.0129 - mae: 0.0885 - val_loss: 0.0115 - val_mae: 0.0834 - learning_rate: 1.0000e-04
Epoch 3/15
[1m5747/5747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m680s[0m 114ms/step - loss: 0.0119 - mae: 0.0849 - val_loss: 0.0116 - val_mae: 0.0833 - learning_rate: 1.0000e-04
Epoch 4/15
[1m5747/5747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step - loss: 0.0114 - mae: 0.0834



[1m5747/5747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m649s[0m 113ms/step - loss: 0.0114 - mae: 0.0834 - val_loss: 0.0115 - val_mae: 0.0826 - learning_rate: 1.0000e-04
Epoch 5/15
[1m5747/5747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step - loss: 0.0112 - mae: 0.0826



[1m5747/5747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m655s[0m 114ms/step - loss: 0.0112 - mae: 0.0826 - val_loss: 0.0112 - val_mae: 0.0828 - learning_rate: 5.0000e-05
Epoch 6/15
[1m5747/5747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step - loss: 0.0111 - mae: 0.0820



[1m5747/5747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m651s[0m 113ms/step - loss: 0.0111 - mae: 0.0820 - val_loss: 0.0111 - val_mae: 0.0822 - learning_rate: 5.0000e-05
Epoch 7/15
[1m5747/5747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m687s[0m 114ms/step - loss: 0.0110 - mae: 0.0817 - val_loss: 0.0111 - val_mae: 0.0822 - learning_rate: 5.0000e-05
Epoch 8/15
[1m5747/5747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step - loss: 0.0108 - mae: 0.0812



[1m5747/5747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m681s[0m 118ms/step - loss: 0.0108 - mae: 0.0812 - val_loss: 0.0111 - val_mae: 0.0819 - learning_rate: 2.5000e-05
Epoch 9/15
[1m5747/5747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step - loss: 0.0107 - mae: 0.0807



[1m5747/5747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m684s[0m 119ms/step - loss: 0.0107 - mae: 0.0807 - val_loss: 0.0111 - val_mae: 0.0822 - learning_rate: 2.5000e-05
Epoch 10/15
[1m5747/5747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step - loss: 0.0106 - mae: 0.0803



[1m5747/5747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m677s[0m 118ms/step - loss: 0.0106 - mae: 0.0803 - val_loss: 0.0111 - val_mae: 0.0820 - learning_rate: 1.2500e-05
Epoch 11/15
[1m5747/5747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m686s[0m 119ms/step - loss: 0.0106 - mae: 0.0805 - val_loss: 0.0111 - val_mae: 0.0823 - learning_rate: 1.2500e-05
Epoch 12/15
[1m5747/5747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step - loss: 0.0105 - mae: 0.0801



[1m5747/5747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m684s[0m 119ms/step - loss: 0.0105 - mae: 0.0801 - val_loss: 0.0111 - val_mae: 0.0821 - learning_rate: 6.2500e-06
Epoch 13/15
[1m3089/5747[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m4:02[0m 91ms/step - loss: 0.0106 - mae: 0.0802