In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

# Number of images per batch; passed as `batch_size` to each data generator created below
batch_size = 32

### read the data

In [2]:
# All three splits are CSV files that sit side by side in ../data;
# read each one into its own DataFrame.
data_dir = os.path.join('..', 'data')
train_data = pd.read_csv(os.path.join(data_dir, 'train.csv'))
val_data = pd.read_csv(os.path.join(data_dir, 'val.csv'))
test_data = pd.read_csv(os.path.join(data_dir, 'test.csv'))

### add the path of the images

In [3]:
def construct_img_path(row):
    """Build the path of the aligned face image described by one metadata row.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing the keys
            'user_id', 'face_id' and 'original_image'.

    Returns:
        The path ``../data/faces/<user_id>/coarse_tilt_aligned_face.<face_id>.<original_image>``
        joined with the separator of the current platform.
    """
    # Images are grouped in one folder per user.
    user_dir = os.path.join("..", "data", "faces", row['user_id'])
    # face_id is converted with str() so non-string ids can be concatenated.
    file_name = "coarse_tilt_aligned_face." + str(row['face_id']) + "." + row['original_image']
    return os.path.join(user_dir, file_name)


# Derive the image path row by row (axis=1) and store it as a new column on every split.
for split_frame in (train_data, val_data, test_data):
    split_frame['img_path'] = split_frame.apply(construct_img_path, axis=1)

# Preview the first rows of the training split
train_data.head(5)

Unnamed: 0,user_id,face_id,original_image,age,gender,img_path
0,9855553@N08,1581,11658657103_4485e3f5ac_o.jpg,"(60, 100)",m,..\data\faces\9855553@N08\coarse_tilt_aligned_...
1,114841417@N06,502,12059583524_606ca96139_o.jpg,"(15, 20)",m,..\data\faces\114841417@N06\coarse_tilt_aligne...
2,66870968@N06,1227,11326189206_e08bdf6dfd_o.jpg,"(25, 32)",m,..\data\faces\66870968@N06\coarse_tilt_aligned...
3,8187011@N06,988,11133041085_e2ee5e12cb_o.jpg,"(0, 2)",u,..\data\faces\8187011@N06\coarse_tilt_aligned_...
4,114841417@N06,485,12059753735_7141b5443c_o.jpg,"(15, 20)",f,..\data\faces\114841417@N06\coarse_tilt_aligne...


#### Add a column that checks whether each image exists
This helps us detect missing images, as well as bugs in the path construction.

In [4]:
# For every split, record per row whether the constructed path exists on disk.
for split_frame in (train_data, val_data, test_data):
    split_frame['img_exists'] = split_frame['img_path'].apply(os.path.exists)

# Preview the first rows of the training split
train_data.head(5)

Unnamed: 0,user_id,face_id,original_image,age,gender,img_path,img_exists
0,9855553@N08,1581,11658657103_4485e3f5ac_o.jpg,"(60, 100)",m,..\data\faces\9855553@N08\coarse_tilt_aligned_...,True
1,114841417@N06,502,12059583524_606ca96139_o.jpg,"(15, 20)",m,..\data\faces\114841417@N06\coarse_tilt_aligne...,True
2,66870968@N06,1227,11326189206_e08bdf6dfd_o.jpg,"(25, 32)",m,..\data\faces\66870968@N06\coarse_tilt_aligned...,True
3,8187011@N06,988,11133041085_e2ee5e12cb_o.jpg,"(0, 2)",u,..\data\faces\8187011@N06\coarse_tilt_aligned_...,True
4,114841417@N06,485,12059753735_7141b5443c_o.jpg,"(15, 20)",f,..\data\faces\114841417@N06\coarse_tilt_aligne...,True


In [7]:
# Turn the age-group strings into integer class ids.
# The encoder is fitted on the training split only ...
age_encoder = LabelEncoder()
train_data['age_label'] = age_encoder.fit_transform(train_data['age'])

# ... and the fitted mapping is then reused for the other two splits.
for held_out_frame in (val_data, test_data):
    held_out_frame['age_label'] = age_encoder.transform(held_out_frame['age'])

# One output class per distinct age group seen during fitting
num_classes = len(age_encoder.classes_)
print("Age classes:", age_encoder.classes_)
train_data.head(5)

Age classes: ['(0, 2)' '(15, 20)' '(25, 32)' '(38, 43)' '(4, 6)' '(48, 53)' '(60, 100)'
 '(8, 23)']


Unnamed: 0,user_id,face_id,original_image,age,gender,img_path,img_exists,age_label
0,9855553@N08,1581,11658657103_4485e3f5ac_o.jpg,"(60, 100)",m,..\data\faces\9855553@N08\coarse_tilt_aligned_...,True,6
1,114841417@N06,502,12059583524_606ca96139_o.jpg,"(15, 20)",m,..\data\faces\114841417@N06\coarse_tilt_aligne...,True,1
2,66870968@N06,1227,11326189206_e08bdf6dfd_o.jpg,"(25, 32)",m,..\data\faces\66870968@N06\coarse_tilt_aligned...,True,2
3,8187011@N06,988,11133041085_e2ee5e12cb_o.jpg,"(0, 2)",u,..\data\faces\8187011@N06\coarse_tilt_aligned_...,True,0
4,114841417@N06,485,12059753735_7141b5443c_o.jpg,"(15, 20)",f,..\data\faces\114841417@N06\coarse_tilt_aligne...,True,1


### Load and preprocess images

In [8]:
# One ImageDataGenerator serves all three splits; its only transformation is
# rescaling pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

# Keyword arguments shared by the train, validation and test flows
flow_options = dict(
    directory=None,  # img_path already holds the complete path to each file
    x_col="img_path",  # column with the image paths
    y_col="age_label",  # column with the integer class ids
    target_size=(128, 128),  # every image is resized to 128x128
    batch_size=batch_size,
    class_mode="raw",  # labels are taken from y_col as-is
)

# Training batches are shuffled
train_generator = train_datagen.flow_from_dataframe(dataframe=train_data, shuffle=True, **flow_options)

# Validation and test batches keep the DataFrame row order
val_generator = train_datagen.flow_from_dataframe(dataframe=val_data, shuffle=False, **flow_options)
test_generator = train_datagen.flow_from_dataframe(dataframe=test_data, shuffle=False, **flow_options)

Found 11856 validated image filenames.
Found 2964 validated image filenames.
Found 3731 validated image filenames.


### Build the Softmax Classification Model (single hidden layer)

In [None]:
# Layers in forward order: flatten the 128x128x3 image, one hidden ReLU layer,
# then a softmax output with one unit per age class.
layer_stack = [
    Flatten(input_shape=(128, 128, 3)),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax'),
]
model = Sequential(layer_stack)

# Labels are integer class ids, hence the sparse variant of categorical cross-entropy.
adam_optimizer = Adam(learning_rate=0.001)
model.compile(
    optimizer=adam_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the layer-by-layer summary
model.summary()

### Train the Model

In [None]:
# Fit for 10 epochs; each epoch consumes every training batch once and is
# followed by a pass over every validation batch.
train_steps = len(train_generator)
val_steps = len(val_generator)

history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=10,
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
)

### Evaluate the Model

In [None]:
# Score the trained model on the test split, covering every test batch once
test_steps = len(test_generator)
test_loss, test_acc = model.evaluate(test_generator, steps=test_steps)
print(f"Test accuracy: {test_acc}")

### Make Predictions

In [None]:
# Class probabilities for every test batch; the predicted class is the
# index of the largest probability in each row.
predictions = model.predict(test_generator, steps=len(test_generator))
predicted_labels = predictions.argmax(axis=1)

# Show the first five predictions next to the true labels, decoded back to age groups
for sample_idx in range(5):
    predicted_age = age_encoder.inverse_transform([predicted_labels[sample_idx]])
    actual_age = age_encoder.inverse_transform([test_generator.labels[sample_idx]])
    print(f"Predicted: {predicted_age}, Actual: {actual_age}")

### Save the Model

In [None]:
# Write the trained model to a file in the current working directory
model_path = "age_classification_model.h5"
model.save(model_path)

###  How To Load The Model

``` python
from tensorflow.keras.models import load_model

# Read the model back from the file written by model.save(...)
model_path = "age_classification_model.h5"
loaded_model = load_model(model_path)

```

### How To Visualize Training Results


``` python
import matplotlib.pyplot as plt

# One figure per metric (accuracy first, then loss), each showing the
# training curve and the validation curve over the epochs.
curve_specs = (
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'Loss'),
)
for train_key, val_key, figure_title, y_label in curve_specs:
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(figure_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()
```