In [5]:
from google.colab import drive

import os
import zipfile

# Make Google Drive visible inside the Colab runtime.
drive.mount('/content/drive')

# Archive location on Drive (adjust if the dataset lives elsewhere).
zip_file_path = '/content/drive/MyDrive/Face, Age, Gender/data/data.zip'

# Destination directory for the extracted files.
extract_folder = '/content/drive/MyDrive/Face, Age, Gender/data/data'

# Extract every archive member into the destination directory.
with zipfile.ZipFile(zip_file_path) as archive:
    archive.extractall(path=extract_folder)

print("Unzipping complete!")

In [7]:
import pandas as pd
import numpy as np

In [8]:
# Load the training annotations CSV from Google Drive and preview its first 20 rows.
# NOTE(review): this path uses the folder 'Face, Gender, Age data', while the unzip cell
# extracts into 'Face, Age, Gender/data/data' — confirm which location is the real one.
raw_train_data = pd.read_csv('/content/drive/MyDrive/Face, Gender, Age data/data/data/train.csv')
raw_train_data.head(20)

In [9]:
import os
import pandas as pd
from PIL import Image
import numpy as np


# Training annotations CSV and the folder that holds the image files it refers to.
data_path = '/content/drive/MyDrive/Face, Gender, Age data/data/data/train.csv'
img_folder = '/content/drive/MyDrive/Face, Gender, Age data/data/data/image_data'


def process_images_and_labels(data_path, img_folder, target_size=(128,128)):
    """Load the annotated images and derive an age and a gender label for each.

    Reads the CSV at ``data_path`` and, for every row whose ``Filename`` exists
    inside ``img_folder``, loads the image as RGB, resizes it to ``target_size``
    and records one age label and one gender label.

    Rows whose image file is missing are skipped entirely, so the three
    returned arrays always have the same length and stay index-aligned.

    Args:
        data_path: Path of the CSV with the columns ``Filename``, ``Young``,
            ``Middle_Aged``, ``Senior`` and ``Male``.
        img_folder: Directory containing the image files.
        target_size: Size tuple handed to ``Image.resize``.

    Returns:
        Tuple ``(x_train, y_age_train, y_gender_train)`` of NumPy arrays: the
        stacked image arrays, the age labels (``'Young'``, ``'Middle_Aged'`` or
        ``'Senior'``) and the gender labels (``'male'`` or ``'female'``).
    """
    raw_train_data = pd.read_csv(data_path)
    x_train = []
    y_age_train = []
    y_gender_train = []

    for _, row in raw_train_data.iterrows():
        img_path = os.path.join(img_folder, row['Filename'])

        # A row without an image must not contribute labels either, otherwise
        # labels drift out of step with the images.
        if not os.path.exists(img_path):
            continue

        # The context manager releases the file handle as soon as the pixels are copied.
        with Image.open(img_path) as img:
            x_train.append(np.array(img.convert('RGB').resize(target_size)))

        if row['Young'] == 1:
            y_age_train.append('Young')
        elif row['Middle_Aged'] == 1:
            y_age_train.append('Middle_Aged')
        elif row['Senior'] == 1:
            y_age_train.append('Senior')
        else:
            # No age flag set to 1: fall back to the middle class.
            y_age_train.append('Middle_Aged')

        if row['Male'] == -1:
            y_gender_train.append('female')
        else:
            # 1 means male; any other value also falls back to 'male'.
            y_gender_train.append('male')

    return np.array(x_train), np.array(y_age_train), np.array(y_gender_train)

In [10]:
# Build the image array and both label arrays from the training CSV.
x_train, y_age_train, y_gender_train = process_images_and_labels(
    data_path,
    img_folder,
)
# Sanity-check the sizes and inspect the gender labels.
print(x_train.shape)
print(len(y_age_train))
print(y_gender_train)

In [11]:
class age_detector(nn.Module):
    """Convolutional classifier that predicts an age class from a face image.

    Five stride-2 convolutions (each followed by instance normalisation and a
    LeakyReLU) shrink the image, the result is flattened and a four-layer MLP
    produces ``num_classes`` logits.

    The first linear layer expects 1024 features, which is what the
    convolutions yield for a 128x128 input (256 channels x 2 x 2).

    Args:
        image_channels: Number of channels of the input images.
        num_classes: Number of output logits.
    """

    def __init__(self, image_channels=3, num_classes=3):
        super().__init__()

        # Spatial sizes below are for a 128x128 input.
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(image_channels, 16, kernel_size=4, stride=2),  # -> 63x63
            nn.InstanceNorm2d(16),
            nn.LeakyReLU(0.2),
            nn.Conv2d(16, 32, kernel_size=4, stride=2),  # -> 30x30
            nn.InstanceNorm2d(32),
            nn.LeakyReLU(0.2),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),  # -> 14x14
            nn.InstanceNorm2d(64),
            nn.LeakyReLU(0.2),
            nn.Conv2d(64, 128, kernel_size=4, stride=2),  # -> 6x6
            nn.InstanceNorm2d(128),
            nn.LeakyReLU(0.2),
            nn.Conv2d(128, 256, kernel_size=4, stride=2),  # -> 2x2
            nn.InstanceNorm2d(256),
            nn.LeakyReLU(0.2),
            # Built-in flatten: keeps the batch dimension, has no parameters, and
            # removes the need for a helper class defined elsewhere in the notebook.
            nn.Flatten()
        )

        self.classifier = nn.Sequential(
            nn.Linear(1024, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, 64),
            nn.LeakyReLU(0.2),
            nn.Linear(64, 16),
            nn.LeakyReLU(0.2),
            nn.Linear(16, num_classes)
        )

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.feature_extractor(x)
        x = self.classifier(x)
        return x

In [14]:
from torch.optim import Adam

# Loss, model and optimiser for the age classifier.
criterion = nn.CrossEntropyLoss()
age_model = age_detector()
optim = Adam(age_model.parameters(), lr=0.002)
# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs on a runtime without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
age_model.to(device)

In [24]:
# Rebuild the image array and both label arrays from the training CSV.
x_train, y_age_train, y_gender_train = process_images_and_labels(
    data_path,
    img_folder,
)
# Sanity-check the sizes and inspect the gender labels.
print(x_train.shape)
print(len(y_age_train))
print(y_gender_train)

In [25]:
# Both label arrays should report the same number of entries.
print(len(y_age_train))
print(len(y_gender_train))

In [26]:
import torch
import torch.utils.data as data
import torchvision.transforms as transforms
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from PIL import Image

In [27]:
def encode_labels(y_age_train, y_gender_train):
    """Convert the age and gender labels to integer class ids.

    A separate ``LabelEncoder`` is fitted on each label collection; the fitted
    encoders themselves are not returned.

    Returns:
        Tuple ``(encoded_age, encoded_gender)``.
    """
    encoded_age = LabelEncoder().fit_transform(y_age_train)
    encoded_gender = LabelEncoder().fit_transform(y_gender_train)
    return encoded_age, encoded_gender


# Replace the string labels with their integer encodings.
# NOTE(review): this rebinds the same names, so the original string labels are no
# longer available after this cell runs.
y_age_train , y_gender_train = encode_labels(y_age_train, y_gender_train)

In [28]:
# Training pipeline: random rotation of up to 10 degrees for augmentation, then
# tensor conversion and normalisation with mean 0.5 / std 0.5.
train_transform = transforms.Compose(
    [
        transforms.RandomRotation(degrees=10),
        transforms.ToTensor(),
        transforms.Normalize(mean=0.5, std=0.5),
    ]
)
# Evaluation pipeline: same conversion and normalisation, without augmentation.
test_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=0.5, std=0.5),
    ]
)

In [29]:
class ageDataset(data.Dataset):
    """Dataset that pairs each image array with its age label.

    Args:
        x_data: Indexable collection of image arrays.
        y_age: Indexable collection of integer age labels, aligned with ``x_data``.
        transform: Optional callable applied to the PIL image.
    """

    def __init__(self, x_data, y_age, transform=None):
        self.x_data, self.y_age, self.transform = x_data, y_age, transform

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.x_data)

    def __getitem__(self, index):
        """Return ``(image, label)``; the label is a ``torch.long`` tensor."""
        picture = Image.fromarray(self.x_data[index])
        raw_label = self.y_age[index]
        if self.transform:
            picture = self.transform(picture)
        label = torch.tensor(raw_label, dtype=torch.long)
        return picture, label
# Training dataset for the age model, using the augmenting training transform.
train_age_dataset = ageDataset(x_train, y_age_train,  transform=train_transform)

In [30]:
# Shuffled mini-batches of 32 samples for training the age model.
train_loader_age = data.DataLoader(train_age_dataset, batch_size=32, shuffle=True)

In [31]:
class Flatten(nn.Module):
    """Collapse every dimension after the first (batch) one into a single axis."""

    def forward(self, input):
        batch_size = input.shape[0]
        return input.view(batch_size, -1)

In [34]:
def train(model, train_loader, optimizer, criterion, device):
    """Run one training epoch and report the mean batch loss and the accuracy.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader: Iterable of ``(images, labels)`` batches that supports ``len``.
        optimizer: Optimiser updating the parameters of ``model``.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        device: Device that each batch is moved to.

    Returns:
        Tuple ``(mean_loss, accuracy)``: the loss averaged over the batches and
        the percentage of correctly classified samples.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in tqdm(train_loader):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate every batch exactly once so the returned loss is the true
        # per-batch mean and the counters reflect the real sample count.
        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    return running_loss / len(train_loader), accuracy

In [35]:
from tqdm import tqdm

num_epochs = 25

# Train the age model; after each epoch print the metrics, then the 1-based epoch number.
for epoch in range(num_epochs):
    train_loss, train_acc = train(
        age_model, train_loader_age, optim, criterion, device
    )
    print(f'Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}%')
    print(epoch+1)

# Persist the trained weights.
torch.save(age_model.state_dict(), "age_final.pth")

In [36]:
# y_age_train is a NumPy array here (np.array / LabelEncoder output), which has no
# DataFrame-style .head(); slice it to preview the first 20 labels.
y_age_train[:20]

In [37]:
# Show the first 5 age labels.
print(y_age_train[:5])

In [38]:
# Show the first 20 age labels.
print(y_age_train[:20])

In [39]:
# Show the first 400 age labels.
print(y_age_train[:400])

In [40]:
# Show the first 20 gender labels.
print(y_gender_train[:20])

In [41]:
# Show the first 40 gender labels.
print(y_gender_train[:40])

In [42]:
# Show the first 90 gender labels.
print(y_gender_train[:90])

In [43]:
# Show the first 100 gender labels.
print(y_gender_train[:100])

In [44]:
# Show the first 400 gender labels.
print(y_gender_train[:400])

In [45]:
# Show the first 800 gender labels.
print(y_gender_train[:800])

In [46]:
class sexDataset(data.Dataset):
    """Dataset that pairs each image array with its gender label.

    Args:
        x_data: Indexable collection of image arrays.
        y_gender: Indexable collection of integer gender labels, aligned with ``x_data``.
        transform: Optional callable applied to the PIL image.
    """

    def __init__(self, x_data, y_gender, transform=None):
        self.x_data, self.y_sex, self.transform = x_data, y_gender, transform

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.x_data)

    def __getitem__(self, index):
        """Return ``(image, label)``; the label is a ``torch.long`` tensor."""
        picture = Image.fromarray(self.x_data[index])
        raw_label = self.y_sex[index]
        if self.transform:
            picture = self.transform(picture)
        label = torch.tensor(raw_label, dtype=torch.long)
        return picture, label
# Training dataset for the gender model (augmenting transform), served in shuffled
# mini-batches of 32 samples.
train_sex_dataset = sexDataset(x_train, y_gender_train,  transform=train_transform)
train_loader_sex = data.DataLoader(train_sex_dataset, batch_size=32, shuffle=True)

In [47]:
import torch.nn as nn

class sex_detector(nn.Module):
    """Convolutional classifier that predicts a gender class from a face image.

    Five stride-2 convolutions (each followed by instance normalisation and a
    LeakyReLU) shrink the image, the result is flattened and a four-layer MLP
    produces ``num_classes`` logits.

    The first linear layer expects 1024 features, which is what the
    convolutions yield for a 128x128 input (256 channels x 2 x 2).

    Args:
        image_channels: Number of channels of the input images.
        num_classes: Number of output logits.
    """

    def __init__(self, image_channels=3, num_classes=2):
        super().__init__()

        # Spatial sizes below are for a 128x128 input.
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(image_channels, 16, kernel_size=4, stride=2),  # -> 63x63
            nn.InstanceNorm2d(16),
            nn.LeakyReLU(0.2),
            nn.Conv2d(16, 32, kernel_size=4, stride=2),  # -> 30x30
            nn.InstanceNorm2d(32),
            nn.LeakyReLU(0.2),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),  # -> 14x14
            nn.InstanceNorm2d(64),
            nn.LeakyReLU(0.2),
            nn.Conv2d(64, 128, kernel_size=4, stride=2),  # -> 6x6
            nn.InstanceNorm2d(128),
            nn.LeakyReLU(0.2),
            nn.Conv2d(128, 256, kernel_size=4, stride=2),  # -> 2x2
            nn.InstanceNorm2d(256),
            nn.LeakyReLU(0.2),
            # Built-in flatten: keeps the batch dimension and has no parameters,
            # so the layer indices and state_dict keys are unaffected.
            nn.Flatten()
        )

        self.classifier = nn.Sequential(
            nn.Linear(1024, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, 64),
            nn.LeakyReLU(0.2),
            nn.Linear(64, 16),
            nn.LeakyReLU(0.2),
            nn.Linear(16, num_classes)
        )

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.feature_extractor(x)
        x = self.classifier(x)
        return x

In [48]:
from torch.optim import Adam

# Loss, model and optimiser for the gender classifier.
criterion2 = nn.CrossEntropyLoss()
sex_model = sex_detector()
optim2 = Adam(sex_model.parameters(), lr=0.002)
# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs on a runtime without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
sex_model.to(device)

In [49]:
def train(model, train_loader, optimizer, criterion, device):
    """Run one training epoch and report the mean batch loss and the accuracy.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader: Iterable of ``(images, labels)`` batches that supports ``len``.
        optimizer: Optimiser updating the parameters of ``model``.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        device: Device that each batch is moved to.

    Returns:
        Tuple ``(mean_loss, accuracy)``: the loss averaged over the batches and
        the percentage of correctly classified samples.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in tqdm(train_loader):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate every batch exactly once so the returned loss is the true
        # per-batch mean and the counters reflect the real sample count.
        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    return running_loss / len(train_loader), accuracy

In [50]:
from tqdm import tqdm

num_epochs = 15

# Train the gender model; after each epoch print the metrics and the 1-based epoch number.
for epoch in range(num_epochs):
    train_loss, train_acc = train(
        sex_model, train_loader_sex, optim2, criterion2, device
    )
    print(f'Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}%')
    print(f'sex :{epoch+1}')

# Persist the trained weights.
torch.save(sex_model.state_dict(), "sex_final.pth")

In [54]:
# Load the test annotations CSV and preview its first rows.
# NOTE(review): this path ('Face, Age, Gender/data/data/test.csv') differs from the
# 'Face, Gender, Age data/data/data/...' location used for the other CSV reads in this
# notebook — confirm which one is correct.
test_data = pd.read_csv('/content/drive/MyDrive/Face, Age, Gender/data/data/test.csv')
test_data.head()

In [55]:
import os
import pandas as pd
from PIL import Image
import numpy as np


# Test annotations CSV and the image folder.
# NOTE(review): this rebinds data_path, which earlier pointed at train.csv; re-running
# the training-data cells after this one would read the test CSV instead.
data_path = '/content/drive/MyDrive/Face, Gender, Age data/data/data/test.csv'
img_folder = '/content/drive/MyDrive/Face, Gender, Age data/data/data/image_data'


def test_process_images_and_labels(data_path, img_folder, target_size=(128,128)):
    raw_train_data = pd.read_csv(data_path)
    x_test = []
    y_age_test = []
    y_gender_test = []

    for idx, row in raw_train_data.iterrows():
        img_path = os.path.join(img_folder, row['Filename'])

        if os.path.exists(img_path):
            img = Image.open(img_path).convert('RGB')
            img = img.resize(target_size)

            x_test.append(np.array(img))

    return np.array(x_test)

In [56]:
# Load the test images into a single array.
x_test = test_process_images_and_labels(data_path, img_folder)

In [59]:
gender_predictions = []
age_predictions = []

# Inference mode for both networks, on the target device.
sex_model.eval().to(device)
age_model.eval().to(device)

with torch.no_grad():
  # `test_loader` is the loader name the rest of the notebook defines;
  # `test_data_loader` is not defined anywhere.
  for images in test_loader:
    images = images.to(device)

    gender_outputs = sex_model(images)
    age_outputs = age_model(images)

    # `.cpu` is a method: it has to be called before `.numpy()` can be used.
    gender_pred = torch.argmax(gender_outputs, dim=1).cpu().numpy()
    age_pred = torch.argmax(age_outputs, dim=1).cpu().numpy()

    gender_predictions.extend(gender_pred)
    age_predictions.extend(age_pred)

In [61]:
# Reload the test images and wrap the raw array in a DataLoader.
# NOTE(review): x_test is passed directly, so test_transform is not applied to these
# batches; a later cell rebinds test_loader to a testDataset-backed loader — confirm
# whether this loader is still needed.
x_test = test_process_images_and_labels(data_path, img_folder)
test_loader = data.DataLoader(x_test, batch_size=32, shuffle=False)

In [66]:
class testDataset(data.Dataset):
  """Label-free dataset that serves the test images one by one.

  Args:
    x_data: Indexable collection of image arrays.
    transform: Optional callable applied to the PIL image.
  """

  def __init__(self, x_data, transform=None):
    self.x_data = x_data
    self.transform = transform

  def __len__(self):
    """Number of images in the dataset."""
    return len(self.x_data)

  def __getitem__(self, index):
    """Return the (optionally transformed) image at ``index``."""
    # No label lookup here: this dataset stores no labels, so reading one
    # would raise AttributeError.
    image = Image.fromarray(self.x_data[index])
    if self.transform:
      image = self.transform(image)
    return image
# Test dataset using the non-augmenting evaluation transform.
test_dataset = testDataset(x_test,  transform=test_transform)

In [67]:
# Unshuffled batches of 32 so predictions keep the order of test_dataset.
test_loader = data.DataLoader(test_dataset, batch_size=32, shuffle=False)

In [68]:
gender_predictions = []
age_predictions = []

# Inference mode for both networks, on the target device.
sex_model.eval().to(device)
age_model.eval().to(device)

with torch.no_grad():
  for images in test_loader:
    images = images.to(device).float()

    gender_outputs = sex_model(images)
    age_outputs = age_model(images)

    # `.cpu` is a method: it has to be called before `.numpy()` can be used.
    gender_pred = torch.argmax(gender_outputs, dim=1).cpu().numpy()
    age_pred = torch.argmax(age_outputs, dim=1).cpu().numpy()

    gender_predictions.extend(gender_pred)
    age_predictions.extend(age_pred)

In [69]:
class testDataset(data.Dataset):
    """Label-free dataset that serves the test images one by one.

    Args:
        x_data: Indexable collection of image arrays.
        transform: Optional callable applied to the PIL image.
    """

    def __init__(self, x_data, transform=None):
        self.x_data, self.transform = x_data, transform

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.x_data)

    def __getitem__(self, index):
        """Return the (optionally transformed) image at ``index``."""
        picture = Image.fromarray(self.x_data[index])
        if self.transform:
            return self.transform(picture)
        return picture

# Test dataset using the non-augmenting evaluation transform.
test_dataset = testDataset(x_test,  transform=test_transform)

In [70]:
# Unshuffled batches of 32 so predictions keep the order of test_dataset.
test_loader = data.DataLoader(test_dataset, batch_size=32, shuffle=False)

In [71]:
gender_predictions = []
age_predictions = []

# Inference mode for both networks, on the target device.
sex_model.eval().to(device)
age_model.eval().to(device)

with torch.no_grad():
  for images in test_loader:
    images = images.to(device).float()

    gender_outputs = sex_model(images)
    age_outputs = age_model(images)

    # `.cpu` is a method: it has to be called before `.numpy()` can be used.
    gender_pred = torch.argmax(gender_outputs, dim=1).cpu().numpy()
    age_pred = torch.argmax(age_outputs, dim=1).cpu().numpy()

    gender_predictions.extend(gender_pred)
    age_predictions.extend(age_pred)

In [72]:
# Per-image predicted class ids, collected batch by batch.
gender_predictions, age_predictions = [], []

# Put both networks into inference mode on the target device.
sex_model.eval()
sex_model.to(device)
age_model.eval()
age_model.to(device)

# Gradients are not needed for prediction.
with torch.no_grad():
    for images in test_loader:
        images = images.to(device).float()

        gender_outputs = sex_model(images)
        age_outputs = age_model(images)

        # Highest-scoring class per image, moved to host memory as NumPy arrays.
        gender_pred = gender_outputs.argmax(dim=1).cpu().numpy()
        age_pred = age_outputs.argmax(dim=1).cpu().numpy()

        gender_predictions.extend(gender_pred)
        age_predictions.extend(age_pred)

In [73]:
import pandas as pd


male_column = []
young_column = []
middle_aged_column = []
senior_column = []

# (Young, Middle_Aged, Senior) flags for each predicted age class id.
# NOTE(review): ids 0/1/2 are treated as Middle_Aged/Senior/Young — confirm this
# matches the label encoding used for training.
_age_flags = {
    0: (-1, 1, -1),
    1: (-1, -1, 1),
    2: (1, -1, -1),
}

for gender_pred, age_pred in zip(gender_predictions, age_predictions):
    # Class id 1 is reported as Male = 1, anything else as Male = -1.
    male_column.append(1 if gender_pred == 1 else -1)

    flags = _age_flags.get(age_pred)
    if flags is not None:
        young_flag, middle_flag, senior_flag = flags
        young_column.append(young_flag)
        middle_aged_column.append(middle_flag)
        senior_column.append(senior_flag)

submission = pd.DataFrame(
    {
        'Male': male_column,
        'Young': young_column,
        'Middle_Aged': middle_aged_column,
        'Senior': senior_column,
    }
)

submission.head(10)