In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Importing required Libraries

In [None]:
# importing pytorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
# Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Running on {device}")

# Fix the random seeds so weight initialisation and augmentation are repeatable:
# first PyTorch's default generator, then (explicitly) every CUDA device.
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)

# torchvision to convert .jpg into tensor
from torchvision import transforms
import torchvision.models as models
import torchvision

# to visualize images
from PIL import Image
import matplotlib.pyplot as plt

import os
import kagglehub
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
# import trackio
from sklearn.metrics import f1_score

In [None]:
# Hugging Face access: copy the Kaggle secret named "hf_token" into the
# HF_TOKEN environment variable of this session.
import os
from kaggle_secrets import UserSecretsClient

user_secrets = UserSecretsClient()
hf_token = user_secrets.get_secret("hf_token")
os.environ['HF_TOKEN'] = hf_token

# Importing data

In [None]:
# Read the three competition tables: the submission template, the labelled
# training rows and the unlabelled test rows.
sample_submission = pd.read_csv("/kaggle/input/sep-25-dl-gen-ai-nppe-1/face_dataset/sample_submission.csv")
train_df = pd.read_csv("/kaggle/input/sep-25-dl-gen-ai-nppe-1/face_dataset/train.csv")
test_df = pd.read_csv("/kaggle/input/sep-25-dl-gen-ai-nppe-1/face_dataset/test.csv")

# Print a title followed by the first rows of every table.
for title, table in (
    ("train_df: \n", train_df),
    ("test_df: \n", test_df),
    ("sample_submission: \n", sample_submission),
):
    print(title)
    print(table.head())

In [None]:
# Root folder of the competition dataset. The Dataset classes below prepend it
# (plain string concatenation) to each row's `full_path`, so keep the trailing slash.
data_path = "/kaggle/input/sep-25-dl-gen-ai-nppe-1/face_dataset/"
# fig , axes = plt.subplots(2,3,figsize=(12, 8))
# axes = axes.flatten()
# for i,(index , path , gender , age) in enumerate(train_df.sample(6,random_state=42).values):
#     img = Image.open(data_path+path)
#     axes[i].imshow(img)
#     axes[i].set_title(f"Gender: {gender}, Age: {age}", fontsize=14)
#     axes[i].axis("off")

# plt.tight_layout()
# plt.show()

Gender 1 corresponds to Male

Gender 0 corresponds to Female

# Creating Train and Validation split

In [None]:
# Hold out 5% of the labelled rows for validation, stratified on gender so both
# splits keep the same gender ratio.
# NOTE: this rebinds `train_df`, so re-running the cell without reloading the CSV
# splits the already-reduced training set again.
train_df, val_df = train_test_split(
    train_df,
    test_size=0.05,
    random_state=42,
    stratify=train_df['gender'],
)
print(f"train shape:{train_df.shape}")
print(f"validation shape:{val_df.shape}")

# Age range observed in the training split.
min_age, max_age = train_df["age"].min(), train_df["age"].max()

# Converting image to tensor and creating DataLoaders.

## Creating custom class for Train and validation data.

In [None]:
class FaceData(Dataset):
    """Labelled face-image dataset that can be fed to a ``DataLoader``.

    Every row of ``df`` describes one sample through three columns:
    ``full_path`` (image path appended to ``data_path``), ``gender`` and ``age``.

    Args:
        df: DataFrame providing the ``full_path``, ``gender`` and ``age`` columns.
        data_path: Prefix that is string-concatenated with ``full_path``; no
            separator is inserted, so it should end with a slash.
        transform: Callable applied to the opened PIL image. When ``None``
            (the default) the image is converted with ``transforms.ToTensor()``.
    """

    def __init__(self,df,data_path="/kaggle/input/sep-25-dl-gen-ai-nppe-1/face_dataset/",transform=None):
        self.df = df
        self.data_path = data_path
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the wrapped DataFrame."""
        return len(self.df)

    def __getitem__(self,idx):
        """Return ``(image, gender, age)`` for the row at position ``idx``.

        ``gender`` and ``age`` are returned as 0-d ``float32`` tensors.
        """
        row = self.df.iloc[idx]

        transform = self.transform
        if transform is None:
            # The constructor default is None; calling it used to fail with
            # "'NoneType' object is not callable". Fall back to a plain
            # PIL -> tensor conversion instead.
            from torchvision import transforms
            transform = transforms.ToTensor()

        img = Image.open(self.data_path + row["full_path"])
        img_tensor = transform(img)

        gender = torch.tensor(row["gender"], dtype=torch.float32)
        age = torch.tensor(row["age"], dtype=torch.float32)
        return img_tensor, gender, age
        

## Creating custom class for Test data.

In [None]:
class facetest(Dataset):
    """Unlabelled face-image dataset for the test split.

    Args:
        df: DataFrame providing a ``full_path`` column.
        data_path: Prefix that is string-concatenated with ``full_path``; no
            separator is inserted, so it should end with a slash.
        transform: Callable applied to the opened PIL image. When ``None``
            (the default) the image is converted with ``transforms.ToTensor()``.
    """

    def __init__(self,df,data_path="/kaggle/input/sep-25-dl-gen-ai-nppe-1/face_dataset/",transform=None):
        self.df = df
        self.data_path = data_path
        self.transform = transform

    def __len__(self):
        """Return the number of rows (images) in the wrapped DataFrame."""
        return len(self.df)

    def __getitem__(self,idx):
        """Return the transformed image for the row at position ``idx``."""
        row = self.df.iloc[idx]

        transform = self.transform
        if transform is None:
            # The constructor default is None; calling it used to fail with
            # "'NoneType' object is not callable". Fall back to a plain
            # PIL -> tensor conversion instead.
            from torchvision import transforms
            transform = transforms.ToTensor()

        img = Image.open(self.data_path + row["full_path"])
        return transform(img)
        


In [None]:
def _to_rgb(im):
    """Return the PIL image converted to RGB mode."""
    return im.convert('RGB')


def _tensor_steps():
    """Build the closing steps of both pipelines: tensor conversion, then normalisation."""
    return [
        transforms.ToTensor(),
        # standard ImageNet mean/std parameters for better results
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]


# Validation/test pipeline: resize, force RGB, then convert the image to a
# normalised tensor. No randomness is involved.
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Lambda(_to_rgb),
    *_tensor_steps(),
])

# Training pipeline: same as above with random augmentation inserted before
# the tensor conversion.
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Lambda(_to_rgb),
    transforms.RandomHorizontalFlip(p=0.5),  # random flips
    transforms.RandomRotation(15),           # random rotation
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    *_tensor_steps(),
])

In [None]:
# Wrap every split in its Dataset. Only the training split gets the augmenting
# transform; validation and test share the deterministic one.
train_dataset = FaceData(train_df, data_path, transform=train_transform)
val_dataset = FaceData(val_df, data_path, transform=val_transform)
test_dataset = facetest(test_df, data_path, transform=val_transform)

print(f"Created {len(train_dataset)} training samples.")
print(f"Created {len(val_dataset)} validation samples.")
print(f"Created {len(test_dataset)} test samples.")

# Number of samples per batch for all three loaders.
batch_size = 32

# Training batches are reshuffled every epoch and loaded by 4 worker processes.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)

# Validation and test keep the DataFrame order (the test order is what the
# submission cell relies on); no num_workers is given, so the DataLoader default applies.
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# (Scratch CNN)

In [None]:


# class faceCnn2(nn.Module):
#     def __init__(self , input_shape , hidden_shape ):
#         super().__init__()

#         self.conv_block_1 = nn.Sequential(
#             nn.Conv2d(in_channels=input_shape , out_channels=hidden_shape , kernel_size=3 , stride=1 , padding=1),
#             nn.ReLU(),
#             nn.Conv2d(in_channels=hidden_shape , out_channels=hidden_shape*2 , kernel_size=3 , stride=1 , padding=1),
#             nn.ReLU(),
#             nn.MaxPool2d(kernel_size=2)
#         )
#         self.conv_block_2 = nn.Sequential(
#             nn.Conv2d(in_channels=hidden_shape*2 , out_channels=hidden_shape*2 , kernel_size=3 , stride=1 , padding=1),
#             nn.ReLU(),
#             nn.Conv2d(in_channels=hidden_shape*2 , out_channels=hidden_shape*4 , kernel_size=3 , stride=1 , padding=1),
#             nn.ReLU(),
#             nn.MaxPool2d(kernel_size=2)
#         )
#         self.conv_block_3 = nn.Sequential(
#             nn.Conv2d(in_channels=hidden_shape*4 , out_channels=hidden_shape*6 , kernel_size=3 , stride=1 , padding=1),
#             nn.ReLU(),
#             nn.Conv2d(in_channels=hidden_shape*6 , out_channels=hidden_shape*6 , kernel_size=3 , stride=1 , padding=1),
#             nn.ReLU(),
#             nn.MaxPool2d(kernel_size=2)
#         )

#         self.classifier = nn.Sequential(
#             nn.Flatten(),
#             nn.Linear(in_features=301056 , out_features=512),
#             nn.ReLU(),
#             nn.Dropout(p=0.3),
#             )
#         self.age_head = nn.Linear(in_features=512, out_features=1)
#         self.gender_head = nn.Linear(in_features=512, out_features=1)
        
#     def forward(self , x):
#         x = self.conv_block_1(x)
#         x = self.conv_block_2(x)
#         x = self.conv_block_3(x)
#         x = self.classifier(x)
#         gender = self.gender_head(x)
#         age = self.age_head(x)
#         return gender , age

# model = faceCnn2(input_shape=3 , hidden_shape=64 ).to(device)
# model

# Common training loop for Scratch CNN

In [None]:
# trackio.init(
#     project="gen_ai_nppe", 
#     space_id="Aryan9797/gen_ai", 
#     name="scratch_cnn_final", 
#     group="simple"
# )

# optimizer = optim.Adam(model.parameters(),lr=0.01)
# gender_loss_fn = nn.BCEWithLogitsLoss()
# age_loss_fn = nn.MSELoss()

# epochs = 10

# for i in tqdm(range(epochs)):
#     train_gender_loss = 0
    # train_age_loss = 0
    # train_total_loss = 0
    # train_batch = 0
    
    # model.train()

    # for batch ,(x,gender,age) in enumerate(train_loader):
    #     x = x.to(device)
    #     gender = gender.to(device)
    #     age = age.to(device)
        

    #     optimizer.zero_grad()
    #     pred_gender , pred_age = model(x)

    #     gender_loss = gender_loss_fn(pred_gender,gender.unsqueeze(1))
    #     age_loss = age_loss_fn(pred_age,age.unsqueeze(1))
        
    #     train_gender_loss+=gender_loss.item()
    #     train_age_loss+= age_loss.item()
        
    #     total_loss = age_loss + gender_loss
    #     train_total_loss+=total_loss.item()
        
    #     total_loss.backward()
    #     optimizer.step()
    #     train_batch+=1

    
    # val_gender_loss = 0
    # val_age_loss = 0
    # val_batch = 0
    # val_total_loss = 0
            
    # model.eval()
    # for batch ,(x,gender,age) in enumerate(val_loader):
    #     x = x.to(device)
    #     gender = gender.to(device)
    #     age = age.to(device)

    #     with torch.inference_mode():
    #         pred_gender , pred_age = model(x)
    #         gender_loss = gender_loss_fn(pred_gender,gender.unsqueeze(1))
    #         age_loss = age_loss_fn(pred_age,age.unsqueeze(1))

#             total_loss = gender_loss + age_loss
#             val_total_loss += total_loss.item()
#             val_gender_loss+=gender_loss.item()
#             val_age_loss+= age_loss.item()
#             val_batch+=1
    
#     trackio.log({
#         "train_gender_loss" : train_gender_loss/train_batch,
#         "train_age_loss" : train_age_loss/train_batch,
#         "train_total_loss": train_total_loss/train_batch,
#         "validation_gender_loss" : val_gender_loss/val_batch,
#         "validation_age_loss" : val_age_loss/val_batch,
#         "validation_total_loss":val_total_loss/val_batch
#     })
#     print(f"Epoch {i+1}/{epochs} | Train Gender Loss: {train_gender_loss/train_batch} | Train Age Loss: {train_age_loss/train_batch} | Train_total_loss: {train_total_loss/train_batch} | Val Gender Loss: {val_gender_loss/val_batch} | Val Age Loss: {val_age_loss/val_batch} | Validation_total_loss: {val_total_loss/val_batch}")

# trackio.finish()

# model 3 (Finetuned CNN)

In [None]:
# model 3 with transfer learning 
import torchvision.models as models

class EfficientNetFaceModel(nn.Module):
    """EfficientNet-B4 feature extractor with separate gender and age heads.

    The ImageNet-pretrained backbone's ``features`` parameters are frozen at
    construction; only the two heads start out trainable. ``forward`` returns
    ``(gender_logit, age)``, each with one value per sample.
    """

    @staticmethod
    def _make_head(in_features, hidden_features=512):
        """Build one head: dropout -> linear -> ReLU -> dropout -> linear(1)."""
        return nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(hidden_features, 1),
        )

    def __init__(self):
        super().__init__()

        # Pre-trained EfficientNet-B4 (ImageNet weights).
        self.backbone = models.efficientnet_b4(
            weights=models.EfficientNet_B4_Weights.IMAGENET1K_V1
        )
        # Width of the pooled feature vector, read from the stock classifier's
        # linear layer before that classifier is discarded.
        feature_dim = self.backbone.classifier[1].in_features

        # Freeze the convolutional feature extractor.
        self.backbone.features.requires_grad_(False)
        # The stock classifier is replaced; forward() never calls it.
        self.backbone.classifier = nn.Identity()

        # Head creation order (age, then gender) is kept so seeded weight
        # initialisation matches earlier runs.
        self.age_head = self._make_head(feature_dim)      # 1 value: age
        self.gender_head = self._make_head(feature_dim)   # 1 logit: gender

    def forward(self, x):
        """Return ``(gender, age)`` predictions for an image batch ``x``."""
        pooled = self.backbone.avgpool(self.backbone.features(x))
        embedding = torch.flatten(pooled, 1)
        return self.gender_head(embedding), self.age_head(embedding)


# EfficientNet-B4 based network, moved to the selected device. It is the
# "model 2" member of the ensemble in the submission cell.
model2 = EfficientNetFaceModel().to(device)


In [None]:

from torchvision.models import ConvNeXt_Base_Weights, convnext_base

class ConvNeXtFaceModel(nn.Module):
    """ConvNeXt-Base feature extractor with separate gender and age heads.

    The ImageNet-pretrained backbone's ``features`` parameters are frozen at
    construction; only the two heads start out trainable. ``forward`` returns
    ``(gender_logit, age)``, each with one value per sample.
    """

    @staticmethod
    def _make_head(in_features, hidden_features=512):
        """Build one head: dropout -> linear -> ReLU -> dropout -> linear(1)."""
        return nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(hidden_features, 1),
        )

    def __init__(self):
        super().__init__()

        # Pre-trained ConvNeXt-Base (ImageNet weights).
        self.backbone = convnext_base(weights=ConvNeXt_Base_Weights.IMAGENET1K_V1)
        # Width of the pooled feature vector, read from the stock classifier's
        # linear layer (index 2) before that classifier is discarded.
        feature_dim = self.backbone.classifier[2].in_features

        # Freeze the convolutional feature extractor.
        self.backbone.features.requires_grad_(False)
        # The stock classifier is replaced; forward() never calls it.
        # NOTE(review): torchvision's ConvNeXt classifier appears to start with a
        # LayerNorm2d that is dropped along with it. Left as-is because the saved
        # checkpoints were produced with this layout; confirm before changing.
        self.backbone.classifier = nn.Identity()

        # Head creation order (age, then gender) is kept so seeded weight
        # initialisation matches earlier runs.
        self.age_head = self._make_head(feature_dim)      # 1 value: age
        self.gender_head = self._make_head(feature_dim)   # 1 logit: gender

    def forward(self, x):
        """Return ``(gender, age)`` predictions for an image batch ``x``."""
        pooled = self.backbone.avgpool(self.backbone.features(x))
        embedding = torch.flatten(pooled, 1)
        return self.gender_head(embedding), self.age_head(embedding)

# ConvNeXt-Base based network, moved to the selected device. It is the
# "model 1" member of the ensemble in the submission cell.
model = ConvNeXtFaceModel().to(device)

# Training for Finetuned CNN

In [None]:
# trackio.init(
#     project="gen_ai_nppe", 
#     space_id="Aryan9797/gen_ai", 
#     name="transfer_with_0.5drop", 
#     group="simple"
# )

# male_count = (train_df['gender'] == 1).sum()
# female_count = (train_df['gender'] == 0).sum()
# total = len(train_df)

# pos_weight = torch.tensor([female_count / male_count]).to(device)

# optimizer = optim.Adam(model.parameters(), lr=0.001)
# scheduler = optim.lr_scheduler.ReduceLROnPlateau(       # lr_scheduler to reduce learning_rate when close to convergence
#     optimizer, 
#     mode='min',   
#     factor=0.1,    
#     patience=2,     
#     verbose=True
# )

# gender_loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
# age_loss_fn = nn.HuberLoss(delta=1.0)

# # Training only added layers with frozen model

# epochs = 20
# for i in tqdm(range(epochs), desc="Stage 1 (Head)"):
#     train_gender_loss = 0
#     train_age_loss = 0
#     train_total_loss = 0
#     train_batch = 0
    
#     model.train()
#     for batch ,(x,gender,age) in enumerate(train_loader):
#         x = x.to(device)
#         gender = gender.to(device)
#         age = age.to(device)
        
#         optimizer.zero_grad()
#         pred_gender , pred_age = model(x)
        
#         g_loss = gender_loss_fn(pred_gender,gender.unsqueeze(1))
#         a_loss_mse = age_loss_fn(pred_age,age.unsqueeze(1)) 
        
#         train_gender_loss+=g_loss.item()
#         train_age_loss+= a_loss_mse.item() 
        
#         age_weight = 0.3            # weighted loss as unit for age is larger then the unit for gender
#         gender_weight = 1.0
#         total_loss = (age_weight * a_loss_mse) + (gender_weight * g_loss)
        
    #     train_total_loss+=total_loss.item()
        
    #     total_loss.backward()
    #     optimizer.step()
    #     train_batch+=1
    
    # val_gender_loss = 0
    # val_age_loss = 0
    # val_batch = 0
    # val_total_loss = 0
            
    # model.eval()
    # for batch ,(x,gender,age) in enumerate(val_loader):
    #     x = x.to(device)
    #     gender = gender.to(device)
    #     age = age.to(device)

    #     with torch.inference_mode():
    #         pred_gender , pred_age = model(x)
            
    #         g_loss = gender_loss_fn(pred_gender,gender.unsqueeze(1))
    #         a_loss_mse = age_loss_fn(pred_age,age.unsqueeze(1)) 
            
    #         age_weight = 0.3
    #         gender_weight = 1.0
    #         total_loss = (age_weight * a_loss_mse) + (gender_weight * g_loss)
            
    #         val_total_loss += total_loss.item()
            
#             val_gender_loss+=g_loss.item()
#             val_age_loss+= a_loss_mse.item() 
#             val_batch+=1
            
#     val_loss_for_scheduler = val_total_loss / val_batch
#     scheduler.step(val_loss_for_scheduler)
    
#     trackio.log({
#         "epoch": i,
#         "train_gender_loss" : train_gender_loss/train_batch,
#         "train_age_loss" : train_age_loss/train_batch,
#         "train_total_loss": train_total_loss/train_batch,
#         "validation_gender_loss" : val_gender_loss/val_batch,
#         "validation_age_loss" : val_age_loss/val_batch,
#         "validation_total_loss":val_total_loss/val_batch,
#     })
#     print(f"Epoch {i+1}/{epochs} | Train Gender Loss: {train_gender_loss/train_batch} | Train Age (RMSE) Loss: {train_age_loss/train_batch} | Train_total_loss: {train_total_loss/train_batch} | Val Gender Loss: {val_gender_loss/val_batch} | Val Age (RMSE) Loss: {val_age_loss/val_batch} | Validation_total_loss: {val_total_loss/val_batch}")

# # training whole model with low learning rate for fine-tuning

# for param in model.backbone.features.parameters():
#     param.requires_grad = True

# optimizer = optim.Adam(model.parameters(), lr=0.00005) 
# scheduler = optim.lr_scheduler.ReduceLROnPlateau(
#     optimizer, 
#     mode='min',     
#     factor=0.1,    
#     patience=2,    
#     verbose=True
# )

# epochs = 10
# for i in tqdm(range(epochs), desc="Stage 2 (Fine-Tune)"):
#     train_gender_loss = 0
#     train_age_loss = 0
#     train_total_loss = 0
#     train_batch = 0
    
#     model.train()
#     for batch ,(x,gender,age) in enumerate(train_loader):
#         x = x.to(device)
#         gender = gender.to(device)
#         age = age.to(device)
        
#         optimizer.zero_grad()
#         pred_gender , pred_age = model(x)
        
#         g_loss = gender_loss_fn(pred_gender,gender.unsqueeze(1))
#         a_loss_mse = age_loss_fn(pred_age,age.unsqueeze(1))
        
    #     train_gender_loss+=g_loss.item()
    #     train_age_loss+= a_loss_mse.item()
        
    #     age_weight = 0.3
    #     gender_weight = 1.0
    #     total_loss = (age_weight * a_loss_mse) + (gender_weight * g_loss)
        
    #     train_total_loss+=total_loss.item()
        
    #     total_loss.backward()
    #     optimizer.step()
    #     train_batch+=1
    
    # val_gender_loss = 0
    # val_age_loss = 0
    # val_batch = 0
    # val_total_loss = 0
            
    # model.eval()
    # for batch ,(x,gender,age) in enumerate(val_loader):
    #     x = x.to(device)
    #     gender = gender.to(device)
    #     age = age.to(device)

    #     with torch.inference_mode():
    #         pred_gender , pred_age = model(x)
            
    #         g_loss = gender_loss_fn(pred_gender,gender.unsqueeze(1))
#             a_loss_mse = age_loss_fn(pred_age,age.unsqueeze(1))
            
#             age_weight = 0.3
#             gender_weight = 1.0
#             total_loss = (age_weight * a_loss_mse) + (gender_weight * g_loss)
#             val_total_loss += total_loss.item()
            
#             val_gender_loss+=g_loss.item()
#             val_age_loss+= a_loss_mse.item()
#             val_batch+=1
            
#     val_loss_for_scheduler = val_total_loss / val_batch
#     scheduler.step(val_loss_for_scheduler)
    
#     trackio.log({
#         "epoch": i + 10, 
#         "train_gender_loss" : train_gender_loss/train_batch,
#         "train_age_loss" : train_age_loss/train_batch,
#         "train_total_loss": train_total_loss/train_batch,
#         "validation_gender_loss" : val_gender_loss/val_batch,
#         "validation_age_loss" : val_age_loss/val_batch,
#         "validation_total_loss":val_total_loss/val_batch,
#     })
#     print(f"Epoch {i+11}/{epochs+10} | Train Gender Loss: {train_gender_loss/train_batch} | Train Age (RMSE) Loss: {train_age_loss/train_batch} | Train_total_loss: {train_total_loss/train_batch} | Val Gender Loss: {val_gender_loss/val_batch} | Val Age (RMSE) Loss: {val_age_loss/val_batch} | Validation_total_loss: {val_total_loss/val_batch}")

# trackio.finish()

In [None]:
# trackio.init(
#     project="gen_ai_nppe", 
#     space_id="Aryan9797/gen_ai", 
#     name="ConvNeXt", 
#     group="simple"
# )

# male_count = (train_df['gender'] == 1).sum()
# female_count = (train_df['gender'] == 0).sum()
# total = len(train_df)

# pos_weight = torch.tensor([female_count / male_count]).to(device)

# optimizer = optim.Adam(model.parameters(), lr=0.001)
# scheduler = optim.lr_scheduler.ReduceLROnPlateau(       # lr_scheduler to reduce learning_rate when close to convergence
#     optimizer, 
#     mode='min',   
#     factor=0.1,    
#     patience=2,     
#     verbose=True
# )

# gender_loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
# age_loss_fn = nn.HuberLoss(delta=1.0)

## for early stoping
# best_val_loss = float('inf')
# patience_epochs = 5  # How many epochs to wait after last improvement
# patience_counter = 0
# BEST_MODEL_PATH = "best_model.pth"


# # Training only added layers with frozen model

# epochs = 40
# for i in tqdm(range(epochs), desc="Stage 1 (Head)"):
    # train_gender_loss = 0
    # train_age_loss = 0
    # train_total_loss = 0
    # train_batch = 0
    
    # model.train()
    # for batch ,(x,gender,age) in enumerate(train_loader):
    #     x = x.to(device)
    #     gender = gender.to(device)
    #     age = age.to(device)
        
    #     optimizer.zero_grad()
    #     pred_gender , pred_age = model(x)
        
    #     g_loss = gender_loss_fn(pred_gender,gender.unsqueeze(1))
    #     a_loss_mse = age_loss_fn(pred_age,age.unsqueeze(1)) 
        
    #     train_gender_loss+=g_loss.item()
    #     train_age_loss+= a_loss_mse.item() 
        
    #     age_weight = 0.3        # weighted loss as unit for age is larger then the unit for gender
    #     gender_weight = 1.0
    #     total_loss = (age_weight * a_loss_mse) + (gender_weight * g_loss)
        
    #     train_total_loss+=total_loss.item()
        
    #     total_loss.backward()
    #     optimizer.step()
    #     train_batch+=1
    
    # val_gender_loss = 0
    # val_age_loss = 0
    # val_batch = 0
    # val_total_loss = 0
            
    # model.eval()
    # for batch ,(x,gender,age) in enumerate(val_loader):
    #     x = x.to(device)
    #     gender = gender.to(device)
    #     age = age.to(device)

    #     with torch.inference_mode():
    #         pred_gender , pred_age = model(x)
            
    #         g_loss = gender_loss_fn(pred_gender,gender.unsqueeze(1))
    #         a_loss_mse = age_loss_fn(pred_age,age.unsqueeze(1)) 
            
    #         age_weight = 0.3
    #         gender_weight = 1.0
    #         total_loss = (age_weight * a_loss_mse) + (gender_weight * g_loss)
            
    #         val_total_loss += total_loss.item()
            
    #         val_gender_loss+=g_loss.item()
    #         val_age_loss+= a_loss_mse.item() 
    #         val_batch+=1
            
    # val_loss_for_scheduler = val_total_loss / val_batch
    # scheduler.step(val_loss_for_scheduler)
    
    # # EARLY STOPPING CHECK

    # if val_loss_for_scheduler < best_val_loss:
    #     print(f"✅ Validation loss improved ({best_val_loss:.5f} -> {val_loss_for_scheduler:.5f}). Saving model...")
    #     best_val_loss = val_loss_for_scheduler
    #     torch.save(model.state_dict(), BEST_MODEL_PATH)
    #     patience_counter = 0  # Reset patience
    # else:
    #     patience_counter += 1
    #     print(f"Validation loss did not improve. Patience {patience_counter}/{patience_epochs}")

    # if patience_counter >= patience_epochs:
    #     print("Stopping early due to lack of improvement.")
    #     break  # Exit the loop
    # ============================================

#     trackio.log({
#         "epoch": i,
#         "train_gender_loss" : train_gender_loss/train_batch,
#         "train_age_loss" : train_age_loss/train_batch,
#         "train_total_loss": train_total_loss/train_batch,
#         "validation_gender_loss" : val_gender_loss/val_batch,
#         "validation_age_loss" : val_age_loss/val_batch,
#         "validation_total_loss":val_total_loss/val_batch,
#     })
#     print(f"Epoch {i+1}/{epochs} | Train Gender Loss: {train_gender_loss/train_batch} | Train Age (RMSE) Loss: {train_age_loss/train_batch} | Train_total_loss: {train_total_loss/train_batch} | Val Gender Loss: {val_gender_loss/val_batch} | Val Age (RMSE) Loss: {val_age_loss/val_batch} | Validation_total_loss: {val_total_loss/val_batch}")

# # training whole model with low learning rate for fine-tuning

# for param in model.backbone.features.parameters():
#     param.requires_grad = True

# optimizer = optim.Adam(model.parameters(), lr=0.00005) 
# scheduler = optim.lr_scheduler.ReduceLROnPlateau(
#     optimizer, 
#     mode='min',     
#     factor=0.1,    
#     patience=2,    
#     verbose=True
# )


# patience_counter = 0
# print("\n--- Starting Stage 2 (Fine-Tune) ---")
# # =============================================

# epochs = 40
# for i in tqdm(range(epochs), desc="Stage 2 (Fine-Tune)"):
#     train_gender_loss = 0
#     train_age_loss = 0
#     train_total_loss = 0
#     train_batch = 0
    
#     model.train()
#     for batch ,(x,gender,age) in enumerate(train_loader):
#         x = x.to(device)
#         gender = gender.to(device)
#         age = age.to(device)
        
#         optimizer.zero_grad()
#         pred_gender , pred_age = model(x)
        
#         g_loss = gender_loss_fn(pred_gender,gender.unsqueeze(1))
#         a_loss_mse = age_loss_fn(pred_age,age.unsqueeze(1))
        
    #     train_gender_loss+=g_loss.item()
    #     train_age_loss+= a_loss_mse.item()
        
    #     age_weight = 0.3
    #     gender_weight = 1.0
    #     total_loss = (age_weight * a_loss_mse) + (gender_weight * g_loss)
        
    #     train_total_loss+=total_loss.item()
        
    #     total_loss.backward()
    #     optimizer.step()
    #     train_batch+=1
    
    # val_gender_loss = 0
    # val_age_loss = 0
    # val_batch = 0
    # val_total_loss = 0
            
    # model.eval()
    # for batch ,(x,gender,age) in enumerate(val_loader):
    #     x = x.to(device)
    #     gender = gender.to(device)
    #     age = age.to(device)

    #     with torch.inference_mode():
    #         pred_gender , pred_age = model(x)
            
    #         g_loss = gender_loss_fn(pred_gender,gender.unsqueeze(1))
    #         a_loss_mse = age_loss_fn(pred_age,age.unsqueeze(1))
            
    #         age_weight = 0.3
    #         gender_weight = 1.0
    #         total_loss = (age_weight * a_loss_mse) + (gender_weight * g_loss)
    #         val_total_loss += total_loss.item()
            
    #         val_gender_loss+=g_loss.item()
    #         val_age_loss+= a_loss_mse.item()
    #         val_batch+=1
            
    # val_loss_for_scheduler = val_total_loss / val_batch
    # scheduler.step(val_loss_for_scheduler)
    
    # # EARLY STOPPING CHECK ===
    # if val_loss_for_scheduler < best_val_loss:
    #     print(f"✅ Validation loss improved ({best_val_loss:.5f} -> {val_loss_for_scheduler:.5f}). Saving model...")
    #     best_val_loss = val_loss_for_scheduler
    #     torch.save(model.state_dict(), BEST_MODEL_PATH)
    #     patience_counter = 0  # Reset patience
    # else:
    #     patience_counter += 1
#         print(f"Validation loss did not improve. Patience {patience_counter}/{patience_epochs}")

#     if patience_counter >= patience_epochs:
#         print("Stopping early due to lack of improvement.")
#         break  # Exit the loop

    
#     trackio.log({
#         "epoch": i + 10, 
#         "train_gender_loss" : train_gender_loss/train_batch,
#         "train_age_loss" : train_age_loss/train_batch,
#         "train_total_loss": train_total_loss/train_batch,
#         "validation_gender_loss" : val_gender_loss/val_batch,
#         "validation_age_loss" : val_age_loss/val_batch,
#         "validation_total_loss":val_total_loss/val_batch,
#     })
#     print(f"Epoch {i+11}/{epochs+10} | Train Gender Loss: {train_gender_loss/train_batch} | Train Age (RMSE) Loss: {train_age_loss/train_batch} | Train_total_loss: {train_total_loss/train_batch} | Val Gender Loss: {val_gender_loss/val_batch} | Val Age (RMSE) Loss: {val_age_loss/val_batch} | Validation_total_loss: {val_total_loss/val_batch}")

# trackio.finish()

# # LOAD THE BEST MODEL BEFORE PREDICTION 
# print(f"\nTraining complete. Loading best model from {BEST_MODEL_PATH} with loss {best_val_loss:.5f}")
# model.load_state_dict(torch.load(BEST_MODEL_PATH))


# Saving model in kaggle hub

In [None]:

# MODEL_UPLOAD_DIR = "/kaggle/working/my_face_model_v1" 
# os.makedirs(MODEL_UPLOAD_DIR, exist_ok=True)

# MODEL_SAVE_PATH = os.path.join(MODEL_UPLOAD_DIR, "face_cnn_model.pth")
# torch.save(model.state_dict(), MODEL_SAVE_PATH)
# print(f"Model saved to {MODEL_SAVE_PATH}")

# KAGGLE_USERNAME = 'aryanchauhan97971234' 
# MODEL = 'sctrach_cnn_final'
# FRAMEWORK = 'pytorch'
# VARIATION = 'transfer_with_0.0001and0.01with_loss_weights'

# handle = f'{KAGGLE_USERNAME}/{MODEL}/{FRAMEWORK}/{VARIATION}'

# print(f"Uploading model from {MODEL_UPLOAD_DIR} to {handle}...")

# kagglehub.model_upload(
#     handle,                     
#     MODEL_UPLOAD_DIR,           
#     license_name="Apache 2.0", 
#     version_notes="4th run with transfer, lr scheduler"
# )
# print("Upload complete!")

# Importing final model 

In [None]:
# handle = 'aryanchauhan97971234/sctrach_cnn_final/pytorch/transfer_with_0.0001and0.01with_loss_weights'
# print(f"Downloading model from {handle}...")
# model_dir = kagglehub.model_download(handle)
# print(f"Model downloaded to: {model_dir}")
# model_file_path = os.path.join(model_dir, "face_cnn_model.pth")
# state_dict = torch.load(model_file_path, map_location=torch.device('cpu'))
# model.load_state_dict(state_dict)
# model.to(device)
# print("Model weights loaded successfully!")


In [None]:
# Fetch the fine-tuned checkpoint of each network from Kaggle Hub and load it:
# first the EfficientNet model (`model2`), then the ConvNeXt model (`model`).
for net, handle in (
    (model2, 'aryanchauhan97971234/face-age-transfer2-loss-weights/pytorch/transfer_with_0.0001and0.01with_loss_weights'),
    (model, 'aryanchauhan97971234/ConvNeXt/pytorch/transfer_with_0.0001and0.01with_loss_weights'),
):
    print(f"Downloading model from {handle}...")
    model_dir = kagglehub.model_download(handle)
    print(f"Model downloaded to: {model_dir}")

    model_file_path = os.path.join(model_dir, "face_cnn_model.pth")
    # NOTE(review): depending on the installed PyTorch version, torch.load may
    # unpickle arbitrary objects; only load checkpoints from a trusted source
    # (or consider weights_only=True).
    # The tensors are first mapped to the CPU; the network is moved to `device` afterwards.
    state_dict = torch.load(model_file_path, map_location=torch.device('cpu'))
    net.load_state_dict(state_dict)
    net.to(device)
    print("Model weights loaded successfully!")



# Creating final submission.csv

In [None]:

# Ensemble weights applied to the raw outputs of the two networks.
weight_model1 = 0.8  # 80% weight for the main model (`model`, ConvNeXt)
weight_model2 = 0.2  # 20% weight for the second model (`model2`, EfficientNet-B4)


def _flip_tta(net, batch, batch_flipped):
    """Return ``net``'s (gender, age) outputs averaged over a batch and its flipped copy."""
    gender_orig, age_orig = net(batch)
    gender_flip, age_flip = net(batch_flipped)
    return (gender_orig + gender_flip) / 2, (age_orig + age_flip) / 2


gender, age = [], []
model.eval()
model2.eval()

with torch.inference_mode():
    for x in tqdm(test_loader):
        x = x.to(device)
        # Test-time augmentation: reverse dim 3, the width axis of the
        # (N, C, H, W) batches, i.e. a horizontal flip.
        x_flipped = torch.flip(x, [3])

        # Flip-averaged predictions of each ensemble member.
        pred_gender_model1, pred_age_model1 = _flip_tta(model, x, x_flipped)
        pred_gender_model2, pred_age_model2 = _flip_tta(model2, x, x_flipped)

        # Weighted average of the two members.
        pred_gender = (weight_model1 * pred_gender_model1) + (weight_model2 * pred_gender_model2)
        pred_age = (weight_model1 * pred_age_model1) + (weight_model2 * pred_age_model2)

        # Gender outputs are logits, so a threshold of 0 corresponds to
        # probability 0.5; positive logits map to class 1.
        gender.extend((pred_gender.squeeze(1) > 0).int().cpu().tolist())

        # Ages are rounded to whole numbers and clamped so none is negative.
        rounded_ages = np.round(pred_age.squeeze(1).cpu().numpy()).astype(int)
        age.extend(np.maximum(rounded_ages, 0).tolist())

print("Gender samples:", gender[:10])
print("Age samples:", age[:10])
# test_loader is not shuffled, so predictions line up with the rows of the
# submission template.
sample_submission["gender"] = gender
sample_submission["age"] = age
sample_submission.to_csv("submission.csv", index=False)
print("✅ CSV written.")

In [None]:
# gender, age = [], []
# model.eval()

# with torch.inference_mode():
#     for x in tqdm(test_loader):
#         x = x.to(device)
        
#         # predictions for model 1 
#         pred_gender_orig, pred_age_orig = model(x)
#         x_flipped = torch.flip(x, [3]) 
#         pred_gender_flip, pred_age_flip = model(x_flipped)
#         # Average for model 1
#         pred_gender_model1 = (pred_gender_orig + pred_gender_flip) / 2
#         pred_age_model1 = (pred_age_orig + pred_age_flip) / 2

#         # Convert to final predictions
#         pred_gender = (pred_gender_model1.squeeze(1) > 0).int()  # Using 0 as threshold
#         gender.extend(pred_gender.detach().cpu().numpy().astype(int).tolist())
        
#         pred_age = pred_age_model1.squeeze(1).detach().cpu().numpy()
#         rounded_ages = np.round(pred_age).astype(int)
#         rounded_ages[rounded_ages < 0] = 0
#         age.extend(rounded_ages.tolist())

# print("Gender samples:", gender[:10])
# print("Age samples:", age[:10])
# sample_submission["gender"] = gender
# sample_submission["age"] = age
# sample_submission.to_csv("submission.csv", index=False)
# print("✅ CSV written.")
 