In [5]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [4]:
import os
import zipfile

zip_path = '/content/drive/MyDrive/facial-rating-cnns/data/archive.zip'
extract_path = '/content/drive/MyDrive/facial-rating-cnns/data/'

# 1. Force Extraction (Ignore if folder exists)
print(f"Extracting {zip_path} to {extract_path}...")
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)
print("Extraction finished.")

# 2. Debug: List what is actually in the folder
print("\n--- Folder Contents ---")
files = os.listdir(extract_path)
for f in files:
    print(f)

Extracting /content/drive/MyDrive/facial-rating-cnns/data/archive.zip to /content/drive/MyDrive/facial-rating-cnns/data/...


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/facial-rating-cnns/data/archive.zip'

In [8]:
import os
import shutil
import zipfile

# 1. Define Paths
# Source: Where your zip is right now (in Drive)
drive_zip_path = '/content/drive/MyDrive/facial-rating-cnns/data/archive.zip'

# Destination: Local Colab Machine (Super fast I/O)
local_zip_path = '/content/archive.zip'
local_extract_path = '/content/fast_data'

# 2. Copy and Unzip Locally
print(f"Copying data from Drive to Local Disk... (This fixes the lag)")
shutil.copy(drive_zip_path, local_zip_path)

print("Unzipping locally...")
if not os.path.exists(local_extract_path):
    os.makedirs(local_extract_path)
    with zipfile.ZipFile(local_zip_path, 'r') as zip_ref:
        zip_ref.extractall(local_extract_path)

print("Done! Data is now on local disk.")

# 3. UPDATE PATHS for Data Loading
# We overwrite the variables so you don't need to change your other code
base_dir = local_extract_path
image_dir = os.path.join(base_dir, 'Images')
label_path = os.path.join(base_dir, 'labels.txt')

print(f"New fast image dir: {image_dir}")

Copying data from Drive to Local Disk... (This fixes the lag)
Unzipping locally...
Done! Data is now on local disk.
New fast image dir: /content/fast_data/Images


In [9]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import pandas as pd
import os
from sklearn.model_selection import train_test_split

# --- 1. Define Correct Paths (Based on your os.listdir) ---
base_dir = '/content/fast_data'
image_dir = os.path.join(base_dir, 'Images/Images')
label_path = os.path.join(base_dir, 'labels.txt')

print(f"Checking label file at: {label_path}")

# --- 2. Load Labels ---
try:
    # Read the file to inspect formatting
    with open(label_path, 'r') as f:
        print(f"First line of file: {f.readline().strip()}")

    # Load dataframe (Assuming standard format: ImageName Rating)
    # We try comma first, then space if that fails, common in Kaggle re-uploads
    try:
        df = pd.read_csv(label_path, sep=',', header=None, names=['Image', 'Rating'])
        # If the first column looks like a float/int, it might be the rating (bad parse), check naming
        if not str(df.iloc[0, 0]).endswith(('.jpg', '.jpeg', '.png')):
             df = pd.read_csv(label_path, sep='\s+', header=None, names=['Image', 'Rating'])
    except:
        df = pd.read_csv(label_path, sep='\s+', header=None, names=['Image', 'Rating'])

    # Filter out any header rows if they exist (e.g. if the file has "Image,Rating" as text)
    if not str(df.iloc[0, 1]).replace('.','',1).isdigit():
        df = pd.read_csv(label_path, sep=None, engine='python') # Auto-detect header

    print(f"Success! Loaded {len(df)} labels.")
    print(df.head())

except Exception as e:
    print(f"CRITICAL ERROR reading labels: {e}")

# --- 3. Dataset Class ---
class FaceBeautyDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None):
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = str(self.dataframe.iloc[idx, 0])
        # Fix for some dataset versions that might have paths in the filename
        if '/' in img_name or '\\' in img_name:
            img_name = os.path.basename(img_name)

        full_img_path = os.path.join(self.root_dir, img_name)

        try:
            image = Image.open(full_img_path).convert('RGB')
        except:
            image = Image.new('RGB', (224, 224)) # Black placeholder

        # Ensure rating is float
        rating = float(self.dataframe.iloc[idx, 1])
        rating = torch.tensor(rating, dtype=torch.float32)

        if self.transform:
            image = self.transform(image)

        return image, rating

# --- 4. Create Loaders ---
if 'df' in locals():
    # Transforms
    data_transforms = {
        'train': transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'val': transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }

    train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

    train_dataset = FaceBeautyDataset(train_df, image_dir, transform=data_transforms['train'])
    val_dataset = FaceBeautyDataset(val_df, image_dir, transform=data_transforms['val'])

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)

    print("Data Loaders Ready. Proceed to Training.")
else:
    print("Fix the label loading error above first.")

Checking label file at: /content/fast_data/labels.txt
First line of file: CF437.jpg 2.883333
Success! Loaded 5500 labels.
        Image    Rating
0   CF437.jpg  2.883333
1  AM1384.jpg  2.466667
2  AM1234.jpg  2.150000
3  AM1774.jpg  3.750000
4   CF215.jpg  3.033333
Data Loaders Ready. Proceed to Training.


  df = pd.read_csv(label_path, sep='\s+', header=None, names=['Image', 'Rating'])
  df = pd.read_csv(label_path, sep='\s+', header=None, names=['Image', 'Rating'])


In [10]:
import torch.nn as nn
import torch.optim as optim
from torchvision import models
import time
import copy

def train_model(model, criterion, optimizer, num_epochs=20):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Training on: {device}")
    model = model.to(device)

    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = float('inf')

    dataloaders = {'train': train_loader, 'val': val_loader}
    dataset_sizes = {'train': len(train_dataset), 'val': len(val_dataset)}

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    preds = outputs.squeeze()
                    loss = criterion(preds, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)

            epoch_loss = running_loss / dataset_sizes[phase]
            print(f'{phase} Loss: {epoch_loss:.4f}')

            if phase == 'val' and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save(model.state_dict(), 'best_face_rater_colab.pth')

    print(f'Best val Loss: {best_loss:.4f}')
    model.load_state_dict(best_model_wts)
    return model

# Initialize and Train
model = models.resnet18(weights='DEFAULT')
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 1)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

trained_model = train_model(model, criterion, optimizer, num_epochs=15)

Training on: cuda
Epoch 0/14
----------
train Loss: 0.8945
val Loss: 0.1280
Epoch 1/14
----------
train Loss: 0.0984
val Loss: 0.1835
Epoch 2/14
----------
train Loss: 0.0820
val Loss: 0.1022
Epoch 3/14
----------
train Loss: 0.0586
val Loss: 0.1166
Epoch 4/14
----------
train Loss: 0.0507
val Loss: 0.1083
Epoch 5/14
----------
train Loss: 0.0414
val Loss: 0.1167
Epoch 6/14
----------
train Loss: 0.0367
val Loss: 0.0940
Epoch 7/14
----------
train Loss: 0.0348
val Loss: 0.0950
Epoch 8/14
----------
train Loss: 0.0288
val Loss: 0.0910
Epoch 9/14
----------
train Loss: 0.0291
val Loss: 0.1598
Epoch 10/14
----------
train Loss: 0.0242
val Loss: 0.0933
Epoch 11/14
----------
train Loss: 0.0188
val Loss: 0.0858
Epoch 12/14
----------
train Loss: 0.0195
val Loss: 0.0883
Epoch 13/14
----------
train Loss: 0.0212
val Loss: 0.0932
Epoch 14/14
----------
train Loss: 0.0193
val Loss: 0.0912
Best val Loss: 0.0858


In [13]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image

def predict_beauty_score(image_path, model_path='best_face_rater_colab.pth'):
    # 1. Setup Device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # 2. Recreate Model Architecture
    model = models.resnet18(weights=None)
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, 1)

    # 3. Load Weights
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()

    # 4. Transform Image
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # 5. Predict
    image = Image.open(image_path).convert('RGB')
    input_tensor = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(input_tensor)
        score = output.item()

    return score

# --- RUN IT ---
# Change 'test.jpg' to the name of the file you uploaded
img_name = 'test.jpg'

# Simple check to avoid crash if you haven't uploaded yet
import os
if os.path.exists(img_name):
    score = predict_beauty_score(img_name)
    print(f"Predicted Rating: {score:.2f} / 5.0")
else:
    print(f"Please upload an image named '{img_name}' to the Colab files sidebar first!")

Predicted Rating: 3.66 / 5.0
