In [None]:
import torch 
import numpy as np
import pandas as pd
import torch.nn.functional as F
import torch.nn as nn
import matplotlib.pyplot as plt
import os
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from bs4 import BeautifulSoup
import requests 
import regex as re
import csv
from sklearn.metrics import mean_squared_error
from torch.nn.utils import clip_grad_norm_
import torchvision.models as models
from torch.optim import Adam

In [None]:
# Scrape the chart's index page: every <a> whose href looks like "NNN-NNN.html"
# wraps a thumbnail <img>; its "src" gives the image URL and its "title" the label.
url = "https://height-weight-chart.com/"
resp = requests.get(url, timeout=30)
resp.raise_for_status()  # fail loudly instead of parsing an HTTP error page
soup = BeautifulSoup(resp.content, "html.parser")
# print(soup.prettify())
# Raw string so "\d" is a regex digit class rather than an invalid string escape;
# the dot is escaped so only a literal ".html" suffix matches.
pattern = r"\d{3}-\d{3}\.html"
res = soup.find_all('a', {"href": re.compile(pattern)})
if not res:
    raise RuntimeError(f"No links matching {pattern!r} were found at {url}")
print(res[0].img["src"])
results = [url + link.img["src"] for link in res]
# The label is taken from characters 9..11 of the string form of the comma-split
# title and scaled by 0.4532.
# NOTE(review): this slice depends on the exact title layout, and 0.4532 is
# presumably a pounds -> kilograms factor (the exact value is 0.45359237) - confirm.
weights = [int(str(link.img["title"].split(","))[9:12]) * 0.4532 for link in res]

In [None]:
# Download every scraped image to images/image_<idx>.jpg, where <idx> is the
# position of the URL in `results`. (`os` is already imported in the first cell.)
folder_name = "images"
os.makedirs(folder_name, exist_ok=True)

for idx, img_url in enumerate(results):
    img_path = os.path.join(folder_name, f'image_{idx}.jpg')
    # The context manager releases the streamed connection even if writing fails;
    # the timeout keeps one stalled request from hanging the whole cell.
    with requests.get(img_url, stream=True, timeout=30) as img_resp:
        img_resp.raise_for_status()
        with open(img_path, 'wb') as img_file:
            for chunk in img_resp.iter_content(chunk_size=8192):
                img_file.write(chunk)

In [None]:
# Pair each weight with the file downloaded for the same index. The names are
# rebuilt from the index instead of taken from os.listdir(), whose order is
# arbitrary (and, even when sorted, puts image_10 before image_2), which would
# attach weights to the wrong pictures.
images = [f"image_{idx}.jpg" for idx in range(len(weights))]
missing = [name for name in images if not os.path.exists(os.path.join("images", name))]
if missing:
    raise FileNotFoundError(f"{len(missing)} expected image(s) not found, e.g. {missing[0]}")
print(len(images))
data = pd.DataFrame({"image": images, "weight": weights})
# The default index column is written on purpose: WeightDataset reads the file
# name and the weight by position (columns 1 and 2).
data.to_csv("data.csv")

In [None]:
# Read data.csv back from disk and display its first ten rows as a sanity check.
d = pd.read_csv("data.csv")
d.head(10)

In [None]:
class WeightDataset(Dataset):
    """Image/weight pairs backed by a CSV file and a folder of images.

    The CSV is loaded with ``pd.read_csv`` and read by position: column 1 holds
    the image file name and column 2 the weight label (column 0 is whatever
    precedes them, e.g. a saved DataFrame index).

    Args:
        csv_file: Path of the CSV file described above.
        root_dir: Folder that contains the image files named in the CSV.
        transform: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.data_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data_frame)

    def __getitem__(self, idx):
        """Return ``(image, weight)`` for row ``idx``; ``weight`` is a Python float."""
        # Use the instance's own root_dir: relying on a module-level `root_dir`
        # breaks as soon as the dataset is built with a different folder.
        img_name = os.path.join(self.root_dir, self.data_frame.iloc[idx, 1])
        # convert() loads the pixels and returns a new image, so the file handle
        # can be closed right away; forcing RGB keeps three channels even for
        # grayscale or palette files.
        with Image.open(img_name) as img:
            image = img.convert("RGB")
        label = self.data_frame.iloc[idx, 2]

        if self.transform:
            image = self.transform(image)

        return image, float(label)

# Preprocessing / augmentation steps, applied in list order:
# resize to 224x224, convert to a tensor, then three random augmentations.
_transform_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.RandomGrayscale(p=0.2),
    transforms.RandomHorizontalFlip(p=0.2),
    transforms.RandomVerticalFlip(p=0.2),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
data_transform = transforms.Compose(_transform_steps)

# Label file, image folder and a batch-size setting
batch_size = 128
root_dir = 'images'
csv_file = 'data.csv'

In [None]:
dataset = WeightDataset(csv_file=csv_file, root_dir=root_dir, transform=data_transform)
# Smoke-test one sample: show the tensor's shape and the label instead of
# dumping the whole image tensor into the cell output.
sample_image, sample_label = dataset[10]
sample_image.shape, sample_label

In [None]:
class SimpleCNN(nn.Module):
    """Three conv -> GELU -> max-pool stages followed by a two-layer regression head.

    Every convolution is 3x3 with stride 1 and padding 1, so only the 2x2
    pooling changes the spatial size. For a 224x224 input that gives
    224 -> 112 -> 56 -> 28, which is why the first linear layer expects
    64 * 28 * 28 features.
    """

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels) of the three convolutional stages
        channel_plan = [(3, 16), (16, 32), (32, 64)]
        stages = []
        for n_in, n_out in channel_plan:
            stages += [
                nn.Conv2d(in_channels=n_in, out_channels=n_out, kernel_size=3, stride=1, padding=1),
                nn.GELU(),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ]
        self.conv_layers = nn.Sequential(*stages)

        hidden = nn.Linear(64 * 28 * 28, 512)
        head = nn.Linear(512, 1)
        self.fc_layers = nn.Sequential(hidden, nn.ReLU(), head)

    def forward(self, x):
        """Return one scalar prediction per sample as a 1-D tensor."""
        features = self.conv_layers(x)
        flat = features.view(features.size(0), -1)  # (batch, 64 * 28 * 28) for 224x224 inputs
        return self.fc_layers(flat).view(-1)

In [None]:
# Hyper-parameters for the training run.
LEARNING_RATE = 0.009
# NOTE(review): WEIGHT_DECAY is defined but never passed to the optimizer below;
# use Adam(..., weight_decay=WEIGHT_DECAY) if regularisation was intended.
WEIGHT_DECAY = 0.008
# This is the batch size actually used (the separate `batch_size = 128` setting
# defined next to the transforms is not referenced here).
BATCH_SIZE = 64
EPOCHS = 1000
SEED = 42
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed torch so weight initialisation and shuffling order repeat across
# Restart & Run All.
torch.manual_seed(SEED)

trainloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
model = SimpleCNN().to(device)
optimizer = Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.MSELoss()


for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    # Batch-specific names so the notebook-level `images` list of file names
    # is not overwritten by a tensor.
    for batch_images, batch_targets in trainloader:
        batch_images = batch_images.to(device).float()
        batch_targets = batch_targets.to(device).float()
        optimizer.zero_grad()
        output = model(batch_images)
        loss = criterion(output, batch_targets)
        loss.backward()
        clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        # MSELoss returns the batch mean; weight it by the batch size so a short
        # final batch does not skew the epoch average.
        running_loss += loss.item() * batch_images.size(0)

    avg_loss = running_loss / len(dataset)
    print(f"Epoch [{epoch+1}/{EPOCHS}], Avg Loss: {avg_loss:.4f}")

torch.save(model.state_dict(), "model.pth")

In [None]:
def images_in_index_order(folder):
    """Return the ``image_<n>.jpg`` file names in ``folder`` sorted by ``<n>``.

    os.listdir() returns names in arbitrary order, and a plain string sort puts
    image_10 before image_2. Predictions made from this list are later compared
    position by position with `weights`, so the order must follow the numeric
    index. Files that do not match the naming scheme are ignored.
    """
    name_pattern = re.compile(r"image_(\d+)\.jpg")
    matches = (name_pattern.fullmatch(name) for name in os.listdir(folder))
    indexed = sorted((int(m.group(1)), m.group(0)) for m in matches if m)
    return [name for _, name in indexed]


DATA_DIR = "images"
train_images = images_in_index_order(DATA_DIR)
print(len(train_images))

In [None]:
# Reload the trained weights and predict a weight for every training image.
model_path = 'model.pth'
model = SimpleCNN()  # Replace with your model class
# map_location lets a checkpoint saved during a GPU run load on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location="cpu"))
model.eval()

# Deterministic preprocessing for evaluation: the same resize and tensor
# conversion as data_transform, but without its random grayscale/flip steps,
# which would make predictions change from run to run.
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

pred_weights = []
with torch.no_grad():
    for img in train_images:
        img_path = os.path.join(DATA_DIR, img)
        # Force three channels and close the file as soon as the pixels are loaded.
        with Image.open(img_path) as pil_image:
            image = eval_transform(pil_image.convert("RGB")).unsqueeze(0)  # add batch dimension
        pred_weights.append(model(image).item())

In [None]:
# Preview the first few predictions instead of dumping the whole list.
pred_weights[:10]

In [None]:
# Normalised RMSE: root-mean-square error between predictions and labels,
# divided by the label range and printed as a percentage.
pred_weights = np.array(pred_weights)
weights = np.array(weights)
rmse = np.sqrt(mean_squared_error(pred_weights, weights))
label_span = np.max(weights) - np.min(weights)
rmse_per = rmse / label_span
print(rmse_per*100)

In [None]:
# Inference

DATA_DIR = "Dataset"
# Sorted so the prediction order (and the CSV row order) is reproducible;
# os.listdir() returns names in arbitrary order.
test_images = sorted(os.listdir(DATA_DIR))
print(len(test_images))

model_path = 'model.pth'
model = SimpleCNN()
# map_location lets a checkpoint saved during a GPU run load on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location="cpu"))
model.eval()

# Deterministic preprocessing: the same resize and tensor conversion as
# data_transform, without its random grayscale/flip augmentations.
inference_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

pred_weights = []
# The file is opened once rather than once per image; newline="" is what the csv
# module expects and avoids blank rows on Windows.
# NOTE(review): append mode is kept from the original, so re-running this cell
# adds a second copy of every row - delete the file first or switch to "w" if
# nothing else writes to it.
with open("sample_submission.csv", "a", newline="") as file, torch.no_grad():
    writer = csv.writer(file)
    for img in test_images:
        img_path = os.path.join(DATA_DIR, img)
        # Force three channels and close the file as soon as the pixels are loaded.
        with Image.open(img_path) as pil_image:
            image = inference_transform(pil_image.convert("RGB")).unsqueeze(0)  # add batch dimension
        prediction = model(image).item()
        pred_weights.append(prediction)
        writer.writerow([img, np.round(prediction)])