In [None]:

!pip install openpyxl




In [None]:
import pandas as pd
import requests
from PIL import Image
from io import BytesIO
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, random_split
import torch
import torch.nn as nn
import torch.optim as optim
import os

In [None]:
# Step 1: Load Dataset
# Read the workbook and keep only the rows whose label column holds exactly
# 'Yes' or 'No'; rows with any other value are dropped.
df = pd.read_excel("/content/dataset (1) (1).xlsx").loc[
    lambda frame: frame['Spitting Yes/No'].isin(['Yes', 'No'])
]

# Step 2: Define Custom Dataset
class SpitDataset(Dataset):
    """Binary image dataset whose images are downloaded from URLs in a DataFrame.

    Each row supplies an image URL in the ``'Image address'`` column and a
    label in the ``'Spitting Yes/No'`` column; ``'Yes'`` maps to 1 and any
    other value maps to 0.

    Args:
        df: DataFrame containing the two columns above. Its index is reset so
            that rows are addressed by position ``0 .. len(df) - 1``.
        transform: Optional callable applied to the RGB PIL image before it is
            returned.
        cache_images: When True (default), every successfully downloaded image
            is kept in memory (before ``transform`` is applied) so repeated
            accesses, e.g. in later epochs, do not hit the network again.
            Failed downloads are never cached and are retried on next access.
    """

    # Size of the black placeholder image returned when a download fails.
    FALLBACK_SIZE = (128, 128)
    # Seconds to wait for the HTTP request before giving up.
    REQUEST_TIMEOUT = 5

    def __init__(self, df, transform=None, cache_images=True):
        self.df = df.reset_index(drop=True)
        self.transform = transform
        self.cache_images = cache_images
        self._image_cache = {}

    def __len__(self):
        """Return the number of rows (samples) in the dataset."""
        return len(self.df)

    def _load_image(self, idx, url):
        """Return the RGB image for row ``idx``, downloading it if not cached."""
        cached = self._image_cache.get(idx)
        if cached is not None:
            return cached

        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            # Treat HTTP error pages as failures instead of trying to decode them.
            response.raise_for_status()
            img = Image.open(BytesIO(response.content)).convert('RGB')
        except Exception as exc:
            # Best-effort fallback is kept, but Ctrl-C / SystemExit now propagate
            # (they are not Exception subclasses) and the failure is reported
            # instead of being silently turned into a black training sample.
            import warnings

            warnings.warn(
                f"Could not load image for row {idx} ({url!r}): {exc!r}; "
                "using a black placeholder image instead."
            )
            return Image.new('RGB', self.FALLBACK_SIZE)

        if self.cache_images:
            self._image_cache[idx] = img
        return img

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        ``image`` is the (optionally transformed) RGB image, or a black
        placeholder if the download or decoding failed; ``label`` is 1 for
        ``'Yes'`` and 0 otherwise.
        """
        url = self.df.loc[idx, 'Image address']
        label = 1 if self.df.loc[idx, 'Spitting Yes/No'] == 'Yes' else 0

        img = self._load_image(idx, url)

        # The transform runs on every access so random augmentations still
        # vary between epochs even when the source image comes from the cache.
        if self.transform:
            img = self.transform(img)

        return img, label

In [None]:
# Step 3: Transformations
# Pipeline applied to each PIL image: resize to 128x128, flip horizontally
# at random (probability 0.5), then convert to a tensor.
transform = transforms.Compose(
    [
        transforms.Resize(size=(128, 128)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
    ]
)

In [None]:
# Step 4: Load dataset
# Wrap the filtered DataFrame in the URL-backed dataset, attaching the
# preprocessing pipeline that runs on every fetched image.
full_dataset = SpitDataset(
    df,
    transform=transform,
)

In [None]:
# Split into train and val
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size

# A dedicated, seeded generator makes the 80/20 partition identical on every
# run, so the validation score is comparable between runs.
split_generator = torch.Generator().manual_seed(42)
train_ds, val_ds = random_split(
    full_dataset, [train_size, val_size], generator=split_generator
)

# Validation must not be augmented: rebuild the pipeline without the random
# flip (derived from `transform` so the resize/tensor steps stay in sync) and
# point the validation indices at a dataset that uses it.
eval_transform = transforms.Compose(
    [
        step
        for step in transform.transforms
        if not isinstance(step, transforms.RandomHorizontalFlip)
    ]
)
val_ds = torch.utils.data.Subset(
    SpitDataset(full_dataset.df, transform=eval_transform),
    val_ds.indices,
)

# Only the training batches are shuffled; validation order does not matter.
train_loader = DataLoader(train_ds, batch_size=8, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=8)


In [None]:
# Step 5: Define CNN in PyTorch
class CNN(nn.Module):
    """Small convolutional binary classifier for 3-channel images.

    Three ``Conv2d -> ReLU -> MaxPool2d(2)`` stages (32, 64, 128 channels)
    are followed by a dropout-regularised two-layer classifier ending in a
    sigmoid, so the output is a probability in ``[0, 1]`` with shape
    ``(batch, 1)``.

    The feature map is adaptively pooled to 16x16 before the classifier. For
    128x128 inputs the map is already 16x16, so the pooling is an exact
    identity; for other input sizes it keeps the classifier's expected
    ``128 * 16 * 16`` input width instead of failing with a shape error.
    The pooling layer has no parameters, so ``state_dict`` keys are unchanged.
    """

    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2)
        )
        # Normalises the spatial size to what the first Linear layer expects.
        self.pool = nn.AdaptiveAvgPool2d((16, 16))
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(128 * 16 * 16, 128), nn.ReLU(),
            nn.Linear(128, 1), nn.Sigmoid()
        )

    def forward(self, x):
        """Map a ``(batch, 3, H, W)`` tensor to ``(batch, 1)`` probabilities."""
        x = self.conv(x)
        x = self.pool(x)
        # flatten (unlike view) also works if the tensor is not contiguous.
        x = torch.flatten(x, start_dim=1)
        return self.fc(x)

# Step 6: Train the model
# Seed torch's global RNG before the model is built so weight initialisation
# (and the shuffling / dropout / augmentation that later draw from the same
# RNG) start from a known state on every Restart & Run All.
# NOTE(review): some GPU kernels may still be non-deterministic.
torch.manual_seed(42)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)
# The model ends in a Sigmoid, so its outputs are probabilities: BCELoss
# (not the logits variant) is the matching criterion.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Training Loop
epochs = 30
for epoch in range(epochs):
    # Training mode keeps the model's Dropout layer active.
    model.train()
    train_loss = 0
    for imgs, labels in train_loader:
        # Move the batch to the target device; the labels become a float
        # column vector so they line up with the model's (batch, 1) output.
        imgs = imgs.to(device)
        labels = labels.float().to(device).view(-1, 1)

        # One optimisation step: clear old gradients, forward pass, loss,
        # backward pass, parameter update.
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The reported figure is the SUM of the per-batch losses of the epoch.
        train_loss = train_loss + loss.item()

    print(f"Epoch {epoch+1}/{epochs} - Loss: {train_loss:.4f}")

Epoch 1/30 - Loss: 3.6811
Epoch 2/30 - Loss: 3.3862
Epoch 3/30 - Loss: 3.3726
Epoch 4/30 - Loss: 3.3086
Epoch 5/30 - Loss: 3.2487
Epoch 6/30 - Loss: 3.1548
Epoch 7/30 - Loss: 3.0753
Epoch 8/30 - Loss: 3.1946
Epoch 9/30 - Loss: 2.8043
Epoch 10/30 - Loss: 2.8841
Epoch 11/30 - Loss: 2.4949
Epoch 12/30 - Loss: 2.5871
Epoch 13/30 - Loss: 2.8782
Epoch 14/30 - Loss: 2.1905
Epoch 15/30 - Loss: 2.1282
Epoch 16/30 - Loss: 1.7795
Epoch 17/30 - Loss: 1.2993
Epoch 18/30 - Loss: 1.3724
Epoch 19/30 - Loss: 4.7310
Epoch 20/30 - Loss: 3.6252
Epoch 21/30 - Loss: 2.4830
Epoch 22/30 - Loss: 2.5540
Epoch 23/30 - Loss: 2.0694
Epoch 24/30 - Loss: 1.7149
Epoch 25/30 - Loss: 1.2734
Epoch 26/30 - Loss: 2.2022
Epoch 27/30 - Loss: 2.7227
Epoch 28/30 - Loss: 1.3206
Epoch 29/30 - Loss: 1.1498
Epoch 30/30 - Loss: 0.8587


In [None]:
# Step 7: Evaluate
# Evaluation mode disables the model's Dropout layer.
model.eval()
total = 0
correct = 0
# Gradients are not needed for scoring.
with torch.no_grad():
    for imgs, labels in val_loader:
        imgs = imgs.to(device)
        labels = labels.to(device).view(-1, 1).float()
        outputs = model(imgs)
        # Outputs are probabilities; anything above 0.5 is predicted as class 1.
        predicted = (outputs > 0.5).float()
        correct += predicted.eq(labels).sum().item()
        total += labels.size(0)

print(f"\nValidation Accuracy: {(correct/total)*100:.2f}%")


Validation Accuracy: 90.00%
