## Importing the Packages

In [2]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
from PIL import Image
from tqdm import tqdm

# **Main Model**

In [14]:
# Dataset that loads each image and encodes its two class labels as a 4-element multi-hot vector
class CustomImageDataset(Dataset):
    """Multi-label image dataset backed by an annotations CSV and an image folder.

    The CSV is read WITHOUT a header row. Column 0 holds the image name (with
    or without a ``.png`` suffix); columns 1 and 2 hold integer class indices
    that must be valid positions in a 4-element list.

    Each item is ``(image, target)`` where ``image`` is the RGB picture after
    the optional ``transform`` and ``target`` is a float32 tensor of length 4
    with a 1 at each of the two label positions (a single 1 when both labels
    are the same).
    """

    def __init__(self, annotations_file, img_dir, transform=None):
        """Read the annotations table and remember where the images live.

        Args:
            annotations_file: Path to the header-less labels CSV.
            img_dir: Directory that contains the image files.
            transform: Optional callable applied to every loaded PIL image.
        """
        # The labels file has no header line: the evaluation cell of this
        # notebook reads the same file with header=None and gets img_00000 as
        # its first data row. With the pandas default (header=0) that first
        # sample is swallowed as column names and never reaches training.
        self.img_labels = pd.read_csv(annotations_file, header=None)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, multi_hot_target)``."""
        img_name = self.img_labels.iloc[idx, 0]
        # Annotation names may omit the extension; the files on disk are PNGs.
        if not img_name.lower().endswith('.png'):
            img_name += '.png'
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        label_ball_1 = self.img_labels.iloc[idx, 1]
        label_ball_2 = self.img_labels.iloc[idx, 2]
        # Multi-hot target: one slot per class, set for every class present.
        labels = [0, 0, 0, 0]
        labels[label_ball_1] = 1
        labels[label_ball_2] = 1

        return image, torch.tensor(labels, dtype=torch.float32)

# paths for annotations and the images directory for our training
annotations_file = "/kaggle/input/sportsball-ann/SportBalls/LABELS/train_labels.csv"
img_dir = "/kaggle/input/sportsball-ann/SportBalls/Train"
# Every image is resized to 64x64 before being converted to a tensor; the
# classifier's first linear layer is sized for exactly this resolution.
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])

full_dataset = CustomImageDataset(annotations_file, img_dir, transform=transform)

# Splitting the data into train and validation (80% / 20%)
train_ratio = 0.8
total_size = len(full_dataset)
train_size = int(train_ratio * total_size)
val_size = total_size - train_size
# A seeded generator keeps the train/validation membership identical on every
# run; without it the validation numbers are not comparable across runs.
train_dataset, val_dataset = random_split(
    full_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

# Create DataLoaders for each subset of train and validation
# (only the training batches are shuffled)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

# checking the train loader and validation loader sizes after batching
# (len() of a DataLoader is the number of batches, not the number of samples)
print("Train Loader Size:", len(train_loader))
print("Validation Loader Size:", len(val_loader))


class BallClassifierCNN(nn.Module):
    """Small CNN that emits 4 raw logits, one per ball class.

    Architecture: two ``conv(3x3, padding=1) -> batch-norm -> ReLU -> 2x2
    max-pool`` stages (3->32->64 channels), then a 128-unit hidden linear
    layer with dropout and a 4-unit output layer. No sigmoid is applied here;
    callers apply it (or use a logits-based loss).

    ``fc1`` expects ``64 * 16 * 16`` features, which corresponds to a
    3x64x64 input after the two poolings halve each spatial side twice.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        # One pooling module is reused after both convolution stages.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.dropout = nn.Dropout(0.2)
        self.fc1 = nn.Linear(64 * 16 * 16, 128)
        self.fc2 = nn.Linear(128, 4)

    def forward(self, x):
        """Map a batch of images ``(N, 3, 64, 64)`` to logits ``(N, 4)``.

        Raises:
            RuntimeError: If the input resolution does not produce the
                ``64 * 16 * 16`` features that ``fc1`` was built for.
        """
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        # Flatten per sample and keep the batch dimension fixed. A
        # view(-1, 64*16*16) would silently fold surplus spatial features of a
        # wrongly sized input into extra "samples" instead of failing.
        x = torch.flatten(x, start_dim=1)
        x = self.dropout(F.relu(self.fc1(x)))
        return self.fc2(x)


# Seed PyTorch's global random number generators before anything stochastic
# happens (weight initialisation, dropout masks, DataLoader shuffling), so a
# restart-and-run-all produces comparable numbers.
torch.manual_seed(42)

# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = BallClassifierCNN().to(device)
# The network outputs raw logits; BCEWithLogitsLoss applies the sigmoid
# internally and treats each of the 4 outputs as an independent yes/no label.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

print(model)

# Training loop
# Each epoch runs one optimisation pass over train_loader followed by a
# gradient-free evaluation pass over val_loader.
# "Accuracy" here is element-wise: every one of the 4 label entries of every
# sample counts separately (labels.numel()), not exact match per image.
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    # The bar description is constant within an epoch, so it is passed once
    # at construction instead of being reset on every batch.
    with tqdm(train_loader, unit="batch", desc=f"Epoch {epoch + 1}") as tepoch:
        for images, labels in tepoch:
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # A label is predicted present when its sigmoid probability exceeds 0.5.
            predicted = torch.sigmoid(outputs) > 0.5
            total_train += labels.numel()
            correct_train += (predicted == labels).sum().item()

            tepoch.set_postfix(loss=loss.item())

    train_accuracy = 100 * correct_train / total_train
    # Mean of the per-batch losses (each batch weighs the same).
    train_loss = running_loss / len(train_loader)
    print(f'Epoch [{epoch + 1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%')

    # Validation loop
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            predicted = torch.sigmoid(outputs) > 0.5
            total += labels.numel()
            correct += (predicted == labels).sum().item()

    val_accuracy = 100 * correct / total
    val_loss_avg = val_loss / len(val_loader)
    print(f'Validation Loss: {val_loss_avg:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

print('Training complete.')


Train Loader Size: 125
Validation Loader Size: 32
BallClassifierCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout): Dropout(p=0.2, inplace=False)
  (fc1): Linear(in_features=16384, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=4, bias=True)
)


Epoch 1: 100%|██████████| 125/125 [00:10<00:00, 11.76batch/s, loss=0.192]


Epoch [1/10], Train Loss: 0.3974, Train Accuracy: 81.75%
Validation Loss: 0.1995, Validation Accuracy: 94.09%


Epoch 2: 100%|██████████| 125/125 [00:11<00:00, 11.18batch/s, loss=0.126] 


Epoch [2/10], Train Loss: 0.1577, Train Accuracy: 94.89%
Validation Loss: 0.0938, Validation Accuracy: 97.45%


Epoch 3: 100%|██████████| 125/125 [00:11<00:00, 10.98batch/s, loss=0.1]   


Epoch [3/10], Train Loss: 0.0997, Train Accuracy: 97.09%
Validation Loss: 0.0844, Validation Accuracy: 97.38%


Epoch 4: 100%|██████████| 125/125 [00:11<00:00, 11.22batch/s, loss=0.083] 


Epoch [4/10], Train Loss: 0.0773, Train Accuracy: 97.71%
Validation Loss: 0.0529, Validation Accuracy: 98.61%


Epoch 5: 100%|██████████| 125/125 [00:10<00:00, 11.65batch/s, loss=0.0452]


Epoch [5/10], Train Loss: 0.0636, Train Accuracy: 98.19%
Validation Loss: 0.0617, Validation Accuracy: 98.12%


Epoch 6: 100%|██████████| 125/125 [00:10<00:00, 11.75batch/s, loss=0.0739]


Epoch [6/10], Train Loss: 0.0550, Train Accuracy: 98.36%
Validation Loss: 0.0466, Validation Accuracy: 98.78%


Epoch 7: 100%|██████████| 125/125 [00:11<00:00, 10.90batch/s, loss=0.0979]


Epoch [7/10], Train Loss: 0.0537, Train Accuracy: 98.43%
Validation Loss: 0.0537, Validation Accuracy: 98.21%


Epoch 8: 100%|██████████| 125/125 [00:11<00:00, 11.28batch/s, loss=0.0581]


Epoch [8/10], Train Loss: 0.0454, Train Accuracy: 98.66%
Validation Loss: 0.0396, Validation Accuracy: 98.89%


Epoch 9: 100%|██████████| 125/125 [00:10<00:00, 11.49batch/s, loss=0.0139]


Epoch [9/10], Train Loss: 0.0412, Train Accuracy: 98.80%
Validation Loss: 0.0414, Validation Accuracy: 98.83%


Epoch 10: 100%|██████████| 125/125 [00:11<00:00, 11.19batch/s, loss=0.0246]


Epoch [10/10], Train Loss: 0.0364, Train Accuracy: 98.89%
Validation Loss: 0.0385, Validation Accuracy: 98.99%
Training complete.


In [15]:
# Persist only the learned parameters (the state_dict, not the whole module).
# The path is relative, so the file lands in the current working directory.
torch.save(model.state_dict(), 'BallClassifierModel.pth')

# **Predicting the Labels of the Test Data**

In [16]:
test_dir = "/kaggle/input/sportsball/SportBalls/Test"
# Same preprocessing as used for training: resize to 64x64, convert to tensor.
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])

# Loading the trained model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = BallClassifierCNN()
# map_location remaps the stored tensors onto the device chosen above.
# Without it, a checkpoint written from a GPU session cannot be loaded on a
# CPU-only runtime.
model.load_state_dict(
    torch.load('/kaggle/working/BallClassifierModel.pth', map_location=device)
)
model.to(device)
# setting the model to evaluation state (disables dropout, uses running batch-norm statistics)
model.eval()

# Function to predict labels for a single image
def predict_image(image_path, model, transform, device, threshold=0.5):
    """Predict the multi-hot label vector for one image file.

    Args:
        image_path: Path of the image to classify.
        model: Network that returns one logit per class for a batch of images.
        transform: Callable that turns the RGB PIL image into a tensor.
        device: Device the input tensor is moved to before the forward pass.
        threshold: Sigmoid probability above which a class counts as present.
            Defaults to 0.5, the value that was previously hard-coded.

    Returns:
        A flat integer NumPy array with 1 for every class whose probability
        exceeds ``threshold`` and 0 otherwise.
    """
    # The context manager closes the underlying file as soon as the pixels
    # have been copied into the converted image.
    with Image.open(image_path) as img:
        image = img.convert("RGB")
    # unsqueeze(0) adds the batch dimension the model expects.
    image = transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(image)
        prediction = torch.sigmoid(output) > threshold
    return prediction.cpu().numpy().astype(int).flatten()

# make predictions for the test images
test_predictions = []
image_paths = []
# os.listdir returns names in arbitrary order; sorting makes the row order of
# the output file stable from run to run.
for img_name in tqdm(sorted(os.listdir(test_dir)), desc="Predicting"):
    # Skip CSV files by their real extension. Splitting the full path on "."
    # and taking element 1 raises IndexError for names without a dot and
    # picks the wrong piece when a path contains more than one dot.
    if os.path.splitext(img_name)[1].lower() == ".csv":
        continue
    img_path = os.path.join(test_dir, img_name)
    pred = predict_image(img_path, model, transform, device)
    test_predictions.append(pred)
    image_paths.append(img_name)

# saving the test predictions to a CSV file
# NOTE(review): 'vollebyball' looks like a typo for 'volleyball'. The header
# is left unchanged here because it is part of the emitted file's schema;
# confirm with consumers of test_preds.csv before renaming.
test_preds = pd.DataFrame(test_predictions, columns=['baseball', 'basketball', 'vollebyball', 'soccerball'])
test_preds.insert(0, 'Image', image_paths)
test_preds.to_csv('test_preds.csv', index=False)

Predicting: 100%|██████████| 1002/1002 [00:05<00:00, 174.10it/s]


# **Predicting the labels of Train Data**

In [17]:

# No checkpoint is loaded in this cell: the model object already in memory is
# reused and only switched to evaluation mode.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.eval()

# Training images and the preprocessing applied to them (resize to 64x64, then to tensor).
train_dir = "/kaggle/input/sportsball-ann/SportBalls/Train"
transform = transforms.Compose([transforms.Resize((64, 64)), transforms.ToTensor()])

# predicting labels for a single image
def predict_image(image_path, model, transform, device, threshold=0.5):
    """Predict the multi-hot label vector for one image file.

    Note: this re-defines (and therefore shadows) the function of the same
    name from the test-prediction cell.

    Args:
        image_path: Path of the image to classify.
        model: Network that returns one logit per class for a batch of images.
        transform: Callable that turns the RGB PIL image into a tensor.
        device: Device the input tensor is moved to before the forward pass.
        threshold: Sigmoid probability above which a class counts as present.
            Defaults to 0.5, the value that was previously hard-coded.

    Returns:
        A flat integer NumPy array with 1 for every class whose probability
        exceeds ``threshold`` and 0 otherwise.
    """
    # The context manager closes the underlying file as soon as the pixels
    # have been copied into the converted image.
    with Image.open(image_path) as img:
        image = img.convert("RGB")
    # unsqueeze(0) adds the batch dimension the model expects.
    image = transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(image)
        prediction = torch.sigmoid(output) > threshold
    return prediction.cpu().numpy().astype(int).flatten()

# Run the model over every image in the training directory.
train_predictions = []
image_paths = []
# Sorted listing gives a deterministic row order (os.listdir order is arbitrary).
for img_name in tqdm(sorted(os.listdir(train_dir)), desc="Predicting"):
    # Same guard as in the test-prediction loop: a stray CSV in the folder is
    # skipped instead of crashing the image loader.
    if os.path.splitext(img_name)[1].lower() == ".csv":
        continue
    img_path = os.path.join(train_dir, img_name)

    pred = predict_image(img_path, model, transform, device)
    train_predictions.append(pred)
    image_paths.append(img_name)

# One row per image: file name followed by the four 0/1 class predictions.
# NOTE(review): 'vollebyball' looks like a typo for 'volleyball'; kept as-is
# because later cells reuse these column names.
train_preds = pd.DataFrame(train_predictions, columns=['baseball', 'basketball', 'vollebyball', 'soccerball'])
train_preds.insert(0, 'Image', image_paths)

Predicting: 100%|██████████| 10000/10000 [00:17<00:00, 570.03it/s]


In [18]:
# Order the predictions by the number embedded in the file name so that row i
# lines up with row i of the label file.
# The numeric key lives only inside this chain: train_preds itself is left
# untouched (no helper column is added to it), so the cell can be re-run safely.
train_preds_sorted = (
    train_preds
    .assign(Numeric_Image=train_preds['Image'].str.extract(r'(\d+)', expand=False).astype(int))
    .sort_values('Numeric_Image')
    .drop('Numeric_Image', axis=1)
    .reset_index(drop=True)
)
train_preds_sorted.head()

Unnamed: 0,Image,baseball,basketball,vollebyball,soccerball
0,img_00000.png,0,0,0,1
1,img_00001.png,0,0,1,0
2,img_00002.png,1,1,0,0
3,img_00003.png,1,0,0,0
4,img_00004.png,0,0,1,0


In [19]:
# Build the ground-truth table: one multi-hot row per annotated training image,
# using the same column names as the sorted prediction table.
annotations_file = "/kaggle/input/sportsball-ann/SportBalls/LABELS/train_labels.csv"
train_labels = pd.read_csv(annotations_file, header=None)  # the file has no header row
train_labels.columns = ["Image", "Label-1", "Label-2"]

new_labels = []
for image_name, first_ball, second_ball in train_labels.itertuples(index=False, name=None):
    # Four class slots; the slot of each listed class index is switched on.
    encoded = [0, 0, 0, 0]
    encoded[first_ball] = 1
    encoded[second_ball] = 1
    new_labels.append([image_name, *encoded])

train_true_labels = pd.DataFrame(new_labels, columns=train_preds_sorted.columns)
train_true_labels.head()

Unnamed: 0,Image,baseball,basketball,vollebyball,soccerball
0,img_00000,0,0,1,1
1,img_00001,0,0,1,0
2,img_00002,1,1,0,0
3,img_00003,1,0,0,0
4,img_00004,0,0,1,0


In [20]:
# Per-class accuracy in percent: the share of rows where the predicted 0/1
# value equals the true one. The two tables are compared position by position.
accuracy_df = {
    column: np.sum(train_preds_sorted[column] == train_true_labels[column]) / train_preds_sorted.shape[0] * 100
    for column in train_preds_sorted.columns
    if column != "Image"
}

In [21]:
# Turn the {class name: accuracy %} mapping into a pandas Series so it prints as an aligned table.
accuracy_df = pd.Series(accuracy_df)
print(accuracy_df)

baseball       99.30
basketball     99.43
vollebyball    99.00
soccerball     99.34
dtype: float64
