In [1]:
# Import basic libraries to handle data and build the model
import zipfile
import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader,Dataset
from torchvision import datasets, transforms
from torch.utils.data import random_split

In [2]:
# Fetch the hackathon repository into the current directory; later cells read
# its image archives and CSV files from /content/Qualcomm-DL-Hackathon
!git clone https://github.com/Prashant-AV/Qualcomm-DL-Hackathon.git

Cloning into 'Qualcomm-DL-Hackathon'...
remote: Enumerating objects: 10, done.[K
remote: Counting objects: 100% (10/10), done.[K
remote: Compressing objects: 100% (10/10), done.[K
remote: Total 10 (delta 1), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (10/10), 30.68 MiB | 26.03 MiB/s, done.
Resolving deltas: 100% (1/1), done.


In [3]:
# Run on the GPU when CUDA is usable, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [4]:
# Work from the training folder and unpack both image archives into it
extract_dir = "/content/Qualcomm-DL-Hackathon/train/"
os.chdir("/content/Qualcomm-DL-Hackathon/train")
os.makedirs(extract_dir, exist_ok=True)

# The archives are named relative to the working directory set above
for archive_name in ("images part-1.zip", "images part-2.zip"):
    with zipfile.ZipFile(archive_name, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)
print(f"Contents extracted to {extract_dir}")

Contents extracted to /content/Qualcomm-DL-Hackathon/train/


In [5]:
# Turn the folder extracted from the first archive into the shared 'images' folder.
# NOTE(review): not re-runnable — after the rename the source path no longer
# exists, so a second execution raises FileNotFoundError.
os.rename('/content/Qualcomm-DL-Hackathon/train/images part-1', '/content/Qualcomm-DL-Hackathon/train/images')

In [6]:
import shutil

# Move every entry extracted from the second archive into the shared images folder
part2_dir = '/content/Qualcomm-DL-Hackathon/train/images part-2'
for x in os.listdir(part2_dir):
  source_path = os.path.join(part2_dir, x)
  shutil.move(source_path, '/content/Qualcomm-DL-Hackathon/train/images/')

In [7]:
# Disabled on purpose: this would delete the merged images folder itself.
#shutil.rmtree('/content/Qualcomm-DL-Hackathon/train/images')
# Delete the second archive's folder now that its contents were moved out of it
shutil.rmtree('/content/Qualcomm-DL-Hackathon/train/images part-2')

In [8]:
from PIL import Image
# Show the working directory that the relative paths in later cells resolve against
os.getcwd()

'/content/Qualcomm-DL-Hackathon/train'

In [9]:
# Open every file in the merged images folder and count them; `img` stays
# bound to the last image opened so that a later cell can inspect its size.
image_dir = '/content/Qualcomm-DL-Hackathon/train/images/'
count = 0
for count, x in enumerate(os.listdir(image_dir), start=1):
  img = Image.open(image_dir + x)
count

2352

In [10]:
# Size of the last image opened by the counting loop, as (width, height)
img.size

(224, 224)

In [11]:
# Make the training folder the working directory so that relative paths such
# as 'train.csv' and 'images' resolve inside it
os.chdir("/content/Qualcomm-DL-Hackathon/train/")

In [12]:
import pandas as pd

# Preview the first rows of the training labels (file name and 0/1 label)
pd.read_csv('train.csv').head()

Unnamed: 0,image_names,emergency_or_not
0,1503.jpg,0
1,1420.jpg,0
2,1764.jpg,0
3,1356.jpg,0
4,1117.jpg,0


In [13]:
# (rows, columns) of the training label table
pd.read_csv('train.csv').shape

(1646, 2)

In [14]:
# Random augmentation pipeline applied to PIL images before they are saved:
# a rotation of up to 20 degrees either way, a random crop resized to
# 224x224, then a random horizontal flip.
augmentation_steps = [
    transforms.RandomRotation(20),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
]
aug_transform = transforms.Compose(augmentation_steps)

In [15]:
def load_and_augment_images(image_folder, csv_file, transform,
                            output_dir='/content/Qualcomm-DL-Hackathon/train/images/',
                            output_csv='aug_train.csv',
                            num_augmentations=4):
    """Write augmented copies of every labelled image plus an index CSV.

    For each row of ``csv_file`` the image named in ``image_names`` is opened
    from ``image_folder`` and ``num_augmentations`` transformed copies are
    saved to ``output_dir`` as ``aug_<row index>_<i>.jpg``. The original file
    name and every generated name are recorded, with the row's
    ``emergency_or_not`` value as ``label``, in ``output_csv``.

    Args:
        image_folder: Directory holding the original images.
        csv_file: CSV with ``image_names`` and ``emergency_or_not`` columns.
        transform: Callable applied to a PIL image; must return an object
            that ``save_image`` can write (i.e. a PIL image).
        output_dir: Directory receiving the augmented JPEGs. Defaults to the
            location that was previously hard-coded.
        output_csv: Path of the index CSV to write.
        num_augmentations: Number of augmented copies per original image.

    Returns:
        None. Results are written to disk.
    """
    df = pd.read_csv(csv_file)
    os.makedirs(output_dir, exist_ok=True)

    augmented_data = []
    for index, row in df.iterrows():
        image_name = row['image_names']
        label = row['emergency_or_not']

        # Close the source file as soon as its pixels are loaded. Converting
        # to RGB also guarantees the copies can be stored as JPEG, which
        # cannot hold alpha or palette modes.
        with Image.open(os.path.join(image_folder, image_name)) as source:
            img = source.convert('RGB')

        # Keep the original image in the index next to its augmented copies
        augmented_data.append({'image_names': image_name, 'label': label})

        for i in range(num_augmentations):
            augmented_name = f"aug_{index}_{i}.jpg"
            save_image(transform(img), os.path.join(output_dir, augmented_name))
            augmented_data.append({'image_names': augmented_name, 'label': label})

    # Explicit columns keep the CSV header intact even when the input is empty
    augmented_df = pd.DataFrame(augmented_data, columns=['image_names', 'label'])
    augmented_df.to_csv(output_csv, index=False)

def save_image(image, path):
    """Write ``image`` to ``path``, always encoding it as JPEG.

    ``image`` only needs a ``save(path, format=...)`` method; in this
    notebook it is a PIL image.
    """
    encoding = 'jpeg'
    image.save(path, format=encoding)

In [16]:
# Generate the augmented images and the aug_train.csv index from the original
# training labels; 'images' is relative to the current working directory
load_and_augment_images(image_folder='images', csv_file='/content/Qualcomm-DL-Hackathon/train/train.csv', transform=aug_transform)

In [17]:
# Preview the augmented index: each original file is followed by its augmented copies
pd.read_csv('aug_train.csv').head(10)

Unnamed: 0,image_names,label
0,1503.jpg,0
1,aug_0_0.jpg,0
2,aug_0_1.jpg,0
3,aug_0_2.jpg,0
4,aug_0_3.jpg,0
5,1420.jpg,0
6,aug_1_0.jpg,0
7,aug_1_1.jpg,0
8,aug_1_2.jpg,0
9,aug_1_3.jpg,0


In [18]:
# (rows, columns) of the augmented index
pd.read_csv('aug_train.csv').shape

(8230, 2)

In [19]:
# Preprocessing applied when images are loaded for the model: resize to
# 128x128, convert to a tensor, then normalize each channel with the
# ImageNet mean and standard deviation.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]
t_transforms = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

In [20]:
# Dataset that reads image file names (and, for training data, labels) from a CSV
class CustomDataset(Dataset):
    """Image dataset indexed by a CSV file.

    The first CSV column is read as the image file name (joined onto
    ``root_dir``) and the second column as the label. Images are opened with
    PIL, converted to RGB and passed through ``transform`` when one is given.

    With ``train=True`` an item is an ``(image, label)`` pair; with
    ``train=False`` only the image is returned.
    """

    def __init__(self, csv_file, root_dir, transform=None, train=True):
        self.data = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        self.train = train

    def __len__(self):
        # One sample per CSV row
        return self.data.shape[0]

    def __getitem__(self, idx):
        file_name = self.data.iloc[idx, 0]
        image = Image.open(os.path.join(self.root_dir, file_name)).convert('RGB')
        if self.transform:
            image = self.transform(image)

        # Unlabelled (test) mode yields the image alone
        if not self.train:
            return image
        return image, self.data.iloc[idx, 1]

In [21]:
# Build the labelled training dataset (from the augmented index) and the
# unlabelled test dataset; both resolve file names against the relative
# 'images' folder and share the same preprocessing.
train_dataset = CustomDataset(
    csv_file='/content/Qualcomm-DL-Hackathon/train/aug_train.csv',
    root_dir='images',
    transform=t_transforms,
)
test_dataset = CustomDataset(
    csv_file='/content/Qualcomm-DL-Hackathon/test.csv',
    root_dir='images',
    transform=t_transforms,
    train=False,
)

In [22]:
# Inspect the first training sample: an (image tensor, label) pair
train_dataset[0]

(tensor([[[-0.3883, -0.3712, -0.4911,  ..., -1.6727, -1.7069, -1.7583],
          [-0.4226, -0.3883, -0.5253,  ..., -1.7583, -1.8268, -1.7583],
          [-0.4397, -0.4054, -0.4739,  ..., -1.7925, -1.8953, -1.6727],
          ...,
          [-0.7308, -0.7137, -0.7650,  ..., -0.8678, -0.8164, -0.8849],
          [-0.7479, -0.6965, -0.7137,  ..., -0.8335, -0.7993, -0.8507],
          [-0.7822, -0.7137, -0.7308,  ..., -0.9192, -0.8849, -0.9363]],
 
         [[-0.2325, -0.2500, -0.4076,  ..., -1.2304, -1.1429, -1.0903],
          [-0.2675, -0.2675, -0.4426,  ..., -1.4755, -1.4230, -1.2479],
          [-0.3025, -0.2850, -0.3725,  ..., -1.6856, -1.6856, -1.3704],
          ...,
          [-0.6352, -0.6176, -0.6702,  ..., -0.7752, -0.7227, -0.7927],
          [-0.6527, -0.6001, -0.6176,  ..., -0.7402, -0.7052, -0.7577],
          [-0.6877, -0.6176, -0.6352,  ..., -0.8277, -0.7927, -0.8452]],
 
         [[-0.3753, -0.2358, -0.0964,  ..., -0.8807, -0.8633, -0.8458],
          [-0.3927, -0.2358,

In [23]:
# Split training data into training and validation sets (80% / 20%).
# The split is driven by a seeded generator so that re-running the notebook
# produces the same partition and results stay comparable between runs.
# NOTE(review): aug_train.csv lists every original image together with its
# augmented copies and this split is row-wise, so copies of one source image
# can land on both sides — validation accuracy may be optimistic.
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
split_generator = torch.Generator().manual_seed(42)
train_data, val_data = random_split(train_dataset, [train_size, val_size], generator=split_generator)

In [24]:
# Number of samples in the training and validation subsets
train_size, val_size

(6584, 1646)

In [25]:
# creating data loaders with a batch size of 16
# (the test loader keeps file order so predictions line up with test.csv rows)
batch_size = 16
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [26]:
# Define the CNN model architecture (no modular code)
# The layers are created here as stand-alone modules and assembled into an
# nn.Sequential in a later cell. Each stage pairs a depthwise 3x3 convolution
# (groups == in_channels) with a 1x1 pointwise convolution that widens the
# channel count: 3 -> 128 -> 256 -> 512.
conv1 = nn.Conv2d(3, 3, kernel_size=3, stride=1, padding=1, groups=3)
pw1 = nn.Conv2d(3, 128, kernel_size=1, stride=1, padding=0)
bn1 = nn.BatchNorm2d(128)
conv2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, groups=128)
pw2 = nn.Conv2d(128, 256, kernel_size=1, stride=1, padding=0)
bn2 = nn.BatchNorm2d(256)
conv3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, groups=256)
pw3 = nn.Conv2d(256, 512, kernel_size=1, stride=1, padding=0)
bn3 = nn.BatchNorm2d(512)
# Unused experimental 1x1 bottleneck layers, kept disabled
#conv4 = nn.Conv2d(128, 64, kernel_size=1, stride=1, padding=0)
#conv5 = nn.Conv2d(64, 128, kernel_size=1, stride=1, padding=0)
# Batch norm for the 3-channel output of the first depthwise convolution
bn4 = nn.BatchNorm2d(3)
# 2x2 max pooling halves the spatial resolution each time it is applied
pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
# 512 * 16 * 16 presumably corresponds to 512 channels at 16x16, i.e. a
# 128x128 input after three poolings — confirm against the model assembly
fc1 = nn.Linear(512 * 16 * 16, 128)
fc2 = nn.Linear(128, 2)  # 2 classes (emergency non-emergency)

In [30]:
# Initialize the model
# Every normalisation position gets its own BatchNorm2d instance. Placing the
# same module object at two positions would make both positions share one set
# of affine weights and one set of running statistics, although they
# normalise different activations.
model = nn.Sequential(
    # Stage 1: depthwise 3x3 on the RGB input, then pointwise 3 -> 128
    conv1,
    bn4,
    nn.ReLU(),
    pw1,
    bn1,
    nn.ReLU(),
    pool,
    # Stage 2: depthwise 3x3 on 128 channels, then pointwise 128 -> 256
    conv2,
    nn.BatchNorm2d(128),  # dedicated norm for the depthwise output (not bn1)
    nn.ReLU(),
    pw2,
    bn2,
    nn.ReLU(),
    pool,
    # Stage 3: depthwise 3x3 on 256 channels, then pointwise 256 -> 512
    conv3,
    nn.BatchNorm2d(256),  # dedicated norm for the depthwise output (not bn2)
    nn.ReLU(),
    pw3,
    bn3,
    nn.ReLU(),
    pool,
    # Classifier head
    nn.Flatten(),
    fc1,
    nn.ReLU(),
    nn.Dropout(0.3),
    fc2,
    # No Sigmoid/Softmax here: the model outputs raw logits, which is what
    # nn.CrossEntropyLoss expects
).to(device)

In [28]:
# Define the loss function and optimizer
# NOTE(review): the optimizer captures model.parameters() when it is built, so
# this cell must be re-run whenever `model` is re-created. The saved execution
# counts (model cell In [30], this cell In [28]) suggest that was not the case
# in the recorded session — confirm before trusting those results.
criterion = nn.CrossEntropyLoss()  # for classification
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion.to(device)

CrossEntropyLoss()

In [None]:
# Train the current `model` for a fixed number of epochs, reporting the mean
# batch loss and the training accuracy after each epoch
num_epochs = 5
for epoch in range(num_epochs):
    model.train()  # training-mode behaviour for dropout and batch norm
    running_loss, total, correct = 0.0, 0, 0

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # One optimisation step: clear gradients, forward, backward, update
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the statistics used for the epoch summary
        running_loss += loss.item()
        predicted = torch.max(outputs, 1).indices
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

    epoch_loss = running_loss / len(train_loader)
    epoch_accuracy = 100 * correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%')

In [31]:
# Hyperparameters to tune
learning_rates = [0.001, 0.01]
batch_sizes = [16, 32]
dropout_rates = [0.3, 0.5]
hidden_layers = [1, 2]  # 1 hidden layer or 2 hidden layers
hidden_units = [128]  # Size of each hidden layer
num_epochs = 10

# Best configuration found so far, judged by validation accuracy
best_model = None
best_accuracy = 0
best_params = {}

# Grid search loop over hyperparameters
for lr in learning_rates:
    for batch_size in batch_sizes:
        for dropout_rate in dropout_rates:
            for num_hidden_layers in hidden_layers:
                for hidden_units_per_layer in hidden_units:

                    # Recreate data loaders with the current batch size.
                    # Validation order does not affect accuracy, so it is not shuffled.
                    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
                    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
                    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

                    # Define the CNN model architecture with the current number of hidden layers and dropout rate
                    conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
                    bn1 = nn.BatchNorm2d(32)
                    conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
                    bn2 = nn.BatchNorm2d(64)
                    conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
                    bn3 = nn.BatchNorm2d(128)
                    pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
                    fc_layers = []

                    # Create hidden layers based on the number of hidden layers
                    input_size = 128 * 16 * 16  # Image size after convolution and pooling
                    for i in range(num_hidden_layers):
                        fc_layers.append(nn.Linear(input_size, hidden_units_per_layer))
                        fc_layers.append(nn.ReLU())
                        fc_layers.append(nn.Dropout(dropout_rate))
                        input_size = hidden_units_per_layer  # Update input size for the next layer

                    fc_layers.append(nn.Linear(input_size, 2))  # Output layer for 2 classes (emergency / non-emergency)

                    # Combine layers into a model (a fresh object for every configuration)
                    model = nn.Sequential(
                        conv1,
                        bn1,
                        nn.ReLU(),
                        pool,
                        conv2,
                        bn2,
                        nn.ReLU(),
                        pool,
                        conv3,
                        bn3,
                        nn.ReLU(),
                        pool,
                        nn.Flatten(),
                        *fc_layers
                    ).to(device)

                    # Define the loss function and optimizer for this combination of hyperparameters
                    criterion = nn.CrossEntropyLoss()
                    optimizer = optim.Adam(model.parameters(), lr=lr)
                    criterion.to(device)

                    # Train the model
                    for epoch in range(num_epochs):
                        model.train()
                        running_loss = 0.0
                        correct = 0
                        total = 0

                        for inputs, labels in train_loader:
                            inputs, labels = inputs.to(device), labels.to(device)

                            optimizer.zero_grad()
                            outputs = model(inputs)
                            loss = criterion(outputs, labels)
                            loss.backward()
                            optimizer.step()

                            running_loss += loss.item()
                            _, predicted = torch.max(outputs, 1)
                            total += labels.size(0)
                            correct += (predicted == labels).sum().item()

                        epoch_loss = running_loss / len(train_loader)
                        epoch_accuracy = 100 * correct / total
                        print(f"Epoch [{epoch+1}/{num_epochs}] for lr={lr}, batch_size={batch_size}, dropout={dropout_rate}, hidden_layers={num_hidden_layers}, hidden_units={hidden_units_per_layer}: Loss={epoch_loss:.4f}, Accuracy={epoch_accuracy:.2f}%")

                    # Evaluate on the validation set after training
                    model.eval()
                    val_correct = 0
                    val_total = 0
                    with torch.no_grad():
                        for inputs, labels in val_loader:
                            inputs, labels = inputs.to(device), labels.to(device)
                            outputs = model(inputs)
                            _, predicted = torch.max(outputs, 1)
                            val_total += labels.size(0)
                            val_correct += (predicted == labels).sum().item()

                    val_accuracy = 100 * val_correct / val_total
                    print(f"Validation Accuracy for lr={lr}, batch_size={batch_size}, dropout={dropout_rate}, hidden_layers={num_hidden_layers}, hidden_units={hidden_units_per_layer}: {val_accuracy:.2f}%")

                    # Select on held-out accuracy. Training accuracy keeps
                    # rising with overfitting and is not comparable between
                    # configurations, so it must not drive the checkpoint.
                    if val_accuracy > best_accuracy:
                        best_accuracy = val_accuracy
                        best_model = model  # safe to keep: the next configuration builds a new object
                        best_params = {
                            'lr': lr,
                            'batch_size': batch_size,
                            'dropout_rate': dropout_rate,
                            'num_hidden_layers': num_hidden_layers,
                            'hidden_units_per_layer': hidden_units_per_layer,
                        }
                        torch.save(model.state_dict(), 'best_model.pth')

# Leave `model` pointing at the winning configuration so that the evaluation
# and prediction cells below use it (and so 'best_model.pth' matches its
# architecture) instead of whichever configuration happened to run last.
if best_model is not None:
    model = best_model
print(f"Best validation accuracy: {best_accuracy:.2f}% with {best_params}")


Epoch [1/10] for lr=0.001, batch_size=16, dropout=0.3, hidden_layers=1, hidden_units=128: Loss=0.7661, Accuracy=67.16%
Epoch [2/10] for lr=0.001, batch_size=16, dropout=0.3, hidden_layers=1, hidden_units=128: Loss=0.5576, Accuracy=70.63%
Epoch [3/10] for lr=0.001, batch_size=16, dropout=0.3, hidden_layers=1, hidden_units=128: Loss=0.5087, Accuracy=74.03%
Epoch [4/10] for lr=0.001, batch_size=16, dropout=0.3, hidden_layers=1, hidden_units=128: Loss=0.4795, Accuracy=76.55%
Epoch [5/10] for lr=0.001, batch_size=16, dropout=0.3, hidden_layers=1, hidden_units=128: Loss=0.4443, Accuracy=79.01%
Epoch [6/10] for lr=0.001, batch_size=16, dropout=0.3, hidden_layers=1, hidden_units=128: Loss=0.4153, Accuracy=80.71%
Epoch [7/10] for lr=0.001, batch_size=16, dropout=0.3, hidden_layers=1, hidden_units=128: Loss=0.4077, Accuracy=81.27%
Epoch [8/10] for lr=0.001, batch_size=16, dropout=0.3, hidden_layers=1, hidden_units=128: Loss=0.3844, Accuracy=82.70%
Epoch [9/10] for lr=0.001, batch_size=16, dropou

KeyboardInterrupt: 

In [None]:
# Measure the accuracy of the current `model` on the validation loader
model.eval()  # inference-mode behaviour for dropout and batch norm
val_correct, val_total = 0, 0
with torch.no_grad():
    for inputs, labels in val_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        predicted = torch.max(outputs, 1).indices
        val_total += labels.shape[0]
        val_correct += predicted.eq(labels).sum().item()

val_accuracy = 100 * val_correct / val_total
print(f'Validation Accuracy: {val_accuracy:.2f}%')

Validation Accuracy: 59.70%


In [None]:
# Generate predictions for test images
model.eval()  # Set the model to evaluation mode
predictions = []

with torch.no_grad():
    for inputs in test_loader:
        # Batches come off the loader on the CPU; move them to the model's
        # device, otherwise the forward pass fails when the model is on CUDA
        inputs = inputs.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())

# Print or save the predictions
print(predictions)

In [None]:
# Predict a label for every image listed in test.csv and write the result to CSV
# Left disabled: the checkpoint only loads if `model` has the same
# architecture as the network that was saved.
#model.load_state_dict(torch.load('best_model.pth'))
model.eval()
predictions = []

with torch.no_grad():
    for images in test_loader:
        images = images.to(device)
        outputs = model(images)
        # Under no_grad the outputs carry no graph, so the legacy `.data`
        # access is unnecessary
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())

# Save predictions to CSV (test_loader is not shuffled, so the order matches test.csv)
test_data = pd.read_csv('/content/Qualcomm-DL-Hackathon/test.csv')
test_data['emergency_or_not'] = predictions
test_data.to_csv('/content/Qualcomm-DL-Hackathon/test_predictions.csv', index=False)
print('labels written to test_predictions.csv')