<a href="https://colab.research.google.com/github/KhanilReddy/ML1/blob/main/Hackathon.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
#Downloading the entire project folder "Qualcomm-DL-Hackathon" from GitHub to your computer/colab
!git clone https://github.com/Prashant-AV/Qualcomm-DL-Hackathon.git

Cloning into 'Qualcomm-DL-Hackathon'...
remote: Enumerating objects: 10, done.[K
remote: Counting objects: 100% (10/10), done.[K
remote: Compressing objects: 100% (10/10), done.[K
remote: Total 10 (delta 1), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (10/10), 30.68 MiB | 14.52 MiB/s, done.
Resolving deltas: 100% (1/1), done.


In [23]:
import zipfile
import os
import shutil

def extract_all_files(zip_dir, output_dir):
    """
        This function is to extract all files from ZIP archives located in a specified directory and place them into an output directory.

        Args:
            zip_dir (:class:`str`): : Required: Directory path in which all zip files are present
            output_dir (:class:`str`): Required: Directory path in which all images has to be stored

        Example : Extracted all the images from images part-1.zip and images part-2.zip to all_images folder.
    """
    if os.path.exists(zip_dir):
      # Get the zip files in zip_dir directory
      files = os.listdir(zip_dir)
      zip_files = [f for f in files if f.endswith('.zip')]

      if not os.path.exists(output_dir):
        os.mkdir(output_dir)

      # Extract each zip file
      for zip_file in zip_files:
          with zipfile.ZipFile(os.path.join(zip_dir, zip_file), 'r') as zip_ref:
              for member in zip_ref.namelist():
                  # Extract each file to the output_dir directory without creating subfolders
                  filename = os.path.basename(member)
                  if filename:
                      source = zip_ref.open(member)
                      target = open(os.path.join(output_dir, filename), "wb")
                      with source, target:
                          shutil.copyfileobj(source, target)
    else:
      print("f{zip_dir} doesn't exits")

In [24]:
# Define the directory containing the zip files and the all_images directory
train_directory = '/content/Qualcomm-DL-Hackathon/train/'
all_images_directory = os.path.join(train_directory , 'all_images')

# Extract all files from the zip files in path /content/Qualcomm-DL-Hackathon/train/ to the /content/Qualcomm-DL-Hackathon/train/all_images directory
extract_all_files(train_directory, all_images_directory)

print(f"All files have been extracted to {all_images_directory}.")

All files have been extracted to /content/Qualcomm-DL-Hackathon/train/all_images.


In [8]:
# pandas to read the csv and write the predicted data to csv
import pandas as pd

# Importing necessary modules from TensorFlow and Keras
from tensorflow import keras
from keras.layers import Dense, Dropout, Flatten
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.utils import to_categorical
from keras.models import Sequential
from keras.optimizers import SGD
from keras.models import Model

# Importing necessary modules from scikit-learn
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Importing tqdm for progress bar and numpy for numerical operations
from tqdm import tqdm
import numpy as np



In [9]:
# Loading the training data from a CSV file
train = pd.read_csv('/content/Qualcomm-DL-Hackathon/train/train.csv')

# Initializing an empty list to store the training images
train_img=[]

for i in tqdm(range(train.shape[0])):
  # Loading the image and resizing it to 224x224 pixels, Converting the image to an array and Appending the image array to the list
  temp_img = image.load_img('/content/Qualcomm-DL-Hackathon/train/all_images/'+train['image_names'][i], target_size=(224,224))
  temp_img=image.img_to_array(temp_img)

  train_img.append(temp_img)


100%|██████████| 1646/1646 [00:02<00:00, 643.17it/s]


In [10]:

def vgg16_model(img_rows, img_cols, channel=1, num_classes=None):
    """
    Builds a VGG16-based convolutional neural network model with custom top layers.

    Parameters:
    img_rows (int): The number of rows in the input images.
    img_cols (int): The number of columns in the input images.
    channel (int): The number of channels in the input images (default is 1).
    num_classes (int): The number of output classes for the classification task.

    Returns:
    keras.models.Model: A compiled Keras model ready for training.


    Example usage:
    model = vgg16_model(img_rows=224, img_cols=224, channel=3, num_classes=10)
    """
    #Loads the VGG16 model with weights pre-trained on the ImageNet dataset and it includes the original fully connected layers at the top of the VGG16 architecture
    model = VGG16(weights='imagenet', include_top=True)

    #remove the last layer of the pre-trained VGG16 model and  clears connections of the last layer..
    model.layers.pop()
    model.layers[-1].outbound_nodes = []

    #Customer layer addition which acts as output layer
    x=Dense(num_classes, activation='sigmoid')(model.output)
    model=Model(model.input,x)

    #To set the first 8 layers to non-trainable (weights will not be updated)
    for layer in model.layers[:8]:
        layer.trainable = False

        # Compile the model with SGD optimizer and binary cross-entropy loss
        sgd = SGD(learning_rate=1e-3, weight_decay=1e-6, momentum=0.9, nesterov=True)
        model.compile(optimizer=sgd, loss='binary_crossentropy', metrics=['accuracy'])

        return model

In [19]:
# Extracting the target variable from the training data
train_label = train['emergency_or_not'].values

# Initializing the LabelEncoder
le = LabelEncoder()

# Encoding the target variable
train_label = le.fit_transform(train_label)

# Converting the encoded labels to categorical format
train_label=to_categorical(train_label)
train_label=np.array(train_label)

# Splitting the data into training and validation sets
train_image, valid_image, train_label, valid_label=train_test_split(train_img,train_label,test_size=0.2, random_state=42)

# Setting parameters for the model
img_rows, img_cols = 224, 224           # Resolution of inputs
channel = 3                             # Number of channels (3 for RGB images)
num_classes = 2                         # Number of output classes : emergency and Non_emergency
batch_size = 32                         # Batch size for training
nb_epoch = 15                           # Number of epochs for training

# Converting lists to numpy arrays so that they can be efficiently processed by the deep learning model during training and validation.
train_image = np.stack(train_image)
valid_image = np.stack(valid_image)
train_label = np.stack(train_label)
valid_label = np.stack(valid_label)

# Load our final model
model = vgg16_model(img_rows, img_cols, channel, num_classes)

In [20]:
# Train the model with necessary input parameters such as input images, labels, batch_size, epochs,
model.fit(train_image, train_label,batch_size=batch_size,epochs=nb_epoch,shuffle=True,verbose=1,validation_data=(valid_image, valid_label))

combined_data = np.concatenate((train_image, valid_image))
combined_labels = np.concatenate((train_label, valid_label))

# Train on the combined dataset for 5 more epochs on total data set
additional_epochs = 5
history_combined = model.fit(combined_data, combined_labels, epochs=additional_epochs, batch_size=batch_size, shuffle=True, verbose=1)

predictions_valid = model.predict(valid_image, batch_size=batch_size, verbose=1)

# Get the class with the highest probability for each prediction
validation_prediction = np.argmax(predictions_valid, axis=1)
validation_prediction

Epoch 1/15
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 623ms/step - accuracy: 0.6469 - loss: 0.6888 - val_accuracy: 0.7909 - val_loss: 0.6715
Epoch 2/15
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 515ms/step - accuracy: 0.8281 - loss: 0.6667 - val_accuracy: 0.8818 - val_loss: 0.6529
Epoch 3/15
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 522ms/step - accuracy: 0.9174 - loss: 0.6464 - val_accuracy: 0.8515 - val_loss: 0.6407
Epoch 4/15
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 555ms/step - accuracy: 0.9231 - loss: 0.6273 - val_accuracy: 0.9030 - val_loss: 0.6178
Epoch 5/15
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 521ms/step - accuracy: 0.9483 - loss: 0.6042 - val_accuracy: 0.9212 - val_loss: 0.5995
Epoch 6/15
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 556ms/step - accuracy: 0.9546 - loss: 0.5884 - val_accuracy: 0.9333 - val_loss: 0.5809
Epoch 7/15
[1m42/42[

array([1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0,
       0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1,
       1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1,
       0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1,
       1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1,
       1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0,
       1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [25]:
# Load the test data from a CSV file
test = pd.read_csv('/content/Qualcomm-DL-Hackathon/test.csv')

# Initialize an empty list to store the test images
test_image = []
for i in tqdm(range(test.shape[0])):
    # Loading the image and resizing it to 224x224 pixels, Converting the image to an array and Appending the image array to the list
    img = image.load_img('/content/Qualcomm-DL-Hackathon/train/all_images/'+test['image_names'][i], target_size=(224,224))
    img = image.img_to_array(img)
    test_image.append(img)

# Convert the list of test images to a numpy array
test_image = np.stack(test_image)
test_image.shape

# Make predictions on the test images
predictions_test = model.predict(test_image, batch_size=batch_size, verbose=1)
predictions_test

# Get the class with the highest probability for each prediction
test_prediction = np.argmax(predictions_test, axis=1)
test_prediction

100%|██████████| 706/706 [00:00<00:00, 991.97it/s]


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 141ms/step


array([0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0,
       0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0,
       1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1,
       1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0,
       1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
       0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0,
       1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1,
       0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
       0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1,

In [26]:
# Add the predictions to the test data
test['emergency_or_not'] = test_prediction

# Save the updated test data with predictions to a new CSV file
test.to_csv('/content/Qualcomm-DL-Hackathon/submission_final.csv', header=True, index=False)