# **Importing Libs:**

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
import matplotlib.pyplot as plt
import cv2
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow.keras import datasets,layers,Sequential
from tensorflow.keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout,GlobalAveragePooling2D
from zipfile import ZipFile
import os,glob
from tqdm._tqdm_notebook import tqdm_notebook as tqdm
from keras.models import Model
from sklearn import preprocessing
from keras.layers import BatchNormalization
from zipfile import ZipFile
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import img_to_array, array_to_img
from tensorflow.keras.utils import to_categorical

Please use `tqdm.notebook.*` instead of `tqdm._tqdm_notebook.*`
  from tqdm._tqdm_notebook import tqdm_notebook as tqdm


# **Loading the DataSet:**

In [None]:
from google.colab import files

uploaded = files.upload()

for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))

# Then move kaggle.json into the folder where the API expects to find it.
!mkdir -p ~/.kaggle/ && mv kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json


Saving kaggle.json to kaggle.json
User uploaded file "kaggle.json" with length 76 bytes


In [None]:
# Download the competition archive with the Kaggle CLI; this relies on the
# kaggle.json token placed under ~/.kaggle/ by the previous cell.
!kaggle competitions download -c ai-mlprocom24

Downloading ai-mlprocom24.zip to /content
 99% 376M/380M [00:03<00:00, 48.5MB/s]
100% 380M/380M [00:03<00:00, 103MB/s] 


In [None]:
file = "/content/ai-mlprocom24.zip"
# Extract the competition archive into the current working directory.
# The archive handle is deliberately NOT named `zip`: a `with ... as zip`
# binding stays in the notebook namespace after the block and would shadow
# the built-in zip() for every later cell.
with ZipFile(file, 'r') as archive:
  archive.extractall()
  print('Done')

Done


In [None]:
df = pd.read_csv('/content/Train.csv')

# Load and preprocess images
image_data = []
labels = []
# Order matters: the list position is used as the integer class id later on.
class_names= ['Normal','Abnormal']
for index, row in df.iterrows():
    image_path = os.path.join('/content/train_images', row['ID'])   #row['ID']=img001.jpg -> /content/train_images/img001.jpg
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None (it does not raise) when the file is missing
        # or cannot be decoded; cv2.resize would then fail with an opaque
        # error. Skip the row so image_data and labels stay aligned, matching
        # the guard used by the feature-extraction cells further down.
        continue
    image = cv2.resize(image, (224, 224))  # Resize the image to a fixed size
    image = img_to_array(image)
    image_data.append(image)
    labels.append(row['Label'])



# **Label Encoding:**

In [None]:
# Replace each class name by its position in class_names, then one-hot encode
# the integer ids for the categorical cross-entropy loss used below.
labels_new = [class_names.index(name) for name in labels]
labels = tf.keras.utils.to_categorical(labels_new)

In [None]:
# Stack the per-image arrays into one float32 array and divide by 255.
image_data = np.array(image_data, dtype='float32')
image_data /= 255.0    #Normalization

In [None]:
from tensorflow.keras.applications.vgg16 import VGG16 , preprocess_input
from tensorflow.keras.preprocessing.image import load_img , img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.densenet import DenseNet121

**Using VGG16:**

In [None]:
# Input height and width fed to the network.
r, c = 224, 224
# VGG16 convolutional base with ImageNet weights and without its classifier top.
vgg = VGG16(
    input_shape=(r, c, 3),   # 224 x 224 x 3
    include_top=False,
    weights='imagenet',
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Mark every layer of the VGG16 base as non-trainable; only layers added on top
# of it afterwards will be updated during training.
for base_layer in vgg.layers:
  base_layer.trainable = False

In [None]:
def vgg_model(bottom_model,classes):
  """Stack a dense classification head on top of a convolutional base.

  Args:
    bottom_model: object whose ``output`` tensor feeds the head (the VGG16
      base in this notebook).
    classes: number of units of the final softmax layer, i.e. the number of
      target classes.

  Returns:
    The output tensor of the head (not a Model); the caller wraps it with
    ``Model(inputs=..., outputs=...)``.
  """
  top_model = bottom_model.output
  top_model = GlobalAveragePooling2D()(top_model)
  # Progressively narrower fully connected layers.
  for units in (1024, 512, 64, 32):
    top_model = Dense(units, activation='relu')(top_model)
  # Honour `classes` instead of a hard-coded 2 so the parameter is not ignored.
  top_model = Dense(classes, activation='softmax')(top_model)
  return top_model

In [None]:
# Attach the dense head (two output classes) to the VGG16 base and wrap the
# result in a single trainable Keras model.
model_head = vgg_model(bottom_model=vgg, classes=2)
model = Model(outputs=model_head, inputs=vgg.input)

In [None]:
# Hold out 20% of the samples for validation; the fixed seed keeps the split
# reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    image_data,
    labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Print the layer-by-layer architecture and parameter counts of the VGG16-based model.
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [None]:
# Categorical cross-entropy matches the one-hot labels and the softmax head.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=["accuracy"],
)


In [None]:
# Train for 10 epochs, reporting loss/accuracy on the held-out split each epoch.
model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    initial_epoch=0,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7e868010cfd0>

# **Using Resnet:**

In [None]:
# Load and preprocess images
image_data_r = []
labels_r = []
class_names= ['Normal','Abnormal']
for index, row in df.iterrows():
    image_path = os.path.join('/content/train_images', row['ID'])
    image = cv2.imread(image_path)
    image = cv2.resize(image, (224, 224))  # Resize the image to a fixed size
    image = img_to_array(image)
    image_data_r.append(image)
    labels_r.append(row['Label'])

In [None]:
image_data_r = np.array(image_data_r, dtype='float32') / 255.0
labels_r = np.array(labels_r)

# Convert labels to binary format (0 for 'Normal' and 1 for 'Abnormal').
# The label strings are capitalised (they are looked up in
# class_names = ['Normal', 'Abnormal'] earlier in this notebook), so the
# comparison must use 'Normal'; comparing against 'normal' never matches and
# turns every target into 1.
labels_r = np.where(labels_r == 'Normal', 0, 1)

In [None]:

# ImageNet-pretrained ResNet50 without its classification top.
resnet = ResNet50(input_shape=(224, 224, 3), include_top=False, weights='imagenet')

# Binary head: global average pooling -> 1024-unit ReLU -> single sigmoid unit.
x = GlobalAveragePooling2D()(resnet.output)
x = Dense(1024, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)

model_resnet = Model(outputs=predictions, inputs=resnet.input)
model_resnet.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 230, 230, 3)          0         ['input_2[0][0]']             
                                                                                                  
 conv1_conv (Conv2D)         (None, 112, 112, 64)         9472      ['conv1_pad[0][0]']           
                                                                                                  
 conv1_bn (BatchNormalizati  (None, 112, 112, 64

In [None]:
# Freeze only the pretrained ResNet50 base. Iterating over
# model_resnet.layers would also freeze the newly added pooling/dense head,
# leaving the model with no trainable weights at all.
for layer in resnet.layers:
    layer.trainable = False

In [None]:
# Binary cross-entropy pairs with the single sigmoid output unit of model_resnet.
model_resnet.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
# Split the ResNet arrays (image_data_r / binary labels_r). The VGG arrays
# `image_data` / one-hot `labels` must not be used here: their two-column
# targets do not match the single sigmoid output of model_resnet.
X_train, X_val, y_train, y_val = train_test_split(image_data_r, labels_r, test_size=0.2, random_state=42)


In [None]:
import cv2
import numpy as np
import pandas as pd
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.model_selection import train_test_split

# Read every training image, resize it to 224x224 and collect it with its label.
image_data_r, labels_r = [], []
class_names = ['Normal', 'Abnormal']

for index, row in df.iterrows():
    image_path = os.path.join('/content/train_images', row['ID'])
    image = img_to_array(cv2.resize(cv2.imread(image_path), (224, 224)))
    image_data_r.append(image)
    labels_r.append(row['Label'])

# Stack into one float32 array and divide by 255.
image_data_r = np.array(image_data_r, dtype='float32') / 255.0

# Binary targets: 0 where the label is 'Normal', 1 otherwise.
labels_r = np.where(np.array(labels_r) == 'Normal', 0, 1)

# Hold out 20% of the samples for validation (fixed seed for reproducibility).
X_train, X_val, y_train, y_val = train_test_split(
    image_data_r, labels_r, test_size=0.2, random_state=42
)

# ImageNet-pretrained ResNet50 base without its classification top.
resnet = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Classification head: pooling -> 1024-unit ReLU -> single sigmoid unit.
x = GlobalAveragePooling2D()(resnet.output)
x = Dense(1024, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)

model_resnet = Model(inputs=resnet.input, outputs=predictions)

# Only the base is frozen; the head defined above stays trainable.
for layer in resnet.layers:
    layer.trainable = False

model_resnet.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train the head for 10 epochs in mini-batches of 32.
model_resnet.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=10,
)


# **Using InceptionV3**

In [None]:
# Read the training images at 299x299, the input size the InceptionV3 base
# below is built with.
image_data_i = []
labels_i = []

for index, row in df.iterrows():
    image_path = os.path.join('/content/train_images', row['ID'])
    image = cv2.imread(image_path)
    image = cv2.resize(image, (299, 299))
    image = img_to_array(image)
    image_data_i.append(image)
    labels_i.append(row['Label'])


image_data_i = np.array(image_data_i, dtype='float32') / 255.0
labels_i = np.array(labels_i)

# Convert labels to binary format (0 for 'Normal' and 1 for 'Abnormal').
# The label strings are capitalised (see class_names = ['Normal', 'Abnormal']
# earlier in this notebook); comparing against lowercase 'normal' never
# matches and would label every sample as 1.
labels_i = np.where(labels_i == 'Normal', 0, 1)


X_train, X_val, y_train, y_val = train_test_split(image_data_i, labels_i, test_size=0.2, random_state=42)


# ImageNet-pretrained InceptionV3 without its classification top.
base_model_i = InceptionV3(weights='imagenet', include_top=False, input_shape=(299, 299, 3))

# Binary head: pooling -> 1024-unit ReLU -> single sigmoid unit.
x = base_model_i.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions_i = Dense(1, activation='sigmoid')(x)

model_i = Model(inputs=base_model_i.input, outputs=predictions_i)

# Freeze the pretrained base only; the head stays trainable.
for layer in base_model_i.layers:
    layer.trainable = False

model_i.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model_i.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))




# **Using DenseNet:**

In [None]:
image_data_d = []
labels_d = []

for index, row in df.iterrows():
    image_path = os.path.join('/content/train_images', row['ID'])
    image = cv2.imread(image_path)
    image = cv2.resize(image, (224, 224))  # Resize the image to match DenseNet input size
    image = img_to_array(image)
    image_data_d.append(image)
    labels_d.append(row['Label'])

# Convert lists to numpy arrays
image_data_d = np.array(image_data_d, dtype='float32') / 255.0  # Normalize pixel values
labels_d = np.array(labels_d)

# Convert labels to binary format (0 for 'Normal' and 1 for 'Abnormal').
# The label strings are capitalised (see class_names = ['Normal', 'Abnormal']
# earlier in this notebook); comparing against lowercase 'normal' never
# matches and would label every sample as 1.
labels_d = np.where(labels_d == 'Normal', 0, 1)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(image_data_d, labels_d, test_size=0.2, random_state=42)

# Load the pre-trained DenseNet121 model (ImageNet weights, no classification top)
base_model_d = DenseNet121(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Binary head: pooling -> 1024-unit ReLU -> single sigmoid unit
x = base_model_d.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions_d = Dense(1, activation='sigmoid')(x)

# Create the final model
model_d = Model(inputs=base_model_d.input, outputs=predictions_d)

# Freeze the layers in the base model; the head stays trainable
for layer in base_model_d.layers:
    layer.trainable = False

# Compile the model
model_d.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model_d.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_val, y_val))

# # Evaluate the model
# loss, accuracy = model.evaluate(X_val, y_val)
# print("Validation Accuracy:", accuracy)

# Make predictions
# Assuming you have a test set or new images to predict on, you can load and preprocess them similarly to the training data
# Then use the trained model to make predictions
# predictions = model.predict(test_images)


# **Making predictions:**

In [None]:

test_df = pd.read_csv('/content/Test.csv')

# Load and preprocess images from the test folder
test_images = []

for index, row in test_df.iterrows():
    image_path = os.path.join('/content/test_images', row['ID'])
    image = cv2.imread(image_path)
    # model_i was built with input_shape=(299, 299, 3); a 229x229 resize does
    # not match that input and makes predict() fail.
    image = cv2.resize(image, (299, 299))
    image = img_to_array(image) / 255.0  # Same scaling as the training images
    test_images.append(image)

test_images = np.array(test_images)

predictions = model_i.predict(test_images)

# The single sigmoid unit yields an (n, 1) array: flatten it to 1-D and
# threshold at 0.5 (1 -> 'Abnormal', 0 -> 'Normal'). The strings use the same
# capitalisation as the training labels and as the label-encoder based
# submission cells later in this notebook.
predicted_labels = np.where(predictions.ravel() > 0.5, 'Abnormal', 'Normal')

# Add the predicted labels to the DataFrame and save the submission file
test_df['Label'] = predicted_labels
test_df.to_csv('Test_Predictions.csv', index=False)


In [None]:
# Writes test_df to 'Test_Predictions.csv' again (same file name as the
# previous cell), without the DataFrame index.
test_df.to_csv('Test_Predictions.csv', index=False)


In [None]:
# Load and preprocess images from the Test folder
test_images = []

for index, row in test_df.iterrows():
    image_path = os.path.join('/content/test_images', row['ID'])  # Assuming the images are in a folder named 'Test'
    image = cv2.imread(image_path)
    image = cv2.resize(image, (299, 299))  # Resize the image to match InceptionV3 input size
    image = img_to_array(image) / 255.0  # Normalize pixel values
    test_images.append(image)

test_images = np.array(test_images)

predictions = model_i.predict(test_images)

# Convert predictions to original label format ('normal' or 'abnormal')
predicted_labels = np.where(predictions > 0.5, 'abnormal', 'normal')

# Add the predicted labels to the DataFrame
test_df['Label'] = predicted_labels
test_df.head()

# Save the DataFrame with the predicted labels to a new CSV file
#test_df.to_csv('Test_Predictions.csv', index=False)


In [None]:
# Save the InceptionV3 predictions held in test_df, without the DataFrame index.
test_df.to_csv('Test_Predictions_Inception.csv', index=False)

In [None]:
# Load and preprocess images from the Test folder
test_images = []

for index, row in test_df.iterrows():
    image_path = os.path.join('/content/test_images', row['ID'])  # Assuming the images are in a folder named 'Test'
    image = cv2.imread(image_path)
    image = cv2.resize(image, (224, 224))  # Resize the image to match InceptionV3 input size
    image = img_to_array(image) / 255.0  # Normalize pixel values
    test_images.append(image)

test_images = np.array(test_images)

predictions = model.predict(test_images)

# Convert predictions to original label format ('normal' or 'abnormal')
predicted_labels = np.where(predictions > 0.5, 'abnormal', 'normal')

# Add the predicted labels to the DataFrame
test_df['Label'] = predicted_labels
test_df.head()

# Save the DataFrame with the predicted labels to a new CSV file
test_df.to_csv('Test_Predictions_vgg.csv', index=False)

# **Extracting features using ResNet50 and then using SVC to classify if the cell is normal or abnormal**

# **Using SVC**

In [None]:
import pandas as pd
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D

# Load the CSV file containing image names and labels for training
train_df = pd.read_csv('Train.csv')

# Extract features with `resnet`, the headless ResNet50 base created in an
# earlier cell of this notebook.
# NOTE(review): images are passed as raw cv2 arrays (no scaling and no
# ResNet50 preprocess_input) — confirm this is intended.
features_resnet = []
labels = []

for index, row in train_df.iterrows():
    image_path = os.path.join('/content/train_images', row['ID'])
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for a missing or undecodable file; skip the
        # row so features and labels stay aligned (same guard as the later
        # feature-extraction cells).
        continue
    image = cv2.resize(image, (224, 224))  # Resize the image to match ResNet50 input size
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    features = resnet.predict(image)
    features_resnet.append(features.flatten())
    labels.append(row['Label'])

features_resnet = np.array(features_resnet)
labels = np.array(labels)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(features_resnet, labels, test_size=0.2, random_state=42)

# Train a machine learning model (e.g., SVM) using the extracted features.
# NOTE: this rebinds `model`, which previously held the VGG16 Keras model.
model = SVC()
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("Validation Accuracy:", accuracy)

# Use the trained model to make predictions on the test images
test_df = pd.read_csv('Test.csv')
features_test = []
test_rows = []  # index labels of the test rows whose image could be read

for index, row in test_df.iterrows():
    image_path = os.path.join('/content/test_images', row['ID'])
    image = cv2.imread(image_path)
    if image is None:
        continue
    image = cv2.resize(image, (224, 224))  # Resize the image to match ResNet50 input size
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    features = resnet.predict(image)
    features_test.append(features.flatten())
    test_rows.append(index)

features_test = np.array(features_test)

# Make predictions
predictions = model.predict(features_test)

# Assign by row index so every prediction lands on the row it was computed
# for; rows whose image could not be read are left empty (NaN).
test_df['Label'] = pd.Series(predictions, index=test_rows)
test_df.to_csv('Test_Predictions_1.0.csv', index=False)


# **Using XGBoost**

In [None]:
import pandas as pd
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from tensorflow.keras.applications import ResNet50

# Load the CSV file containing image names and labels for training
train_df = pd.read_csv('Train.csv')

# Headless ResNet50 (ImageNet weights) used purely as a fixed feature extractor.
# NOTE(review): images are passed as raw cv2 arrays (no scaling and no
# ResNet50 preprocess_input) — confirm this is intended.
resnet = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
model = Model(inputs=resnet.input, outputs=resnet.output)

features_resnet = []
labels = []

for index, row in train_df.iterrows():
    image_path = os.path.join('/content/train_images', row['ID'])
    image = cv2.imread(image_path)
    if image is None:
        # Unreadable image: skip it so features and labels stay aligned.
        continue
    image = cv2.resize(image, (224, 224))  # Resize the image to match ResNet50 input size
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    features = model.predict(image)
    features_resnet.append(features.flatten())
    labels.append(row['Label'])

features_resnet = np.array(features_resnet)
labels = np.array(labels)

# Convert labels to numeric format
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(features_resnet, labels, test_size=0.2, random_state=42)

# Train an XGBoost model using the extracted features
model_xgb = XGBClassifier()
model_xgb.fit(X_train, y_train)

# Evaluate the model
y_pred = model_xgb.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("Validation Accuracy:", accuracy)

# Use the trained model to make predictions on the test images
test_df = pd.read_csv('Test.csv')
features_test = []
test_rows = []  # index labels of the test rows whose image could be read

for index, row in test_df.iterrows():
    image_path = os.path.join('/content/test_images', row['ID'])
    image = cv2.imread(image_path)
    if image is None:
        continue
    image = cv2.resize(image, (224, 224))  # Resize the image to match ResNet50 input size
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    features = model.predict(image)
    features_test.append(features.flatten())
    test_rows.append(index)

features_test = np.array(features_test)

# Make predictions
predictions = model_xgb.predict(features_test)

# Convert numeric predictions back to original labels
predicted_labels = label_encoder.inverse_transform(predictions)

# Assign by row index: when an image was skipped above, a positional
# assignment would raise a length mismatch. Skipped rows are left empty (NaN).
test_df['Label'] = pd.Series(predicted_labels, index=test_rows)
test_df.to_csv('Test_Predictions_XGBoost.csv', index=False)



# **SVC + Mobilenet:**

In [None]:
import pandas as pd
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model

# Load the CSV file containing image names and labels for training
train_df = pd.read_csv('Train.csv')

# Headless MobileNetV2 (ImageNet weights) used purely as a fixed feature extractor.
# NOTE(review): images are passed as raw cv2 arrays (no scaling and no
# MobileNetV2 preprocess_input) — confirm this is intended.
mobilenet = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
model = Model(inputs=mobilenet.input, outputs=mobilenet.output)

features_mobilenet = []
labels = []

for index, row in train_df.iterrows():
    image_path = os.path.join('/content/train_images', row['ID'])
    image = cv2.imread(image_path)
    if image is None:
        # Unreadable image: skip it so features and labels stay aligned.
        continue
    image = cv2.resize(image, (224, 224))  # Resize the image to match MobileNetV2 input size
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    features = model.predict(image)
    features_mobilenet.append(features.flatten())
    labels.append(row['Label'])

features_mobilenet = np.array(features_mobilenet)
labels = np.array(labels)

# Convert labels to numeric format
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(features_mobilenet, labels, test_size=0.2, random_state=42)

# Train an SVM model using the extracted features
model_svc = SVC()
model_svc.fit(X_train, y_train)

# Evaluate the model
y_pred = model_svc.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("Validation Accuracy:", accuracy)

# Use the trained model to make predictions on the test images
test_df = pd.read_csv('Test.csv')
features_test = []
test_rows = []  # index labels of the test rows whose image could be read

for index, row in test_df.iterrows():
    image_path = os.path.join('/content/test_images', row['ID'])
    image = cv2.imread(image_path)
    if image is None:
        continue
    image = cv2.resize(image, (224, 224))  # Resize the image to match MobileNetV2 input size
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    features = model.predict(image)
    features_test.append(features.flatten())
    test_rows.append(index)

features_test = np.array(features_test)

# Make predictions
predictions = model_svc.predict(features_test)

# Convert numeric predictions back to original labels
predicted_labels = label_encoder.inverse_transform(predictions)

# Assign by row index: when an image was skipped above, a positional
# assignment would raise a length mismatch. Skipped rows are left empty (NaN).
test_df['Label'] = pd.Series(predicted_labels, index=test_rows)
test_df.to_csv('Test_Predictions_SVC_Mobilenet.csv', index=False)


# **SVC + InceptionV3**

In [None]:
import pandas as pd
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.models import Model

# Load the CSV file containing image names and labels for training
train_df = pd.read_csv('Train.csv')

# Headless InceptionV3 (ImageNet weights) used purely as a fixed feature extractor.
# NOTE(review): images are passed as raw cv2 arrays (no scaling and no
# InceptionV3 preprocess_input) — confirm this is intended.
inceptionv3 = InceptionV3(weights='imagenet', include_top=False, input_shape=(299, 299, 3))
model = Model(inputs=inceptionv3.input, outputs=inceptionv3.output)

features_inceptionv3 = []
labels = []

for index, row in train_df.iterrows():
    image_path = os.path.join('/content/train_images', row['ID'])
    image = cv2.imread(image_path)
    if image is None:
        # Unreadable image: skip it so features and labels stay aligned.
        continue
    image = cv2.resize(image, (299, 299))  # Resize the image to match InceptionV3 input size
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    features = model.predict(image)
    features_inceptionv3.append(features.flatten())
    labels.append(row['Label'])

features_inceptionv3 = np.array(features_inceptionv3)
labels = np.array(labels)

# Convert labels to numeric format
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(features_inceptionv3, labels, test_size=0.2, random_state=42)

# Train an SVM model using the extracted features
model_svc = SVC()
model_svc.fit(X_train, y_train)

# Evaluate the model
y_pred = model_svc.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("Validation Accuracy:", accuracy)

# Use the trained model to make predictions on the test images
test_df = pd.read_csv('Test.csv')
features_test = []
test_rows = []  # index labels of the test rows whose image could be read

for index, row in test_df.iterrows():
    image_path = os.path.join('/content/test_images', row['ID'])
    image = cv2.imread(image_path)
    if image is None:
        continue
    image = cv2.resize(image, (299, 299))  # Resize the image to match InceptionV3 input size
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    features = model.predict(image)
    features_test.append(features.flatten())
    test_rows.append(index)

features_test = np.array(features_test)

# Make predictions
predictions = model_svc.predict(features_test)

# Convert numeric predictions back to original labels
predicted_labels = label_encoder.inverse_transform(predictions)

# Assign by row index: when an image was skipped above, a positional
# assignment would raise a length mismatch. Skipped rows are left empty (NaN).
test_df['Label'] = pd.Series(predicted_labels, index=test_rows)
test_df.to_csv('Test_Predictions_SVC_Inception.csv', index=False)


# **Random Forest + Resnet**

In [None]:
import pandas as pd
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model

# Load the CSV file containing image names and labels for training
train_df = pd.read_csv('Train.csv')

# Headless ResNet50 (ImageNet weights) used purely as a fixed feature extractor.
# NOTE(review): images are passed as raw cv2 arrays (no scaling and no
# ResNet50 preprocess_input) — confirm this is intended.
resnet = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
model = Model(inputs=resnet.input, outputs=resnet.output)

features_resnet = []
labels = []

for index, row in train_df.iterrows():
    image_path = os.path.join('/content/train_images', row['ID'])
    image = cv2.imread(image_path)
    if image is None:
        # Unreadable image: skip it so features and labels stay aligned.
        continue
    image = cv2.resize(image, (224, 224))  # Resize the image to match ResNet50 input size
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    features = model.predict(image)
    features_resnet.append(features.flatten())
    labels.append(row['Label'])

features_resnet = np.array(features_resnet)
labels = np.array(labels)

# Convert labels to numeric format
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(features_resnet, labels, test_size=0.2, random_state=42)

# Train a Random Forest model using the extracted features
model_rf = RandomForestClassifier()
model_rf.fit(X_train, y_train)

# Evaluate the model
y_pred = model_rf.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("Validation Accuracy:", accuracy)

# Use the trained model to make predictions on the test images
test_df = pd.read_csv('Test.csv')
features_test = []
test_rows = []  # index labels of the test rows whose image could be read

for index, row in test_df.iterrows():
    image_path = os.path.join('/content/test_images', row['ID'])
    image = cv2.imread(image_path)
    if image is None:
        continue
    image = cv2.resize(image, (224, 224))  # Resize the image to match ResNet50 input size
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    features = model.predict(image)
    features_test.append(features.flatten())
    test_rows.append(index)

features_test = np.array(features_test)

# Make predictions
predictions = model_rf.predict(features_test)

# Convert numeric predictions back to original labels
predicted_labels = label_encoder.inverse_transform(predictions)

# Assign by row index: when an image was skipped above, a positional
# assignment would raise a length mismatch. Skipped rows are left empty (NaN).
test_df['Label'] = pd.Series(predicted_labels, index=test_rows)
test_df.to_csv('Test_Predictions_Random.csv', index=False)


# **Decision Trees + resnet50**

In [None]:
import pandas as pd
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model

# Load the CSV file containing image names and labels for training
train_df = pd.read_csv('Train.csv')

# Headless ResNet50 (ImageNet weights) used purely as a fixed feature extractor.
# NOTE(review): images are passed as raw cv2 arrays (no scaling and no
# ResNet50 preprocess_input) — confirm this is intended.
resnet = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
model = Model(inputs=resnet.input, outputs=resnet.output)

features_resnet = []
labels = []

for index, row in train_df.iterrows():
    image_path = os.path.join('/content/train_images', row['ID'])
    image = cv2.imread(image_path)
    if image is None:
        # Unreadable image: skip it so features and labels stay aligned.
        continue
    image = cv2.resize(image, (224, 224))  # Resize the image to match ResNet50 input size
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    features = model.predict(image)
    features_resnet.append(features.flatten())
    labels.append(row['Label'])

features_resnet = np.array(features_resnet)
labels = np.array(labels)

# Convert labels to numeric format
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(features_resnet, labels, test_size=0.2, random_state=42)

# Train a Decision Tree model using the extracted features
model_dt = DecisionTreeClassifier()
model_dt.fit(X_train, y_train)

# Evaluate the model
y_pred = model_dt.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("Validation Accuracy:", accuracy)

# Use the trained model to make predictions on the test images
test_df = pd.read_csv('Test.csv')
features_test = []
test_rows = []  # index labels of the test rows whose image could be read

for index, row in test_df.iterrows():
    image_path = os.path.join('/content/test_images', row['ID'])
    image = cv2.imread(image_path)
    if image is None:
        continue
    image = cv2.resize(image, (224, 224))  # Resize the image to match ResNet50 input size
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    features = model.predict(image)
    features_test.append(features.flatten())
    test_rows.append(index)

features_test = np.array(features_test)

# Make predictions
predictions = model_dt.predict(features_test)

# Convert numeric predictions back to original labels
predicted_labels = label_encoder.inverse_transform(predictions)

# Assign by row index: when an image was skipped above, a positional
# assignment would raise a length mismatch. Skipped rows are left empty (NaN).
test_df['Label'] = pd.Series(predicted_labels, index=test_rows)
test_df.to_csv('Test_Predictions_Decision_Trees_resnet.csv', index=False)


# **Adaboost + Resnet50**

In [None]:
import pandas as pd
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input as resnet_preprocess
from tensorflow.keras.models import Model


def extract_features(frame, image_dir, extractor, size, preprocess, batch_size=32):
    """Turn the images listed in ``frame['ID']`` into flattened CNN feature rows.

    Parameters
    ----------
    frame : pandas.DataFrame with an ``ID`` column holding image file names.
    image_dir : directory the file names are relative to.
    extractor : Keras model used as a fixed feature extractor.
    size : ``(width, height)`` every image is resized to.
    preprocess : the ``preprocess_input`` function that matches ``extractor``.
    batch_size : number of images sent through ``extractor`` per call.

    Returns
    -------
    (features, kept_index) : ``features`` holds one flattened row per readable
    image; ``kept_index`` lists the ``frame`` index labels of those rows, in the
    same order. Images OpenCV cannot read are skipped.
    """
    kept_index, feature_chunks, pending = [], [], []

    def flush():
        # Run the queued images through the network as a single batch.
        if pending:
            batch = preprocess(np.stack(pending).astype('float32'))
            outputs = extractor.predict(batch, verbose=0)
            feature_chunks.append(outputs.reshape(len(pending), -1))
            pending.clear()

    for row_label, image_id in frame['ID'].items():
        image = cv2.imread(os.path.join(image_dir, image_id))
        if image is None:
            continue
        # OpenCV decodes to BGR; the Keras preprocessing functions expect RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        pending.append(cv2.resize(image, size))
        kept_index.append(row_label)
        if len(pending) == batch_size:
            flush()
    flush()

    if not feature_chunks:
        return np.empty((0, 0), dtype='float32'), kept_index
    return np.concatenate(feature_chunks), kept_index


# Load the CSV file containing image names and labels for training
train_df = pd.read_csv('Train.csv')

# Load the images and extract features using a pre-trained ResNet50 model
resnet = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
model = Model(inputs=resnet.input, outputs=resnet.output)

features_resnet, train_rows = extract_features(
    train_df, '/content/train_images', model, (224, 224), resnet_preprocess)
# Keep only the labels of the images that were actually read.
labels = train_df.loc[train_rows, 'Label'].to_numpy()

# Convert labels to numeric format
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(features_resnet, labels, test_size=0.2, random_state=42)

# Train an AdaBoost model using the extracted features and DecisionTreeClassifier as base estimator.
# The estimator is passed positionally because the keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases.
base_estimator = DecisionTreeClassifier(max_depth=1)
model_ab = AdaBoostClassifier(base_estimator, n_estimators=50, random_state=42)
model_ab.fit(X_train, y_train)

# Evaluate the model
y_pred = model_ab.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("Validation Accuracy:", accuracy)

# Use the trained model to make predictions on the test images
test_df = pd.read_csv('Test.csv')
features_test, test_rows = extract_features(
    test_df, '/content/test_images', model, (224, 224), resnet_preprocess)

# Make predictions
predictions = model_ab.predict(features_test)

# Convert numeric predictions back to original labels
predicted_labels = label_encoder.inverse_transform(predictions)

# Save the predictions to a CSV file.
# The index-aligned Series keeps every prediction on its own row even when
# some test images were skipped; those rows get an empty Label.
skipped_test_images = len(test_df) - len(test_rows)
if skipped_test_images:
    print("Warning:", skipped_test_images, "test image(s) could not be read; their Label is left empty.")
test_df['Label'] = pd.Series(predicted_labels, index=test_rows)
test_df.to_csv('Test_Predictions_Adaboost_resnet.csv', index=False)


# **AdaBoost + Xception**

In [None]:
import pandas as pd
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from tensorflow.keras.applications import Xception
from tensorflow.keras.applications.xception import preprocess_input as xception_preprocess
from tensorflow.keras.models import Model


def extract_features(frame, image_dir, extractor, size, preprocess, batch_size=32):
    """Turn the images listed in ``frame['ID']`` into flattened CNN feature rows.

    Parameters
    ----------
    frame : pandas.DataFrame with an ``ID`` column holding image file names.
    image_dir : directory the file names are relative to.
    extractor : Keras model used as a fixed feature extractor.
    size : ``(width, height)`` every image is resized to.
    preprocess : the ``preprocess_input`` function that matches ``extractor``.
    batch_size : number of images sent through ``extractor`` per call.

    Returns
    -------
    (features, kept_index) : ``features`` holds one flattened row per readable
    image; ``kept_index`` lists the ``frame`` index labels of those rows, in the
    same order. Images OpenCV cannot read are skipped.
    """
    kept_index, feature_chunks, pending = [], [], []

    def flush():
        # Run the queued images through the network as a single batch.
        if pending:
            batch = preprocess(np.stack(pending).astype('float32'))
            outputs = extractor.predict(batch, verbose=0)
            feature_chunks.append(outputs.reshape(len(pending), -1))
            pending.clear()

    for row_label, image_id in frame['ID'].items():
        image = cv2.imread(os.path.join(image_dir, image_id))
        if image is None:
            continue
        # OpenCV decodes to BGR; the Keras preprocessing functions expect RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        pending.append(cv2.resize(image, size))
        kept_index.append(row_label)
        if len(pending) == batch_size:
            flush()
    flush()

    if not feature_chunks:
        return np.empty((0, 0), dtype='float32'), kept_index
    return np.concatenate(feature_chunks), kept_index


# Load the CSV file containing image names and labels for training
train_df = pd.read_csv('Train.csv')

# Load the images and extract features using a pre-trained Xception model
xception = Xception(weights='imagenet', include_top=False, input_shape=(299, 299, 3))
model = Model(inputs=xception.input, outputs=xception.output)

features_xception, train_rows = extract_features(
    train_df, '/content/train_images', model, (299, 299), xception_preprocess)
# Keep only the labels of the images that were actually read.
labels = train_df.loc[train_rows, 'Label'].to_numpy()

# Convert labels to numeric format
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(features_xception, labels, test_size=0.2, random_state=42)

# Train an AdaBoost model using the extracted features and DecisionTreeClassifier as base estimator.
# The estimator is passed positionally because the keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases.
base_estimator = DecisionTreeClassifier(max_depth=1)
model_ab = AdaBoostClassifier(base_estimator, n_estimators=50, random_state=42)
model_ab.fit(X_train, y_train)

# Evaluate the model
y_pred = model_ab.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("Validation Accuracy:", accuracy)

# Use the trained model to make predictions on the test images
test_df = pd.read_csv('Test.csv')
features_test, test_rows = extract_features(
    test_df, '/content/test_images', model, (299, 299), xception_preprocess)

# Make predictions
predictions = model_ab.predict(features_test)

# Convert numeric predictions back to original labels
predicted_labels = label_encoder.inverse_transform(predictions)

# Save the predictions to a CSV file.
# The index-aligned Series keeps every prediction on its own row even when
# some test images were skipped; those rows get an empty Label.
skipped_test_images = len(test_df) - len(test_rows)
if skipped_test_images:
    print("Warning:", skipped_test_images, "test image(s) could not be read; their Label is left empty.")
test_df['Label'] = pd.Series(predicted_labels, index=test_rows)
test_df.to_csv('Test_Predictions_Adaboost_Xception.csv', index=False)


# **SVC + Xception**

In [None]:
import pandas as pd
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.applications import Xception
from tensorflow.keras.applications.xception import preprocess_input as xception_preprocess
from tensorflow.keras.models import Model


def extract_features(frame, image_dir, extractor, size, preprocess, batch_size=32):
    """Turn the images listed in ``frame['ID']`` into flattened CNN feature rows.

    Parameters
    ----------
    frame : pandas.DataFrame with an ``ID`` column holding image file names.
    image_dir : directory the file names are relative to.
    extractor : Keras model used as a fixed feature extractor.
    size : ``(width, height)`` every image is resized to.
    preprocess : the ``preprocess_input`` function that matches ``extractor``.
    batch_size : number of images sent through ``extractor`` per call.

    Returns
    -------
    (features, kept_index) : ``features`` holds one flattened row per readable
    image; ``kept_index`` lists the ``frame`` index labels of those rows, in the
    same order. Images OpenCV cannot read are skipped.
    """
    kept_index, feature_chunks, pending = [], [], []

    def flush():
        # Run the queued images through the network as a single batch.
        if pending:
            batch = preprocess(np.stack(pending).astype('float32'))
            outputs = extractor.predict(batch, verbose=0)
            feature_chunks.append(outputs.reshape(len(pending), -1))
            pending.clear()

    for row_label, image_id in frame['ID'].items():
        image = cv2.imread(os.path.join(image_dir, image_id))
        if image is None:
            continue
        # OpenCV decodes to BGR; the Keras preprocessing functions expect RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        pending.append(cv2.resize(image, size))
        kept_index.append(row_label)
        if len(pending) == batch_size:
            flush()
    flush()

    if not feature_chunks:
        return np.empty((0, 0), dtype='float32'), kept_index
    return np.concatenate(feature_chunks), kept_index


# Load the CSV file containing image names and labels for training
train_df = pd.read_csv('Train.csv')

# Load the images and extract features using a pre-trained Xception model
xception = Xception(weights='imagenet', include_top=False, input_shape=(299, 299, 3))
model = Model(inputs=xception.input, outputs=xception.output)

features_xception, train_rows = extract_features(
    train_df, '/content/train_images', model, (299, 299), xception_preprocess)
# Keep only the labels of the images that were actually read.
labels = train_df.loc[train_rows, 'Label'].to_numpy()

# Convert labels to numeric format
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(features_xception, labels, test_size=0.2, random_state=42)

# Train an SVC model using the extracted features
model_svc = SVC()
model_svc.fit(X_train, y_train)

# Evaluate the model
y_pred = model_svc.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("Validation Accuracy:", accuracy)

# Use the trained model to make predictions on the test images
test_df = pd.read_csv('Test.csv')
features_test, test_rows = extract_features(
    test_df, '/content/test_images', model, (299, 299), xception_preprocess)

# Make predictions
predictions = model_svc.predict(features_test)

# Convert numeric predictions back to original labels
predicted_labels = label_encoder.inverse_transform(predictions)

# Save the predictions to a CSV file.
# The index-aligned Series keeps every prediction on its own row even when
# some test images were skipped; those rows get an empty Label.
skipped_test_images = len(test_df) - len(test_rows)
if skipped_test_images:
    print("Warning:", skipped_test_images, "test image(s) could not be read; their Label is left empty.")
test_df['Label'] = pd.Series(predicted_labels, index=test_rows)
test_df.to_csv('Test_Predictions_SVC_Xception.csv', index=False)


# **KNN + ResNet50**

In [None]:
import pandas as pd
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input as resnet_preprocess
from tensorflow.keras.models import Model


def extract_features(frame, image_dir, extractor, size, preprocess, batch_size=32):
    """Turn the images listed in ``frame['ID']`` into flattened CNN feature rows.

    Parameters
    ----------
    frame : pandas.DataFrame with an ``ID`` column holding image file names.
    image_dir : directory the file names are relative to.
    extractor : Keras model used as a fixed feature extractor.
    size : ``(width, height)`` every image is resized to.
    preprocess : the ``preprocess_input`` function that matches ``extractor``.
    batch_size : number of images sent through ``extractor`` per call.

    Returns
    -------
    (features, kept_index) : ``features`` holds one flattened row per readable
    image; ``kept_index`` lists the ``frame`` index labels of those rows, in the
    same order. Images OpenCV cannot read are skipped.
    """
    kept_index, feature_chunks, pending = [], [], []

    def flush():
        # Run the queued images through the network as a single batch.
        if pending:
            batch = preprocess(np.stack(pending).astype('float32'))
            outputs = extractor.predict(batch, verbose=0)
            feature_chunks.append(outputs.reshape(len(pending), -1))
            pending.clear()

    for row_label, image_id in frame['ID'].items():
        image = cv2.imread(os.path.join(image_dir, image_id))
        if image is None:
            continue
        # OpenCV decodes to BGR; the Keras preprocessing functions expect RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        pending.append(cv2.resize(image, size))
        kept_index.append(row_label)
        if len(pending) == batch_size:
            flush()
    flush()

    if not feature_chunks:
        return np.empty((0, 0), dtype='float32'), kept_index
    return np.concatenate(feature_chunks), kept_index


# Load the CSV file containing image names and labels for training
train_df = pd.read_csv('Train.csv')

# Load the images and extract features using a pre-trained ResNet50 model
resnet = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
model = Model(inputs=resnet.input, outputs=resnet.output)

features_resnet, train_rows = extract_features(
    train_df, '/content/train_images', model, (224, 224), resnet_preprocess)
# Keep only the labels of the images that were actually read.
labels = train_df.loc[train_rows, 'Label'].to_numpy()

# Convert labels to numeric format
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(features_resnet, labels, test_size=0.2, random_state=42)

# Train a KNN model using the extracted features
model_knn = KNeighborsClassifier(n_neighbors=5)
model_knn.fit(X_train, y_train)

# Evaluate the model
y_pred = model_knn.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("Validation Accuracy:", accuracy)

# Use the trained model to make predictions on the test images
test_df = pd.read_csv('Test.csv')
features_test, test_rows = extract_features(
    test_df, '/content/test_images', model, (224, 224), resnet_preprocess)

# Make predictions
predictions = model_knn.predict(features_test)

# Convert numeric predictions back to original labels
predicted_labels = label_encoder.inverse_transform(predictions)

# Save the predictions to a CSV file.
# The index-aligned Series keeps every prediction on its own row even when
# some test images were skipped; those rows get an empty Label.
skipped_test_images = len(test_df) - len(test_rows)
if skipped_test_images:
    print("Warning:", skipped_test_images, "test image(s) could not be read; their Label is left empty.")
test_df['Label'] = pd.Series(predicted_labels, index=test_rows)
test_df.to_csv('Test_Predictions_KNN_resnet.csv', index=False)


# **Naive Bayes + ResNet50**

In [None]:
import pandas as pd
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input as resnet_preprocess
from tensorflow.keras.models import Model


def extract_features(frame, image_dir, extractor, size, preprocess, batch_size=32):
    """Turn the images listed in ``frame['ID']`` into flattened CNN feature rows.

    Parameters
    ----------
    frame : pandas.DataFrame with an ``ID`` column holding image file names.
    image_dir : directory the file names are relative to.
    extractor : Keras model used as a fixed feature extractor.
    size : ``(width, height)`` every image is resized to.
    preprocess : the ``preprocess_input`` function that matches ``extractor``.
    batch_size : number of images sent through ``extractor`` per call.

    Returns
    -------
    (features, kept_index) : ``features`` holds one flattened row per readable
    image; ``kept_index`` lists the ``frame`` index labels of those rows, in the
    same order. Images OpenCV cannot read are skipped.
    """
    kept_index, feature_chunks, pending = [], [], []

    def flush():
        # Run the queued images through the network as a single batch.
        if pending:
            batch = preprocess(np.stack(pending).astype('float32'))
            outputs = extractor.predict(batch, verbose=0)
            feature_chunks.append(outputs.reshape(len(pending), -1))
            pending.clear()

    for row_label, image_id in frame['ID'].items():
        image = cv2.imread(os.path.join(image_dir, image_id))
        if image is None:
            continue
        # OpenCV decodes to BGR; the Keras preprocessing functions expect RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        pending.append(cv2.resize(image, size))
        kept_index.append(row_label)
        if len(pending) == batch_size:
            flush()
    flush()

    if not feature_chunks:
        return np.empty((0, 0), dtype='float32'), kept_index
    return np.concatenate(feature_chunks), kept_index


# Load the CSV file containing image names and labels for training
train_df = pd.read_csv('Train.csv')

# Load the images and extract features using a pre-trained ResNet50 model
resnet = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
model = Model(inputs=resnet.input, outputs=resnet.output)

features_resnet, train_rows = extract_features(
    train_df, '/content/train_images', model, (224, 224), resnet_preprocess)
# Keep only the labels of the images that were actually read.
labels = train_df.loc[train_rows, 'Label'].to_numpy()

# Convert labels to numeric format
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(features_resnet, labels, test_size=0.2, random_state=42)

# Train a Naive Bayes model using the extracted features
model_nb = GaussianNB()
model_nb.fit(X_train, y_train)

# Evaluate the model
y_pred = model_nb.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("Validation Accuracy:", accuracy)

# Use the trained model to make predictions on the test images
test_df = pd.read_csv('Test.csv')
features_test, test_rows = extract_features(
    test_df, '/content/test_images', model, (224, 224), resnet_preprocess)

# Make predictions
predictions = model_nb.predict(features_test)

# Convert numeric predictions back to original labels
predicted_labels = label_encoder.inverse_transform(predictions)

# Save the predictions to a CSV file.
# The index-aligned Series keeps every prediction on its own row even when
# some test images were skipped; those rows get an empty Label.
skipped_test_images = len(test_df) - len(test_rows)
if skipped_test_images:
    print("Warning:", skipped_test_images, "test image(s) could not be read; their Label is left empty.")
test_df['Label'] = pd.Series(predicted_labels, index=test_rows)
test_df.to_csv('Test_Predictions_Naive_Bayes_resnet50.csv', index=False)
