In [1]:
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import numpy as np
import pandas as pd
import seaborn as sns
from PIL import Image
import os
from tqdm import tqdm
import cv2

import shutil

In [2]:
from google.colab import files

In [3]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [4]:
!kaggle competitions download -c umor-classification

Downloading umor-classification.zip to /content
 99% 190M/193M [00:10<00:00, 21.7MB/s]
100% 193M/193M [00:10<00:00, 18.7MB/s]


In [5]:
!unzip umor-classification.zip

Archive:  umor-classification.zip
  inflating: images/images/1006.png  
  inflating: images/images/1028.png  
  inflating: images/images/1046.png  
  inflating: images/images/1062.png  
  inflating: images/images/1066.png  
  inflating: images/images/1068.png  
  inflating: images/images/1070.png  
  inflating: images/images/1073.png  
  inflating: images/images/1091.png  
  inflating: images/images/1097.png  
  inflating: images/images/1105.png  
  inflating: images/images/1127.png  
  inflating: images/images/1138.png  
  inflating: images/images/1154.png  
  inflating: images/images/1156.png  
  inflating: images/images/1159.png  
  inflating: images/images/1183.png  
  inflating: images/images/1191.png  
  inflating: images/images/1202.png  
  inflating: images/images/1218.png  
  inflating: images/images/1230.png  
  inflating: images/images/1234.png  
  inflating: images/images/1235.png  
  inflating: images/images/1240.png  
  inflating: images/images/1253.png  
  inflating: ima

### Load the CSV files containing the data:

In [6]:
# # Load and preprocess training images
image_folder   = '/content/images/images/'

In [7]:
print(f"There are {len(os.listdir(image_folder ))} images in  dataset")

There are 775 images in  dataset


In [8]:
train_df  = pd.read_csv('trainset.csv')
test_df   = pd.read_csv('testset.csv')

In [9]:
train_df['Class'].value_counts()

Benign       300
Malignant    148
Normal        91
Name: Class, dtype: int64

In [10]:
train_df[:50]

Unnamed: 0.1,Unnamed: 0,Id,Class
0,211,8602,Malignant
1,315,7464,Malignant
2,165,5832,Malignant
3,188,2843,Malignant
4,710,5272,Benign
5,285,9436,Malignant
6,34,4050,Normal
7,708,7653,Benign
8,334,7048,Malignant
9,776,2885,Benign


In [11]:
image_filenames = os.listdir(image_folder)

In [12]:
id_to_label = dict(zip(train_df['Id'], train_df['Class']))

In [13]:
def load_images_with_labels(df):
    images = []
    labels = []
    for index, row in df.iterrows():
        image_id = row['Id']
        if image_id in id_to_label:
            label = id_to_label[image_id]

            image_path = os.path.join(image_folder, f"{image_id}.png")
            if os.path.exists(image_path):  # Check if the image file exists
                image = cv2.imread(image_path)  # Load the image using cv2

                if image is not None:  # Check if the image was loaded successfully
                    images.append(image)
                    labels.append(label)
                else:
                    print(f"Warning: Unable to load image: {image_path}")
            else:
                print(f"Warning: Image not found: {image_path}")
    return np.array(images), np.array(labels)

In [14]:
X_train, y_train = load_images_with_labels(train_df)

  return np.array(images), np.array(labels)


In [15]:
y_train

array(['Malignant', 'Malignant', 'Malignant', 'Malignant', 'Benign',
       'Malignant', 'Normal', 'Benign', 'Malignant', 'Benign', 'Benign',
       'Benign', 'Benign', 'Malignant', 'Benign', 'Benign', 'Benign',
       'Benign', 'Malignant', 'Malignant', 'Normal', 'Benign',
       'Malignant', 'Malignant', 'Malignant', 'Malignant', 'Benign',
       'Benign', 'Malignant', 'Benign', 'Benign', 'Benign', 'Benign',
       'Malignant', 'Benign', 'Benign', 'Benign', 'Benign', 'Benign',
       'Benign', 'Normal', 'Malignant', 'Benign', 'Normal', 'Malignant',
       'Benign', 'Normal', 'Benign', 'Benign', 'Benign', 'Normal',
       'Benign', 'Benign', 'Malignant', 'Malignant', 'Benign', 'Benign',
       'Malignant', 'Benign', 'Malignant', 'Benign', 'Benign',
       'Malignant', 'Benign', 'Normal', 'Malignant', 'Malignant',
       'Malignant', 'Benign', 'Benign', 'Malignant', 'Benign', 'Normal',
       'Benign', 'Benign', 'Benign', 'Normal', 'Benign', 'Malignant',
       'Normal', 'Normal', 'Mal

In [16]:
image_filenames = [filename for filename in os.listdir(image_folder) if filename.endswith(".png")]
image_ids = [int(image_id[:-4]) for image_id in image_filenames if image_id[:-4].isnumeric()]

# Get the image IDs included in the training set
train_image_ids = set(train_df['Id'])

In [17]:
# Get the image IDs from the test CSV
test_image_ids = list(test_df['Id'])

In [18]:
remaining_test_image_ids = [image_id for image_id in test_image_ids if image_id not in train_image_ids]

In [19]:
def load_remaining_test_images(image_ids):
    images = []
    for image_id in image_ids:
        image_path = os.path.join(image_folder, f"{image_id}.png")
        image = cv2.imread(image_path)
        if image is not None:
            images.append(image)
    return np.array(images)

In [20]:
X_test = load_remaining_test_images(remaining_test_image_ids)

  return np.array(images)


In [21]:
X_test.shape

(229,)

In [22]:
from sklearn.utils import shuffle
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Create separate lists for each class
benign_images = []
malignant_images = []
normal_images = []

# Iterate through the training data and populate the lists based on class
for image, label in zip(X_train, y_train):
    if label == "Benign":
        benign_images.append(image)
    elif label == "Malignant":
        malignant_images.append(image)
    elif label == "Normal":
        normal_images.append(image)

In [23]:
benign_images

[array([[[  5,   5,   5],
         [ 18,  18,  18],
         [151, 151, 151],
         ...,
         [132, 132, 132],
         [108, 108, 108],
         [ 69,  69,  69]],
 
        [[  1,   1,   1],
         [ 35,  35,  35],
         [133, 133, 133],
         ...,
         [122, 122, 122],
         [120, 120, 120],
         [105, 105, 105]],
 
        [[  7,   7,   7],
         [ 66,  66,  66],
         [ 96,  96,  96],
         ...,
         [139, 139, 139],
         [135, 135, 135],
         [128, 128, 128]],
 
        ...,
 
        [[ 60,  60,  60],
         [ 58,  58,  58],
         [ 58,  58,  58],
         ...,
         [ 70,  70,  70],
         [ 67,  67,  67],
         [ 64,  64,  64]],
 
        [[ 64,  64,  64],
         [ 67,  67,  67],
         [ 68,  68,  68],
         ...,
         [ 75,  75,  75],
         [ 74,  74,  74],
         [ 73,  73,  73]],
 
        [[ 46,  46,  46],
         [ 49,  49,  49],
         [ 51,  51,  51],
         ...,
         [ 85,  85,  85],
  

In [24]:
malignant_images

[array([[[202, 202, 202],
         [ 43,  43,  43],
         [  2,   2,   2],
         ...,
         [116, 116, 116],
         [107, 107, 107],
         [103, 103, 103]],
 
        [[203, 203, 203],
         [ 42,  42,  42],
         [  0,   0,   0],
         ...,
         [118, 118, 118],
         [109, 109, 109],
         [105, 105, 105]],
 
        [[198, 198, 198],
         [ 51,  51,  51],
         [  3,   3,   3],
         ...,
         [113, 113, 113],
         [106, 106, 106],
         [102, 102, 102]],
 
        ...,
 
        [[ 23,  23,  23],
         [ 23,  23,  23],
         [ 23,  23,  23],
         ...,
         [ 28,  28,  28],
         [ 27,  27,  27],
         [ 27,  27,  27]],
 
        [[ 28,  28,  28],
         [ 27,  27,  27],
         [ 25,  25,  25],
         ...,
         [ 33,  33,  33],
         [ 32,  32,  32],
         [ 31,  31,  31]],
 
        [[ 31,  31,  31],
         [ 29,  29,  29],
         [ 26,  26,  26],
         ...,
         [ 35,  35,  35],
  

In [25]:
normal_images

[array([[[ 76,  76,  76],
         [ 79,  79,  79],
         [ 79,  79,  79],
         ...,
         [ 70,  70,  70],
         [ 67,  67,  67],
         [ 65,  65,  65]],
 
        [[157, 157, 157],
         [159, 159, 159],
         [162, 162, 162],
         ...,
         [173, 173, 173],
         [166, 166, 166],
         [162, 162, 162]],
 
        [[142, 142, 142],
         [148, 148, 148],
         [159, 159, 159],
         ...,
         [181, 181, 181],
         [176, 176, 176],
         [172, 172, 172]],
 
        ...,
 
        [[  4,   4,   4],
         [  7,   7,   7],
         [ 12,  12,  12],
         ...,
         [ 13,  13,  13],
         [ 13,  13,  13],
         [ 13,  13,  13]],
 
        [[  7,   7,   7],
         [ 12,  12,  12],
         [ 14,  14,  14],
         ...,
         [ 16,  16,  16],
         [ 14,  14,  14],
         [ 12,  12,  12]],
 
        [[ 12,  12,  12],
         [ 17,  17,  17],
         [ 18,  18,  18],
         ...,
         [ 18,  18,  18],
  

In [26]:
# Convert lists to numpy arrays
benign_images = np.array(benign_images)
malignant_images = np.array(malignant_images)
normal_images = np.array(normal_images)

  benign_images = np.array(benign_images)
  malignant_images = np.array(malignant_images)
  normal_images = np.array(normal_images)


In [27]:
# Determine the maximum class count
max(benign_images.shape[0], malignant_images.shape[0], normal_images.shape[0])

300

In [28]:
'''# Function to perform data augmentation for medical images
def perform_data_augmentation(images, desired_count):
    augmented_images = []
    current_count = images.shape[0]

    while current_count < desired_count:
        random_index = np.random.randint(0, images.shape[0])
        original_image = images[random_index]

        datagen = ImageDataGenerator(
            rotation_range=15,
            width_shift_range=0.1,
            height_shift_range=0.1,
            shear_range=0.1,
            zoom_range=0.1,
            horizontal_flip=True,
            fill_mode='nearest'
        )

        augmented_image = datagen.random_transform(original_image)
        augmented_images.append(augmented_image)

        current_count += 1

    return np.array(augmented_images)'''


"# Function to perform data augmentation for medical images\ndef perform_data_augmentation(images, desired_count):\n    augmented_images = []\n    current_count = images.shape[0]\n\n    while current_count < desired_count:\n        random_index = np.random.randint(0, images.shape[0])\n        original_image = images[random_index]\n\n        datagen = ImageDataGenerator(\n            rotation_range=15,\n            width_shift_range=0.1,\n            height_shift_range=0.1,\n            shear_range=0.1,\n            zoom_range=0.1,\n            horizontal_flip=True,\n            fill_mode='nearest'\n        )\n\n        augmented_image = datagen.random_transform(original_image)\n        augmented_images.append(augmented_image)\n\n        current_count += 1\n\n    return np.array(augmented_images)"

In [29]:
'''# Perform data augmentation for the minority classes
augmented_malignant = perform_data_augmentation(malignant_images, 300)
augmented_normal = perform_data_augmentation(normal_images, 300)
'''

'# Perform data augmentation for the minority classes\naugmented_malignant = perform_data_augmentation(malignant_images, 300)\naugmented_normal = perform_data_augmentation(normal_images, 300)\n'

In [30]:
# Concatenate the balanced data
balanced_X_train = np.concatenate((benign_images, malignant_images, normal_images))
balanced_y_train = np.concatenate((
    np.array(["Benign"] * benign_images.shape[0]),
    #np.array(["Malignant"] * augmented_malignant.shape[0]),
    #np.array(["Normal"] * augmented_normal.shape[0]),
    np.array(["Malignant"] * malignant_images.shape[0]),
    np.array(["Normal"] * normal_images.shape[0])

))


In [31]:
len(balanced_X_train),len(balanced_y_train)

(539, 539)

In [32]:
balanced_X_train

array([array([[[  5,   5,   5],
               [ 18,  18,  18],
               [151, 151, 151],
               ...,
               [132, 132, 132],
               [108, 108, 108],
               [ 69,  69,  69]],

              [[  1,   1,   1],
               [ 35,  35,  35],
               [133, 133, 133],
               ...,
               [122, 122, 122],
               [120, 120, 120],
               [105, 105, 105]],

              [[  7,   7,   7],
               [ 66,  66,  66],
               [ 96,  96,  96],
               ...,
               [139, 139, 139],
               [135, 135, 135],
               [128, 128, 128]],

              ...,

              [[ 60,  60,  60],
               [ 58,  58,  58],
               [ 58,  58,  58],
               ...,
               [ 70,  70,  70],
               [ 67,  67,  67],
               [ 64,  64,  64]],

              [[ 64,  64,  64],
               [ 67,  67,  67],
               [ 68,  68,  68],
               ...,
        

In [33]:
# Shuffle the balanced data
balanced_X_train, balanced_y_train = shuffle(balanced_X_train, balanced_y_train)


In [34]:
balanced_X_train[0].shape

(582, 776, 3)

In [82]:
def resize_and_normalize_image(image, desired_size):
    # Resize the image
    resized_image = cv2.resize(image, desired_size)

    # Normalize the image by dividing each pixel value by 255
    normalized_image = resized_image / 255.0

    return normalized_image

def resize_and_normalize_images(image_array, desired_size):
    normalized_images = []

    for image in image_array:
        normalized_image = resize_and_normalize_image(image, desired_size)
        normalized_images.append(normalized_image)

    return np.array(normalized_images)

# Example usage
desired_image_size = (200, 200)  # Adjust the desired size

# Resize and normalize images in balanced_X_train
normalized_balanced_X_train = resize_and_normalize_images(balanced_X_train, desired_image_size)

# Resize and normalize images in X_test
normalized_X_test = resize_and_normalize_images(X_test, desired_image_size)

In [83]:
normalized_balanced_X_train.shape

(539, 200, 200, 3)

In [84]:
normalized_X_test.shape

(229, 200, 200, 3)

In [85]:
le = LabelEncoder()
balanced_y_train = le.fit_transform(balanced_y_train)


In [86]:
balanced_y_train

array([0, 0, 0, 0, 0, 2, 1, 0, 1, 1, 1, 0, 2, 0, 2, 0, 2, 0, 1, 0, 0, 0,
       0, 1, 1, 1, 1, 2, 1, 1, 2, 0, 0, 0, 0, 0, 2, 2, 0, 2, 0, 1, 0, 1,
       1, 0, 1, 1, 0, 1, 2, 2, 0, 0, 2, 0, 0, 0, 0, 1, 1, 1, 2, 1, 0, 2,
       0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 0, 2, 1, 0, 2, 0, 2, 1, 0, 1, 2, 2,
       0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 0, 2, 2, 0, 0, 0, 1, 1, 0, 0, 2,
       0, 2, 0, 2, 1, 1, 0, 1, 1, 0, 2, 0, 1, 2, 1, 0, 1, 0, 2, 0, 0, 0,
       1, 0, 2, 0, 0, 2, 0, 0, 0, 1, 1, 0, 0, 2, 0, 0, 0, 1, 0, 0, 2, 1,
       1, 2, 0, 1, 0, 0, 1, 0, 2, 0, 2, 2, 0, 0, 0, 0, 2, 1, 0, 1, 2, 1,
       2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 1, 0, 1, 1, 0, 0, 1, 0,
       2, 0, 1, 0, 0, 1, 2, 0, 0, 1, 2, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1,
       0, 0, 1, 1, 0, 1, 2, 2, 1, 0, 1, 2, 1, 2, 1, 0, 0, 1, 1, 1, 0, 1,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 2, 0, 0, 1, 0, 1, 1, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0,
       2, 0, 0, 2, 0, 0, 2, 0, 0, 1, 1, 0, 1, 0, 2,

In [87]:
X_train,X_val,y_train,y_val = train_test_split(normalized_balanced_X_train,balanced_y_train, test_size=0.2,random_state=42)

In [88]:
X_train.shape,X_val.shape,y_train.shape,y_val.shape

((431, 200, 200, 3), (108, 200, 200, 3), (431,), (108,))

In [89]:
from skimage.feature import hog
# List to store extracted HOG features
training_hog_features = []

hog_features = []

for image in X_train:
    # Compute HOG features for each color channel
    hog_channel_0 = hog(image[:, :, 0], block_norm='L2-Hys', visualize=False)
    hog_channel_1 = hog(image[:, :, 1], block_norm='L2-Hys', visualize=False)
    hog_channel_2 = hog(image[:, :, 2], block_norm='L2-Hys', visualize=False)

    # Concatenate HOG features from all channels
    hog_feature = np.concatenate((hog_channel_0, hog_channel_1, hog_channel_2))

    hog_features.append(hog_feature)

In [90]:

# Convert to NumPy array
X_train = np.array(hog_features)

In [91]:
X_train.shape

(431, 128547)

In [92]:
# Assuming X_val is a list of RGB images
X_val = np.array(X_val)  # Convert the list to a numpy array
print(X_val.shape)  # Check the shape to make sure it's (num_images, height, width, channels)


(108, 200, 200, 3)


In [93]:
validation_hog_features = []
hog_feature = []

for image in X_val:
    hog_channel_0 = hog(image[:, :, 0], block_norm='L2-Hys', visualize=False)
    hog_channel_1 = hog(image[:, :, 1], block_norm='L2-Hys', visualize=False)
    hog_channel_2 = hog(image[:, :, 2], block_norm='L2-Hys', visualize=False)

    hog_feature = np.concatenate((hog_channel_0, hog_channel_1, hog_channel_2))

    validation_hog_features.append(hog_feature)

X_val = np.array(validation_hog_features)

In [94]:
X_val.shape

(108, 128547)

In [95]:
'''# Flatten the images in X_train and X_val
X_train_flattened = X_train.reshape(X_train.shape[0], -1)
X_val_flattened = X_val.reshape(X_val.shape[0], -1)
X_train_flattened.shape,X_val_flattened.shape'''

'# Flatten the images in X_train and X_val\nX_train_flattened = X_train.reshape(X_train.shape[0], -1)\nX_val_flattened = X_val.reshape(X_val.shape[0], -1)\nX_train_flattened.shape,X_val_flattened.shape'

In [96]:
'''normalized_X_test = normalized_X_test.reshape(normalized_X_test.shape[0], -1)
normalized_X_test.shape'''

'normalized_X_test = normalized_X_test.reshape(normalized_X_test.shape[0], -1)\nnormalized_X_test.shape'

In [97]:
test_hog_features = []
hog_feature = []

# Iterate through each normalized test image
for image in normalized_X_test:
    # Extract HOG features for each channel
    hog_channel_0 = hog(image[:, :, 0], block_norm='L2-Hys', visualize=False)
    hog_channel_1 = hog(image[:, :, 1], block_norm='L2-Hys', visualize=False)
    hog_channel_2 = hog(image[:, :, 2], block_norm='L2-Hys', visualize=False)

    # Concatenate HOG features from all channels
    hog_feature = np.concatenate((hog_channel_0, hog_channel_1, hog_channel_2))

    # Append the concatenated HOG feature to the test_hog_features list
    test_hog_features.append(hog_feature)

# Convert the test_hog_features list to a numpy array
normalized_X_test = np.array(test_hog_features)

In [98]:
normalized_X_test.shape

(229, 128547)

In [99]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.svm import SVC

# Create an SVM classifier
svm_classifier = SVC(kernel='rbf', C=3)

# Train the SVM classifier on the training data
svm_classifier.fit(X_train, y_train)

y_pred1 = svm_classifier.predict(X_train)
y_pred2 = svm_classifier.predict(X_val)

accuracy1 = accuracy_score(y_train, y_pred1)
accuracy2 = accuracy_score(y_val, y_pred2)

confusion1 = confusion_matrix(y_train, y_pred1)
confusion2 = confusion_matrix(y_val, y_pred2)

classification_rep1 = classification_report(y_train, y_pred1)
classification_rep2 = classification_report(y_val, y_pred2)

print("Accuracy train:", accuracy1)
print("Confusion Matrix train:\n", confusion1)
print("Classification Report train:\n", classification_rep1)
print("********************************************************")
print("Accuracy test:", accuracy2)
print("Confusion Matrix test:\n", confusion2)
print("Classification Report test:\n", classification_rep2)


Accuracy train: 1.0
Confusion Matrix train:
 [[245   0   0]
 [  0 112   0]
 [  0   0  74]]
Classification Report train:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       245
           1       1.00      1.00      1.00       112
           2       1.00      1.00      1.00        74

    accuracy                           1.00       431
   macro avg       1.00      1.00      1.00       431
weighted avg       1.00      1.00      1.00       431

********************************************************
Accuracy test: 0.7685185185185185
Confusion Matrix test:
 [[53  2  0]
 [13 23  0]
 [ 9  1  7]]
Classification Report test:
               precision    recall  f1-score   support

           0       0.71      0.96      0.82        55
           1       0.88      0.64      0.74        36
           2       1.00      0.41      0.58        17

    accuracy                           0.77       108
   macro avg       0.86      0.67      0.71  

In [79]:
'''from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

# Initialize and train Extra Trees classifier
clf = ExtraTreesClassifier(
    n_estimators=50,
    random_state=42,
    #class_weight='balanced',
    max_features=1000,
    max_depth=10,
    min_weight_fraction_leaf=0.0,
    max_samples=0.5,
    bootstrap=True
)
clf.fit(X_train, y_train)

# Make predictions
predictions = clf.predict(X_val)

accuracy1 = accuracy_score(y_train, y_pred1)
accuracy2 = accuracy_score(y_val, y_pred2)

confusion1 = confusion_matrix(y_train, y_pred1)
confusion2 = confusion_matrix(y_val, y_pred2)

classification_rep1 = classification_report(y_train, y_pred1)
classification_rep2 = classification_report(y_val, y_pred2)

print("Accuracy train:", accuracy1)
print("Confusion Matrix train:\n", confusion1)
print("Classification Report train:\n", classification_rep1)
print("********************************************************")
print("Accuracy test:", accuracy2)
print("Confusion Matrix test:\n", confusion2)
print("Classification Report test:\n", classification_rep2)'''

Accuracy train: 1.0
Confusion Matrix train:
 [[245   0   0]
 [  0 112   0]
 [  0   0  74]]
Classification Report train:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       245
           1       1.00      1.00      1.00       112
           2       1.00      1.00      1.00        74

    accuracy                           1.00       431
   macro avg       1.00      1.00      1.00       431
weighted avg       1.00      1.00      1.00       431

********************************************************
Accuracy test: 0.7592592592592593
Confusion Matrix test:
 [[51  3  1]
 [13 23  0]
 [ 8  1  8]]
Classification Report test:
               precision    recall  f1-score   support

           0       0.71      0.93      0.80        55
           1       0.85      0.64      0.73        36
           2       0.89      0.47      0.62        17

    accuracy                           0.76       108
   macro avg       0.82      0.68      0.72  

In [70]:
# Make predictions
predictions = svm_classifier.predict(normalized_X_test)

In [71]:
test_df['labels'] = predictions


In [68]:
test_df['labels'].value_counts()

0    162
1     46
2     21
Name: labels, dtype: int64

In [56]:
size_mapping = {0: 'Benign',
        1: 'Malignant',
        2: 'Normal'}

test_df['labels'] = test_df['labels'].map(size_mapping)
test_df['labels']

0      Benign
1      Benign
2      Benign
3      Benign
4      Benign
        ...  
224    Benign
225    Benign
226    Benign
227    Benign
228    Normal
Name: labels, Length: 229, dtype: object

In [57]:
predictions = test_df['labels']
predictions

0      Benign
1      Benign
2      Benign
3      Benign
4      Benign
        ...  
224    Benign
225    Benign
226    Benign
227    Benign
228    Normal
Name: labels, Length: 229, dtype: object

In [58]:
submit = pd.read_csv('/content/sample_submition4.csv')
submit

Unnamed: 0,Id,Class
0,8560,A
1,7920,A
2,8993,A
3,8126,A
4,9912,A
...,...,...
224,8343,A
225,1255,A
226,4573,A
227,3919,A


In [59]:
submit['Id'] = test_df['Id']
submit['Class'] =predictions # our model predictions on the test dataset
submit

Unnamed: 0,Id,Class
0,8560,Benign
1,7920,Benign
2,8993,Benign
3,8126,Benign
4,9912,Benign
...,...,...
224,8343,Benign
225,1255,Benign
226,4573,Benign
227,3919,Benign


In [60]:
submit.to_csv('sample_submission4_fatima.csv', index=False)