In [None]:
# Attach Google Drive to the Colab runtime so that files stored there can be
# read by the rest of the notebook.

from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# importing libraries
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score


In [None]:
# Dataset location on the mounted Drive; it is expected to hold one
# sub-folder per class.
dataset_dir = "/content/drive/MyDrive/training_set"

# Class folder name -> label used as the classification target.
labels_map = dict([
    ("benign", "Low"),
    ("malignant", "High"),
])

In [None]:
# Load up to MAX_IMAGES_PER_CLASS images from each class folder, apply the
# VGG16 preprocess_input step, and collect the arrays in X with the mapped
# label for each image in y (X[i] always corresponds to y[i]).
IMG_SIZE = (224, 224)
MAX_IMAGES_PER_CLASS = 500  # per-class cap, kept small for speed
X = []
y = []

# os.listdir returns entries in arbitrary order; sorting makes the selected
# subset of files (and therefore the downstream results) reproducible.
for label in sorted(os.listdir(dataset_dir)):
    class_dir = os.path.join(dataset_dir, label)
    if not os.path.isdir(class_dir):
        continue

    # Resolve the label before loading any image, so a folder that is not in
    # labels_map is skipped as a whole and can never add an image to X without
    # a matching entry in y.
    if label not in labels_map:
        print(f"Skipped folder {label}: no entry in labels_map")
        continue
    mapped_label = labels_map[label]

    for img_file in sorted(os.listdir(class_dir))[:MAX_IMAGES_PER_CLASS]:
        img_path = os.path.join(class_dir, img_file)
        try:
            img = load_img(img_path, target_size=IMG_SIZE)
            img_array = preprocess_input(img_to_array(img))
        except Exception as e:
            # Best-effort loading: files that cannot be read as images are
            # reported and skipped instead of aborting the whole run.
            print(f"Skipped {img_file}: {e}")
            continue

        # Append to both lists together, only after loading succeeded.
        X.append(img_array)
        y.append(mapped_label)

# Guard the invariant that train_test_split relies on later.
assert len(X) == len(y), "images and labels are out of sync"


In [None]:
# Extract deep features: run the preprocessed images through an
# ImageNet-pretrained VGG16 without its fully-connected top layers, then
# flatten each image's feature map into a single vector.
# input_shape is derived from IMG_SIZE so it always matches the size the
# images were loaded at.
model = VGG16(weights='imagenet', include_top=False, input_shape=(*IMG_SIZE, 3))
features = model.predict(np.array(X), verbose=1)
# One row per image; -1 collapses all remaining feature-map axes.
features_flattened = features.reshape(features.shape[0], -1)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m459s[0m 18s/step


In [None]:
# Hold out 20% of the samples for testing. The split is stratified on the
# labels and seeded so it is the same on every run.
split_parts = train_test_split(
    features_flattened,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)
X_train, X_test, y_train, y_test = split_parts


In [None]:
# Fit a seeded 100-tree random forest on the training features.
forest_params = {"n_estimators": 100, "random_state": 42}
clf = RandomForestClassifier(**forest_params)
clf.fit(X_train, y_train)


In [None]:
# Score the classifier on the held-out test set: plain accuracy plus the
# weighted-average F1 score.
y_pred = clf.predict(X_test)
accuracy, f1 = (
    accuracy_score(y_test, y_pred),
    f1_score(y_test, y_pred, average='weighted'),
)

for metric_name, metric_value in (("Accuracy:", accuracy), ("F1 Score:", f1)):
    print(metric_name, metric_value)


Accuracy: 0.8848484848484849
F1 Score: 0.8830453293263211
