In [3]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from skimage.feature import hog



In [4]:
# Mount Google Drive to access the dataset.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
#add your folder name here and place it with main.py
cl = ['CANCER','NON CANCER']

# Define the dataset directory path
data_dir = ('/content/drive/MyDrive/Project/DATA SET') # Make sure this path is correct

#Extracting features and labels from images
def read_image():
    imgf=[]
    lab=[]
    for ct in cl:
        # Construct the full path to the class folder
        class_folder_path = os.path.join(data_dir, ct)
        # Check if the class folder exists before listing files
        if not os.path.exists(class_folder_path):
            print(f"Warning: Folder '{class_folder_path}' not found. Skipping.")
            continue

        for img in os.listdir(class_folder_path):
            # Construct the full path to the image
            cim = os.path.join(class_folder_path, img)
            # Check if the path is a file before reading
            if os.path.isfile(cim):
                img = cv2.imread(cim, cv2.IMREAD_GRAYSCALE)
                # Check if the image was read successfully
                if img is None:
                    print(f"Warning: Could not read image {cim}. Skipping.")
                    continue
                img=cv2.resize(img,(224,224))

                #if hog used then no need for reshape hog helps in feature extraction from images
                hog_features = hog(img, orientations=8, pixels_per_cell=(8, 8), cells_per_block=(1, 1), visualize=False)
                imgf.append(hog_features)
                lab.append(ct)
            else:
                 print(f"Skipping non-file entry: {cim}")
    return np.array(imgf),np.array(lab)

# Add error handling in case no images are loaded
try:
    imgn, lab = read_image()
    if len(imgn) == 0:
        print("No images found or processed. Please check your data path and folder structure.")
    else:
        # 80%-20% train test split ratio
        xtrain, xtest, ytrain, ytest = train_test_split(imgn, lab, test_size=0.2, random_state=42) # Added random_state for reproducibility

        #training model change C to adjust margin to get more accurate result, same with scaling and kernal change it also
        reg = SVC(kernel='rbf', C=1, gamma='scale')
        reg.fit(xtrain,ytrain)
        print("Model training complete.")

        # Evaluate the model on the test set
        accuracy = reg.score(xtest, ytest)
        print(f"\nModel Accuracy on test set: {accuracy * 100:.2f}%")


        # --- Prediction on a single image ---
        print("\nPredicting on a single test image...")
        # Give an image path here for prediction
        # Replace "test/CANCER/246.jpeg" with the actual path to a test image in your drive
        # For example, if your test image is in the dataset directory:
        test_image_path = '/content/drive/MyDrive/Project/DATA SET/CANCER/CANCER_630.jpg'
        #test_image_path = '/content/drive/MyDrive/Project/DATA SET/NON CANCER/NON CANCER_31.jpg' # Correct path based on dataset structure

        o_t_img = cv2.imread(test_image_path, cv2.IMREAD_GRAYSCALE)
        if o_t_img is not None:
            t_img=cv2.resize(o_t_img,(224,224))

            #Give an image path here
            hog_features = hog(t_img, orientations=8, pixels_per_cell=(8, 8), cells_per_block=(1, 1), visualize=False)
            print(f"The image is of a :{reg.predict(np.array([hog_features]))[0]}")

            # Note: cv2.imshow does not work directly in Google Colab.
            # Use matplotlib to display images in Colab.
            # import matplotlib.pyplot as plt
            # plt.imshow(o_t_img, cmap='gray')
            # plt.title("Test Image")
            # plt.axis('off')
            # plt.show()

            # cv2.waitKey also won't work as expected in Colab notebooks for displaying images.
            # If you must use cv2.imshow/waitKey, it's typically in a local environment or a dedicated script.
            # cv2.imshow("image",o_t_img)
            # cv2.waitKey(7000)
        else:
            print(f"Error: Could not read test image from path: {test_image_path}")

except FileNotFoundError as e:
    print(f"A file or directory was not found: {e}")
except Exception as e:
    print(f"An error occurred: {e}")

Model training complete.

Model Accuracy on test set: 89.02%

Predicting on a single test image...
The image is of a :CANCER


In [6]:
os.listdir('/content/drive/MyDrive/Project/DATA SET/CANCER')

['CANCER_22.jpg',
 'CANCER_24.jpg',
 'CANCER_26.jpg',
 'CANCER_12.jpg',
 'CANCER_15.jpg',
 'CANCER_20.jpg',
 'CANCER_6.jpg',
 'CANCER_7.jpg',
 'CANCER_11.jpg',
 'CANCER_10.jpg',
 'CANCER_0.jpg',
 'CANCER_23.jpg',
 'CANCER_19.jpg',
 'CANCER_3.jpg',
 'CANCER_8.jpg',
 'CANCER_9.jpg',
 'CANCER_2.jpg',
 'CANCER_25.jpg',
 'CANCER_18.jpg',
 'CANCER_16.jpg',
 'CANCER_14.jpg',
 'CANCER_1.jpg',
 'CANCER_4.jpg',
 'CANCER_28.jpg',
 'CANCER_21.jpg',
 'CANCER_17.jpg',
 'CANCER_13.jpg',
 'CANCER_5.jpg',
 'CANCER_27.jpg',
 'CANCER_94.jpg',
 'CANCER_91.jpg',
 'CANCER_42.jpg',
 'CANCER_95.jpg',
 'CANCER_72.jpg',
 'CANCER_73.jpg',
 'CANCER_55.jpg',
 'CANCER_71.jpg',
 'CANCER_54.jpg',
 'CANCER_38.jpg',
 'CANCER_45.jpg',
 'CANCER_87.jpg',
 'CANCER_90.jpg',
 'CANCER_47.jpg',
 'CANCER_70.jpg',
 'CANCER_48.jpg',
 'CANCER_58.jpg',
 'CANCER_85.jpg',
 'CANCER_50.jpg',
 'CANCER_59.jpg',
 'CANCER_43.jpg',
 'CANCER_30.jpg',
 'CANCER_60.jpg',
 'CANCER_66.jpg',
 'CANCER_96.jpg',
 'CANCER_81.jpg',
 'CANCER_34.jpg',
 '