In [44]:
import os
import shutil
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [26]:
yes_folder_path = "brain_tumor_dataset/yes"
no_folder_path = "brain_tumor_dataset/no"
combined_folder_path = "Dataset"

for filename in os.listdir(yes_folder_path):
    src_path = os.path.join(yes_folder_path, filename)
    dst_path = os.path.join(combined_folder_path, "1_" + filename)
    shutil.copyfile(src_path, dst_path)

for filename in os.listdir(no_folder_path):
    src_path = os.path.join(no_folder_path, filename)
    dst_path = os.path.join(combined_folder_path, "0_" + filename)
    shutil.copyfile(src_path, dst_path)


In [45]:
# Set up the path to the combined folder
combined_folder_path = "Dataset"

# Create an empty list to store the image paths and labels
data = []

# Loop through the images in the combined folder
for filename in os.listdir(combined_folder_path):
    # Get the label from the filename
    label = filename.split("_")[0]
    
    # Map the label to a binary label (1 for "yes", 0 for "no")
    binary_label = 1 if label == '1' else 0
    
    # Get the full path to the image
    image_path = os.path.join(combined_folder_path, filename)
    
    # Load the image using OpenCV and convert it to grayscale
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    
    # Resize the image to a fixed size (e.g. 64x64)
    image = cv2.resize(image, (64, 64))
    
    # Flatten the image array into a 1D vector
    image = image.flatten()
    
    # Append the flattened image and binary label to the data list
    data.append((image, binary_label))

# Create a pandas DataFrame from the data list
df = pd.DataFrame(data, columns=["image", "binary_label"])

### Making a dataframe with image and respective classification.

### K-NN

In [46]:
from sklearn.neighbors import KNeighborsClassifier

In [53]:
# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(df["image"], df["binary_label"], test_size=0.2, random_state=42)

# Convert the image data to numpy arrays
x_train = np.array(x_train.tolist())
x_test = np.array(x_test.tolist())

# Create a KNN classifier with k=5
knn = KNeighborsClassifier(n_neighbors=5)

# Train the classifier on the training data
knn.fit(x_train, y_train)

# Make predictions on the testing data
y_pred = knn.predict(x_test)

# Evaluate the classifier using binary classification metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, zero_division=1)
recall = recall_score(y_test, y_pred, zero_division=1)
f1 = f1_score(y_test, y_pred, zero_division=1)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


Accuracy: 0.7058823529411765
Precision: 0.9047619047619048
Recall: 0.59375
F1-score: 0.7169811320754718


#### Hyperparameter Optimization

In [58]:


param_grid = {
    'n_neighbors': [3, 5, 7, 9, 11, 13, 15],
    'weights': ['uniform', 'distance'],
    'p': [1, 2]
}

knn = KNeighborsClassifier()
grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='accuracy')
grid_search.fit(x_train, y_train)

print("Best hyperparameters:", grid_search.best_params_)
print("Best accuracy score:", grid_search.best_score_)


Best hyperparameters: {'n_neighbors': 11, 'p': 1, 'weights': 'distance'}
Best accuracy score: 0.7919512195121952


### SVM

In [None]:
from sklearn.svm import SVC

In [36]:
# Create an SVM classifier with a linear kernel
svm = SVC(kernel='linear')

# Train the classifier on the training data
svm.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = svm.predict(X_test)

# Evaluate the classifier using binary classification metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, zero_division=1)
recall = recall_score(y_test, y_pred, zero_division=1)
f1 = f1_score(y_test, y_pred, zero_division=1)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


Accuracy: 0.7058823529411765
Precision: 0.7428571428571429
Recall: 0.8125
F1-score: 0.7761194029850748


#### Hyperparameter Optimization

In [61]:
from sklearn.metrics import classification_report

In [64]:
# define parameter grid to search over


from sklearn.model_selection import GridSearchCV


param_grid = {'C': [0.1, 1, 10, 100], 
              'kernel': ['linear', 'rbf', 'poly'], 
              'degree': [2, 3, 4], 
              'gamma': ['scale', 'auto']}

# create instance of SVM classifier
clf = SVC()

# create instance of GridSearchCV
grid_search = GridSearchCV(clf, param_grid, cv=5)

# fit the grid search object to the training data
grid_search.fit(x_train, y_train)

# print the best hyperparameters found by GridSearchCV
print("Best hyperparameters:", grid_search.best_params_)

# use the best model to make predictions on the test data
y_pred = grid_search.predict(x_test)

print("Best accuracy score:", grid_search.best_score_)

# print classification report
print(classification_report(y_test, y_pred))


Best hyperparameters: {'C': 10, 'degree': 2, 'gamma': 'scale', 'kernel': 'rbf'}
Best accuracy score: 0.8269512195121951
              precision    recall  f1-score   support

           0       0.87      0.68      0.76        19
           1       0.83      0.94      0.88        32

    accuracy                           0.84        51
   macro avg       0.85      0.81      0.82        51
weighted avg       0.85      0.84      0.84        51



### Random Forest Classifier

In [38]:
from sklearn.ensemble import RandomForestClassifier

In [39]:
# Create a random forest classifier with 100 trees
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier on the training data
rf.fit(x_train, y_train)

# Make predictions on the testing data
y_pred = rf.predict(x_test)

# Evaluate the classifier using binary classification metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred, zero_division=1)
f1 = f1_score(y_test, y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


Accuracy: 0.6274509803921569
Precision: 0.6585365853658537
Recall: 0.84375
F1-score: 0.7397260273972602


#### Hyperparameter Optimization

In [66]:
# define parameter grid to search over
param_grid = {'n_estimators': [100, 300, 500],
              'max_depth': [None, 10, 50, 100],
              'min_samples_split': [2, 5, 10],
              'min_samples_leaf': [1, 2, 4],
              'max_features': ['sqrt', 'log2']}

# create instance of Random Forest Classifier
clf = RandomForestClassifier()

# create instance of GridSearchCV
grid_search = GridSearchCV(clf, param_grid, cv=5)

# fit the grid search object to the training data
grid_search.fit(x_train, y_train)

# print the best hyperparameters found by GridSearchCV
print("Best hyperparameters:", grid_search.best_params_)
print("Best accuracy score:", grid_search.best_score_)
print("Best accuracy score:", grid_search.best_score_)

# use the best model to make predictions on the test data
y_pred = grid_search.predict(x_test)

# print classification report
print(classification_report(y_test, y_pred))


Best hyperparameters: {'max_depth': None, 'max_features': 'log2', 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}
Best accuracy score: 0.8265853658536585
              precision    recall  f1-score   support

           0       0.83      0.79      0.81        19
           1       0.88      0.91      0.89        32

    accuracy                           0.86        51
   macro avg       0.86      0.85      0.85        51
weighted avg       0.86      0.86      0.86        51

