# Classification Model: Classifying Brain Cancer diagnosis 

In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


In [12]:
BCdf = pd.read_csv("BrainCancerData.csv")
BCdf.head()

Unnamed: 0,ID,Sex,Diagnosis,Location,KI,GTV,Stereotactic methods,status,OS
0,1,0,0,0,90,6.11,0,0,57.64
1,2,1,2,1,90,19.35,1,1,8.98
2,3,0,0,0,70,7.95,0,0,26.46
3,4,0,1,1,80,7.61,1,1,47.8
4,6,1,2,1,90,5.06,1,1,6.3


In [7]:
# Separating the features (X) and target (y)


X = BCdf[['Sex', 'Location', 'KI', 'GTV', 'Stereotactic methods', 'status', 'OS']]
y = BCdf['Diagnosis']
   

In [8]:
# Splitting the dataset into training and testing sets

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# Applying Polynomial Regression to the dataset
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Creating and training a Logistic Regression model
logreg = LogisticRegression()
logreg.fit(X_train_poly, y_train)

# Creating and training a Naive Bayes model
gnb = GaussianNB()
gnb.fit(X_train, y_train)

# Creating and training a KNN model
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# Predictions using the three models
y_pred_logreg = logreg.predict(X_test_poly)
y_pred_gnb = gnb.predict(X_test)
y_pred_knn = knn.predict(X_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [10]:
# Evaluating the Polynomial Regression model
accuracy_logreg = accuracy_score(y_test, y_pred_logreg)
confusion_logreg = confusion_matrix(y_test, y_pred_logreg)

print("Classification Report - Polynomial Regression")
print(classification_report(y_test, y_pred_logreg))
print("Accuracy: ", accuracy_logreg)
print("Confusion Matrix: \n", confusion_logreg)

# Evaluate Naive Bayes model
accuracy_gnb = accuracy_score(y_test, y_pred_gnb)
confusion_gnb = confusion_matrix(y_test, y_pred_gnb)

print("\nClassification Report - Naive Bayes")
print(classification_report(y_test, y_pred_gnb))
print("Accuracy: ", accuracy_gnb)
print("Confusion Matrix: \n", confusion_gnb)

# Evaluate KNN model
accuracy_knn = accuracy_score(y_test, y_pred_knn)
confusion_knn = confusion_matrix(y_test, y_pred_knn)

print("\nClassification Report - KNN")
print(classification_report(y_test, y_pred_knn))
print("Accuracy: ", accuracy_knn)
print("Confusion Matrix: \n", confusion_knn)

Classification Report - Polynomial Regression
              precision    recall  f1-score   support

           0       0.64      0.78      0.70         9
           1       0.00      0.00      0.00         2
           2       0.33      0.67      0.44         3
           3       1.00      0.25      0.40         4

    accuracy                           0.56        18
   macro avg       0.49      0.42      0.39        18
weighted avg       0.60      0.56      0.51        18

Accuracy:  0.5555555555555556
Confusion Matrix: 
 [[7 0 2 0]
 [1 0 1 0]
 [1 0 2 0]
 [2 0 1 1]]

Classification Report - Naive Bayes
              precision    recall  f1-score   support

           0       0.58      0.78      0.67         9
           1       0.00      0.00      0.00         2
           2       0.50      0.67      0.57         3
           3       0.00      0.00      0.00         4

    accuracy                           0.50        18
   macro avg       0.27      0.36      0.31        18
weighte

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Based on the output and our findings, Naive Bayes outperforms Polynomial Regression and KNN in the classification of brain cancer types. Compared to other approaches like KNN and Polynomial Regression its shown that Naive Bayes has the highest accuracy and a more balanced F1-score, indicating better performance overall.