FOR ORAL CANCER PREDICTION


In [1]:
import glob
import numpy as np
from PIL import Image
from numpy import array
from keras.preprocessing import image
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, f1_score, recall_score, precision_score

In [2]:
cancer_imgs = glob.glob('../Dataset/OralCancer/cancer' + '/*.jpg')
non_cancer_imgs = glob.glob('../Dataset/OralCancer/non-cancer/' + '/*.jpg')

In [3]:
dataset = []
labels = []

for img in cancer_imgs:
  dataset.append(image.img_to_array(image.load_img(img, target_size=(64, 64))))
  labels.append(1)

for img in non_cancer_imgs:
  dataset.append(image.img_to_array(image.load_img(img, target_size=(64, 64))))
  labels.append(0)
  
dataset = array(dataset)

In [None]:
import matplotlib.image as mpimg 
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
%matplotlib inline


print('Cancer Images: ')
for img in cancer_imgs[:3]:
  testim = mpimg.imread(img)
  plt.figure()
  imshow(testim)

In [6]:
def evaluate_model(clf, X_test, y_test, model_name):
  print('--------------------------------------------')
  print('Model ', model_name)
  y_pred = clf.predict(X_test)
  f1 = f1_score(y_test, y_pred.round(), average='weighted')
  recall = recall_score(y_test, y_pred.round(), average='weighted')
  precision = precision_score(y_test, y_pred.round(), average='weighted')
  print(classification_report(y_test, y_pred.round()))
  print("F1 Score ", f1)
  print("Recall ", recall)
  print("Precision ", precision)

In [9]:
X_train, X_test, y_train, y_test = train_test_split(dataset, labels, test_size=0.25, random_state=42)
X_train = X_train.reshape(len(X_train),3*64*64)
X_test = X_test.reshape(len(X_test),3*64*64)

In [10]:
from sklearn import tree
oc_clf_tree = tree.DecisionTreeClassifier(random_state=42)
oc_clf_tree = oc_clf_tree.fit(X_train, y_train)

In [14]:
from sklearn.ensemble import RandomForestClassifier
sc = StandardScaler()
X2_train = sc.fit_transform(X_train)
X2_test = sc.transform(X_test)
oc_rf = RandomForestClassifier(n_estimators=2000,criterion='entropy', random_state=0)
oc_rf.fit(X2_train, y_train)

In [15]:
from sklearn import svm
oc_svm = svm.SVC(kernel='linear') # Linear Kernel
oc_svm.fit(X_train, y_train)

In [18]:
evaluate_model(oc_clf_tree, X_test, y_test, 'Decision Tree')


--------------------------------------------
Model  Decision Tree
              precision    recall  f1-score   support

           0       0.62      0.56      0.59         9
           1       0.76      0.81      0.79        16

    accuracy                           0.72        25
   macro avg       0.69      0.68      0.69        25
weighted avg       0.71      0.72      0.72        25

F1 Score  0.7160071301247772
Recall  0.72
Precision  0.7144117647058823


In [19]:
evaluate_model(oc_rf, X2_test, y_test, 'RandomForest')


--------------------------------------------
Model  RandomForest
              precision    recall  f1-score   support

           0       0.80      0.89      0.84         9
           1       0.93      0.88      0.90        16

    accuracy                           0.88        25
   macro avg       0.87      0.88      0.87        25
weighted avg       0.89      0.88      0.88        25

F1 Score  0.8812224108658744
Recall  0.88
Precision  0.8853333333333333


In [20]:
evaluate_model(oc_svm, X_test, y_test, 'SVM')

--------------------------------------------
Model  SVM
              precision    recall  f1-score   support

           0       0.64      0.78      0.70         9
           1       0.86      0.75      0.80        16

    accuracy                           0.76        25
   macro avg       0.75      0.76      0.75        25
weighted avg       0.78      0.76      0.76        25

F1 Score  0.764
Recall  0.76
Precision  0.7776623376623377


In [32]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dropout, BatchNormalization

dataset = dataset / 255.0
X_train, X_test, y_train, y_test = train_test_split(dataset, labels, test_size=0.3, random_state=42)

# Convert labels to categorical one-hot encoding
y_train = to_categorical(y_train, num_classes=2)
y_test = to_categorical(y_test, num_classes=2)

model = Sequential()

model.add(Conv2D(64, (3, 3), activation='relu', input_shape=(64, 64, 3)))
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(256, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())

model.add(Dense(2, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test))

loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}')

evaluate_model(model,X_test,y_test,"CNN")


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Test Loss: 0.6573, Test Accuracy: 0.6333
--------------------------------------------
Model  CNN
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        11
           1       0.63      1.00      0.78        19

   micro avg       0.63      0.63      0.63        30
   macro avg       0.32      0.50      0.39        30
weighted avg       0.40      0.63      0.49        30
 samples avg       0.63      0.63      0.63        30

F1 Score  0.49115646258503404
Recall  0.6333333333333333
Precision  0.4011111111111111


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import LSTM, Dense
# from tensorflow.keras.utils import to_categorical
# from sklearn.model_selection import train_test_split
# import numpy as np

#  # Reshape data for LSTM
# time_steps = X_train.shape[1] 
# features = X_train.shape[2]

# print(time_steps,features)
# # Reshape X_train and X_test
# X_train_reshaped = X_train.reshape((X_train.shape[0], time_steps, features))
# X_test_reshaped = X_test.reshape((X_test.shape[0], time_steps, features))

# print(X_train_reshaped,X_test_reshaped)

# # Build the LSTM model
# lstm_model = Sequential()

# lstm_model.add(LSTM(64, input_shape=(time_steps,features), activation='relu'))
# lstm_model.add(Dense(2, activation='softmax'))

# # Compile the model
# lstm_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# # Train the model
# lstm_model.fit(X_train_reshaped, y_train, epochs=30, batch_size=32, validation_data=(X_test_reshaped, y_test))

# # Evaluate the model on the test set
# loss_lstm, accuracy_lstm = lstm_model.evaluate(X_test_reshaped, y_test)
# print(f'Test Loss: {loss_lstm:.4f}, Test Accuracy: {accuracy_lstm:.4f}')


