In [1]:
# %tensorflow_version 2.x

import warnings 
warnings.filterwarnings("ignore")

import os
import glob
import cv2
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from skimage.io import imread, imsave
from skimage.transform import resize 
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras import layers
from tensorflow.keras.applications import vgg16
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import SGD, Adam, RMSprop

import tensorflow as tf
import tensorflow.keras.backend as K
from tqdm import tqdm

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
from pathlib import Path
train_dir = Path('/content/drive/MyDrive/OralCancer/Final/Phase 1/Tongue/Train/')
val_dir = Path('/content/drive/MyDrive/OralCancer/Final/Phase 1/Tongue/Val')

cat = ['Cancer','Non-Cancer']
labels_dict = {"Cancer":0,"Non-Cancer":1}


# Preprocessing & Loading

In [4]:
from keras.preprocessing import image
dirs = train_dir.glob("*")

train_img = []
train_labels = []

for folder_dir in dirs:
    label = str(folder_dir).split("/")[-1][:-1]+"r"
    
    for img_path in folder_dir.glob("*.jpg"):
        img = image.load_img(img_path,target_size=(224,224))
        img_array = image.img_to_array(img)
        train_img.append(img_array)
        train_labels.append(labels_dict[label])

In [5]:
dirs = val_dir.glob("*")

test_img = []
test_labels = []

for folder_dir in dirs:
    label = str(folder_dir).split("/")[-1][:-1]+"r"
    
    for img_path in folder_dir.glob("*.jpg"):
        img = image.load_img(img_path,target_size=(224,224))
        img_array = image.img_to_array(img)
        test_img.append(img_array)
        test_labels.append(labels_dict[label])

In [6]:
import numpy as np
train_img = np.array(train_img,dtype='float32')/255.0
train_labels = np.array(train_labels)

test_img = np.array(test_img,dtype='float32')/255.0
test_labels = np.array(test_labels)

print(train_img.shape)
print(test_img.shape)


(280, 224, 224, 3)
(93, 224, 224, 3)


In [8]:
m,n = train_img.shape[0], test_img.shape[0]
train_img = train_img.reshape(m,-1)
test_img = test_img.reshape(n,-1)

print(train_img.shape)
print(train_labels.shape)
print(test_img.shape)
print(test_labels.shape)

(280, 150528)
(280,)
(93, 150528)
(93,)


In [9]:
print(train_img.shape,train_labels.shape)
print(test_img.shape,test_labels.shape)

(280, 150528) (280,)
(93, 150528) (93,)


In [10]:
classes =len(np.unique(train_labels))
print(classes)

2


In [11]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
train_img = sc.fit_transform(train_img)
test_img = sc.transform(test_img)

# SVM
## rbf

In [24]:
from sklearn import svm
svm_classifier_ = svm.SVC(kernel="rbf",gamma='auto',probability=True)

In [25]:
svm_classifier_.fit(train_img,train_labels)
svm_classifier_.score(train_img,train_labels)

1.0

In [26]:
y_pred = svm_classifier_.predict_proba(test_img)

In [None]:
print(y_pred)

In [15]:
from sklearn.metrics import accuracy_score
print("Accuracy on unknown data is",accuracy_score(test_labels, y_pred))

Accuracy on unknown data is 1.0


In [17]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(test_labels, y_pred)

In [29]:
import pickle
with open('/content/drive/MyDrive/OralCancer/Models/svm_pkl', 'wb') as files:
    pickle.dump(svm_classifier_, files)

### linear

In [20]:
from sklearn import svm
svm_classifier = svm.SVC(kernel="linear",gamma='auto')
svm_classifier.fit(train_img,train_labels)
print(svm_classifier.score(train_img,train_labels))
y_pred = svm_classifier.predict(test_img)
from sklearn.metrics import accuracy_score
print("Accuracy on unknown data is",accuracy_score(test_labels, y_pred))
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(test_labels, y_pred)

1.0
Accuracy on unknown data is 1.0


In [50]:
import pickle
with open('/content/drive/MyDrive/OralCancer/Models/svc_pkl', 'wb') as files:
    pickle.dump(svc, files)

# Random Forrest

In [30]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(train_img)
X_test = sc.transform(test_img)

In [33]:
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=50, criterion='entropy', random_state=0)
classifier.fit(X_train, train_labels)
classifier.score(X_train, train_labels)

1.0

In [34]:
y_pred = classifier.predict(X_test)
print(y_pred)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [35]:
from sklearn.metrics import accuracy_score
print("Accuracy on unknown data is",accuracy_score(test_labels,y_pred))

Accuracy on unknown data is 1.0


In [36]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(test_labels, y_pred)
print(cm)

[[31  0]
 [ 0 62]]


In [37]:
with open('/content/drive/MyDrive/OralCancer/Models/random_pkl', 'wb') as files:
    pickle.dump(classifier, files)

# Adaboost

In [38]:
from sklearn.ensemble import AdaBoostClassifier
adab = AdaBoostClassifier()
adab.fit(X_train, train_labels)
adab.score(X_train, train_labels)

1.0

In [39]:
# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(test_labels, y_pred)
print(cm)
y2 = classifier.predict(X_test)
from sklearn.metrics import accuracy_score
print("Accuracy on unknown data is",accuracy_score(test_labels,y2))

[[31  0]
 [ 0 62]]
Accuracy on unknown data is 1.0


In [41]:
import pickle
with open('/content/drive/MyDrive/OralCancer/Models/adab_pkl', 'wb') as files:
    pickle.dump(adab, files)