In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.initializers import Constant
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy, BinaryCrossentropy
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay, classification_report

In [4]:
Chest_CT_IMG_SIZE = 224
Chest_CT_IMG_CHANNEL = 3
Chest_CT_BATCH_SIZE = 64
Chest_CT_COLOR_MODE = 'rgb'

In [5]:
Chest_CT_PATH = "F:\Studies\Third_year\Computer_vision\Project\ProjectCode\Dataset\Data"
Chest_CT_CLASSNAMES = sorted(os.listdir(Chest_CT_PATH + '/train'))

In [6]:
Chest_CT_CLASSNAMES

['adenocarcinoma', 'large.cell.carcinoma', 'normal', 'squamous.cell.carcinoma']

In [7]:
Chest_CT_train_set = image_dataset_from_directory(
    Chest_CT_PATH + '/train',
    labels='inferred',
    label_mode='categorical',
    class_names=Chest_CT_CLASSNAMES,
    color_mode=Chest_CT_COLOR_MODE,
    batch_size=Chest_CT_BATCH_SIZE,
    image_size=(Chest_CT_IMG_SIZE, Chest_CT_IMG_SIZE),
    interpolation='bilinear'
)

Found 613 files belonging to 4 classes.


In [9]:
Chest_CT_dev_set = image_dataset_from_directory(
    Chest_CT_PATH + '/valid',
    labels='inferred',
    label_mode='categorical',
    class_names=Chest_CT_CLASSNAMES,
    color_mode=Chest_CT_COLOR_MODE,
    batch_size=Chest_CT_BATCH_SIZE,
    image_size=(Chest_CT_IMG_SIZE, Chest_CT_IMG_SIZE),
    interpolation='bilinear'
)

Found 72 files belonging to 4 classes.


In [11]:
Chest_CT_test_set = image_dataset_from_directory(
    Chest_CT_PATH + '/test',
    labels='inferred',
    label_mode='categorical',
    class_names=Chest_CT_CLASSNAMES,
    color_mode=Chest_CT_COLOR_MODE,
    batch_size=Chest_CT_BATCH_SIZE,
    image_size=(Chest_CT_IMG_SIZE, Chest_CT_IMG_SIZE),
    interpolation='bilinear'
)

Found 315 files belonging to 4 classes.


In [None]:
# from keras.applications.vgg16 import VGG16
# vgg = VGG16(include_top=False, input_shape=(227,227,3))

# for layer in vgg.layers:
#   layer.trainable = False

# flat = Flatten() (vgg.layers[-1].output)
# fc1 = Dense(1024, activation='relu') (flat)
# output = Dense(4, activation='softmax') (fc1)
# model_bt1 = Model(inputs=vgg.inputs, outputs=output)

# **VGG16**

In [15]:
from keras.applications.vgg16 import VGG16
vgg = VGG16(include_top=False, input_shape=(224,224,3))

for layer in vgg.layers:
  layer.trainable = False
feature_extractor = Model(inputs=vgg.input, outputs=vgg.output)

In [16]:
all_features = []
all_labels = []

for batch_images, batch_labels in Chest_CT_train_set:
    # Chuẩn hóa đầu vào
    # batch_images = preprocess_input(batch_images.numpy())

    # Trích đặc trưng
    features = feature_extractor.predict(batch_images)
    features = features.reshape((features.shape[0], -1))  # flatten

    all_features.append(features)
    all_labels.append(batch_labels.numpy())

# Nối lại thành mảng numpy
X_train = np.concatenate(all_features, axis=0)
y_train = np.concatenate(all_labels, axis=0)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 780ms/step


In [17]:
all_features = []
all_labels = []

for batch_images, batch_labels in Chest_CT_test_set:
    # Chuẩn hóa đầu vào
    # batch_images = preprocess_input(batch_images.numpy())

    # Trích đặc trưng
    features = feature_extractor.predict(batch_images)
    features = features.reshape((features.shape[0], -1))  # flatten

    all_features.append(features)
    all_labels.append(batch_labels.numpy())

# Nối lại thành mảng numpy
X_test = np.concatenate(all_features, axis=0)
y_test = np.concatenate(all_labels, axis=0)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3s/step


In [18]:
y_train_new = np.argmax(y_train, axis=1)
y_test_new = np.argmax(y_test, axis=1)

#### **SVC**

In [22]:
model = SVC(
            C=1, 
            kernel='rbf', 
            degree=3, 
            random_state=42
        )

model.fit(X_train, y_train_new)

In [23]:
y_pred = model.predict(X_test)

In [24]:
print(f'Accuracy: {accuracy_score(y_test_new, y_pred)}')
print(f"Precision: {precision_score(y_test_new, y_pred, average='macro')}")
print(f"Recall: {recall_score(y_test_new, y_pred, average='macro')}")
print(f"F1 macro: {f1_score(y_test_new, y_pred, average='macro')}")

Accuracy: 0.8031746031746032
Precision: 0.8400069618490671
Recall: 0.831318082788671
F1 macro: 0.8158277241856071


#### **RANDOM FOREST**

In [26]:
model = RandomForestClassifier(
            n_estimators=100, 
            criterion='gini', 
            random_state=42
        )
model.fit(X_train, y_train_new)

In [27]:
y_pred = model.predict(X_test)

In [28]:
print(f'Accuracy: {accuracy_score(y_test_new, y_pred)}')
print(f"Precision: {precision_score(y_test_new, y_pred, average='macro')}")
print(f"Recall: {recall_score(y_test_new, y_pred, average='macro')}")
print(f"F1 macro: {f1_score(y_test_new, y_pred, average='macro')}")

Accuracy: 0.6412698412698413
Precision: 0.6979166666666667
Recall: 0.6188997821350763
F1 macro: 0.622903290565659


#### **LOGISTIC REGRESSION**

In [29]:
model = LogisticRegression(
            C=1.0,
            solver='lbfgs',
            max_iter=1000,
            multi_class='multinomial',
            random_state=42
        )

model.fit(X_train, y_train_new)



In [30]:
y_pred = model.predict(X_test)

In [31]:
print(f'Accuracy: {accuracy_score(y_test_new, y_pred)}')
print(f"Precision: {precision_score(y_test_new, y_pred, average='macro')}")
print(f"Recall: {recall_score(y_test_new, y_pred, average='macro')}")
print(f"F1 macro: {f1_score(y_test_new, y_pred, average='macro')}")

Accuracy: 0.873015873015873
Precision: 0.8797438074328203
Recall: 0.8805010893246187
F1 macro: 0.8757012250503394


#### **XGBoost**

In [None]:
model = XGBClassifier(
            n_estimators=100,
            learning_rate=0.1,
            max_depth=6,
            random_state=42,
            objective='multi:softmax',
            num_class=4
        )
model.fit(X_train, y_train_new)

In [None]:
y_pred = model.predict(X_test)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3s/step


In [None]:
print(f'Accuracy: {accuracy_score(y_test_new, y_pred)}')
print(f"Precision: {precision_score(y_test_new, y_pred, average='macro')}")
print(f"Recall: {recall_score(y_test_new, y_pred, average='macro')}")
print(f"F1 macro: {f1_score(y_test_new, y_pred, average='macro')}")

# **RESNET50**

In [38]:
from keras.applications import ResNet50
res = ResNet50(include_top=False, input_shape=(224,224,3))

for layer in res.layers:
  layer.trainable = False
feature_extractor = Model(inputs=res.input, outputs=res.output)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step


In [39]:
all_features = []
all_labels = []

for batch_images, batch_labels in Chest_CT_train_set:
    # Chuẩn hóa đầu vào
    # batch_images = preprocess_input(batch_images.numpy())

    # Trích đặc trưng
    features = feature_extractor.predict(batch_images)
    features = features.reshape((features.shape[0], -1))  # flatten

    all_features.append(features)
    all_labels.append(batch_labels.numpy())

# Nối lại thành mảng numpy
X_train = np.concatenate(all_features, axis=0)
y_train = np.concatenate(all_labels, axis=0)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3s/step


In [40]:
all_features = []
all_labels = []

for batch_images, batch_labels in Chest_CT_test_set:
    # Chuẩn hóa đầu vào
    # batch_images = preprocess_input(batch_images.numpy())

    # Trích đặc trưng
    features = feature_extractor.predict(batch_images)
    features = features.reshape((features.shape[0], -1))  # flatten

    all_features.append(features)
    all_labels.append(batch_labels.numpy())

# Nối lại thành mảng numpy
X_test = np.concatenate(all_features, axis=0)
y_test = np.concatenate(all_labels, axis=0)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step


In [41]:
y_train_new = np.argmax(y_train, axis=1)
y_test_new = np.argmax(y_test, axis=1)

#### **SVC**

In [42]:
model = SVC(
            C=1, 
            kernel='rbf', 
            degree=3, 
            random_state=42
        )

model.fit(X_train, y_train_new)
y_pred = model.predict(X_test)
print(f'Accuracy: {accuracy_score(y_test_new, y_pred)}')
print(f"Precision: {precision_score(y_test_new, y_pred, average='macro')}")
print(f"Recall: {recall_score(y_test_new, y_pred, average='macro')}")
print(f"F1 macro: {f1_score(y_test_new, y_pred, average='macro')}")

Accuracy: 0.819047619047619
Precision: 0.8404513387645456
Recall: 0.8126497821350762
F1 macro: 0.8237853230263998


#### **RANDOM FOREST**

In [43]:
model = RandomForestClassifier(
            n_estimators=100, 
            criterion='gini', 
            random_state=42
        )

model.fit(X_train, y_train_new)
y_pred = model.predict(X_test)
print(f'Accuracy: {accuracy_score(y_test_new, y_pred)}')
print(f"Precision: {precision_score(y_test_new, y_pred, average='macro')}")
print(f"Recall: {recall_score(y_test_new, y_pred, average='macro')}")
print(f"F1 macro: {f1_score(y_test_new, y_pred, average='macro')}")

Accuracy: 0.6571428571428571
Precision: 0.6734300249309345
Recall: 0.6247821350762527
F1 macro: 0.616882567679709


#### **LOGISTIC REGRESSION**

In [44]:
model = LogisticRegression(
            C=1.0,
            solver='lbfgs',
            max_iter=1000,
            multi_class='multinomial',
            random_state=42
        )
model.fit(X_train, y_train_new)
y_pred = model.predict(X_test)
print(f'Accuracy: {accuracy_score(y_test_new, y_pred)}')
print(f"Precision: {precision_score(y_test_new, y_pred, average='macro')}")
print(f"Recall: {recall_score(y_test_new, y_pred, average='macro')}")
print(f"F1 macro: {f1_score(y_test_new, y_pred, average='macro')}")



Accuracy: 0.8476190476190476
Precision: 0.8536174242424244
Recall: 0.8750272331154684
F1 macro: 0.8578412392111359
