In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import random
import cv2
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, log_loss, roc_auc_score, f1_score, mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline


In [23]:

# Defining directory containing the dataset
dataset_directory = '/content/drive/MyDrive/Project  folder/The IQ-OTHNCCD lung cancer dataset/The IQ-OTHNCCD lung cancer dataset'
dataset_directory

'/content/drive/MyDrive/Project  folder/The IQ-OTHNCCD lung cancer dataset/The IQ-OTHNCCD lung cancer dataset'

In [14]:
# Specifying categories
categories = ['Bengin cases', 'Malignant cases', 'Normal cases']


In [28]:
# Function to load and preprocess data
def load_and_preprocess_data(directory, categories, img_size=256, test_size=3, random_state=42):
    data = []
    for category in categories:
        path = os.path.join(directory, category)
        class_num = categories.index(category)
        for file in os.listdir(path):
            filepath = os.path.join(path, file)
            img = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img, (img_size, img_size))
            data.append([img, class_num])

    random.shuffle(data)
    X, y = [], []
    for feature, label in data:
        X.append(feature)
        y.append(label)

    X = np.array(X).reshape(-1, img_size * img_size)
    X = X / 255.0
    y = np.array(y)

    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=test_size, random_state=random_state)

    return X_train, X_valid, y_train, y_valid


In [29]:
# Function to handle class imbalance
def handle_class_imbalance(X_train, y_train):
    smote = SMOTE()
    X_train_sampled, y_train_sampled = smote.fit_resample(X_train, y_train)
    return X_train_sampled, y_train_sampled


In [30]:
# Loading and preprocessing data
X_train, X_valid, y_train, y_valid = load_and_preprocess_data(dataset_directory, categories)


In [31]:
# Handling class imbalance
X_train_sampled, y_train_sampled = handle_class_imbalance(X_train, y_train)

In [32]:
# Building and training the SVM classifier
svm_model = Pipeline([('scaler', StandardScaler()), ('svc', SVC(kernel='linear', probability=True))])
svm_model.fit(X_train_sampled, y_train_sampled)

In [33]:
# Evaluating the SVM model
y_pred_svm = svm_model.predict(X_valid)

# Function to calculate classification accuracy
def calculate_classification_accuracy(y_true, y_pred):
    return accuracy_score(y_true, y_pred)

# Function to calculate logarithmic loss
def calculate_logarithmic_loss(y_true, y_pred_proba):
    return log_loss(y_true, y_pred_proba)

# Function to calculate confusion matrix
def calculate_confusion_matrix(y_true, y_pred):
    return confusion_matrix(y_true, y_pred)

# Function to calculate area under the curve (ROC AUC score)
def calculate_roc_auc_score(y_true, y_pred_proba):
    return roc_auc_score(y_true, y_pred_proba, multi_class='ovr')

# Function to calculate F1 score
def calculate_f1_score(y_true, y_pred):
    return f1_score(y_true, y_pred, average='weighted')

# Function to calculate mean absolute error
def calculate_mean_absolute_error(y_true, y_pred):
    return mean_absolute_error(y_true, y_pred)

# Function to calculate mean squared error
def calculate_mean_squared_error(y_true, y_pred):
    return mean_squared_error(y_true, y_pred)


In [34]:
# Calculate evaluation metrics
acc = calculate_classification_accuracy(y_valid, y_pred_svm)
ll = calculate_logarithmic_loss(y_valid, svm_model.predict_proba(X_valid))
cm = calculate_confusion_matrix(y_valid, y_pred_svm)
roc_auc = calculate_roc_auc_score(y_valid, svm_model.predict_proba(X_valid))
f1 = calculate_f1_score(y_valid, y_pred_svm)
mae = calculate_mean_absolute_error(y_valid, y_pred_svm)
mse = calculate_mean_squared_error(y_valid, y_pred_svm)


In [35]:


# Print evaluation metrics
print("Evaluation Metrics for SVM Classifier:")
print("Classification Accuracy:", acc)
print("Logarithmic Loss:", ll)
print("Confusion Matrix:\n", cm)
print("ROC AUC Score:", roc_auc)
print("F1 Score:", f1)
print("Mean Absolute Error:", mae)
print("Mean Squared Error:", mse)


Evaluation Metrics for SVM Classifier:
Classification Accuracy: 1.0
Logarithmic Loss: 0.003055714382460289
Confusion Matrix:
 [[1 0 0]
 [0 1 0]
 [0 0 1]]
ROC AUC Score: 1.0
F1 Score: 1.0
Mean Absolute Error: 0.0
Mean Squared Error: 0.0


In [36]:
# Calculate training accuracy
y_pred_train = svm_model.predict(X_train_sampled)
train_accuracy = accuracy_score(y_train_sampled, y_pred_train)

# Print training accuracy
print("Training Accuracy:", train_accuracy)

# Print test accuracy (which you already calculated)
print("Test Accuracy:", acc)


Training Accuracy: 1.0
Test Accuracy: 1.0


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import random
import cv2
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, log_loss, roc_auc_score, f1_score, mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Setting up Google Colab if applicable

# Defining directory containing the dataset
dataset_directory = '/content/drive/MyDrive/Project  folder/The IQ-OTHNCCD lung cancer dataset/The IQ-OTHNCCD lung cancer dataset'

# Specifying categories
categories = ['Bengin cases', 'Malignant cases', 'Normal cases']

# Function to load and preprocess data
def load_and_preprocess_data(directory, categories, img_size=256, test_size=3, random_state=42):
    data = []
    for category in categories:
        path = os.path.join(directory, category)
        class_num = categories.index(category)
        for file in os.listdir(path):
            filepath = os.path.join(path, file)
            img = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img, (img_size, img_size))
            data.append([img, class_num])

    random.shuffle(data)
    X, y = [], []
    for feature, label in data:
        X.append(feature)
        y.append(label)

    X = np.array(X).reshape(-1, img_size * img_size)
    X = X / 255.0
    y = np.array(y)

    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=test_size, random_state=random_state)

    return X_train, X_valid, y_train, y_valid

# Function to handle class imbalance
def handle_class_imbalance(X_train, y_train):
    smote = SMOTE()
    X_train_sampled, y_train_sampled = smote.fit_resample(X_train, y_train)
    return X_train_sampled, y_train_sampled

# Loading and preprocessing data
X_train, X_valid, y_train, y_valid = load_and_preprocess_data(dataset_directory, categories)

# Handling class imbalance
X_train_sampled, y_train_sampled = handle_class_imbalance(X_train, y_train)

# Building and training the SVM classifier
svm_model = Pipeline([('scaler', StandardScaler()), ('svc', SVC(kernel='linear', probability=True))])
svm_model.fit(X_train_sampled, y_train_sampled)

# Evaluating the SVM model
y_pred_svm = svm_model.predict(X_valid)

# Function to calculate classification accuracy
def calculate_classification_accuracy(y_true, y_pred):
    return accuracy_score(y_true, y_pred)

# Function to calculate logarithmic loss
def calculate_logarithmic_loss(y_true, y_pred_proba):
    return log_loss(y_true, y_pred_proba)

# Function to calculate confusion matrix
def calculate_confusion_matrix(y_true, y_pred):
    return confusion_matrix(y_true, y_pred)

# Function to calculate area under the curve (ROC AUC score)
def calculate_roc_auc_score(y_true, y_pred_proba):
    return roc_auc_score(y_true, y_pred_proba, multi_class='ovr')

# Function to calculate F1 score
def calculate_f1_score(y_true, y_pred):
    return f1_score(y_true, y_pred, average='weighted')

# Function to calculate mean absolute error
def calculate_mean_absolute_error(y_true, y_pred):
    return mean_absolute_error(y_true, y_pred)

# Function to calculate mean squared error
def calculate_mean_squared_error(y_true, y_pred):
    return mean_squared_error(y_true, y_pred)

# Calculate evaluation metrics
acc = calculate_classification_accuracy(y_valid, y_pred_svm)
ll = calculate_logarithmic_loss(y_valid, svm_model.predict_proba(X_valid))
cm = calculate_confusion_matrix(y_valid, y_pred_svm)
roc_auc = calculate_roc_auc_score(y_valid, svm_model.predict_proba(X_valid))
f1 = calculate_f1_score(y_valid, y_pred_svm)
mae = calculate_mean_absolute_error(y_valid, y_pred_svm)
mse = calculate_mean_squared_error(y_valid, y_pred_svm)

# Print evaluation metrics
print("Evaluation Metrics for SVM Classifier:")
print("Classification Accuracy:", acc)
print("Logarithmic Loss:", ll)
print("Confusion Matrix:\n", cm)
print("ROC AUC Score:", roc_auc)
print("F1 Score:", f1)
print("Mean Absolute Error:", mae)
print("Mean Squared Error:", mse)


In [None]:
# Calculate training accuracy
train_accuracy = svm_model.score(X_train_sampled, y_train_sampled)

# Calculate test accuracy
test_accuracy = svm_model.score(X_valid, y_valid)

print("Training Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)
