In [None]:
import numpy as np
import pandas as pd
import cv2
import functools

import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier

from sklearn.model_selection import train_test_split

from model_testing import ModelSelection
from plotting import show_distribution
from plot_val import calc_roc_curve, plot_roc_curve, plot_confusion_matrix

from apply_processing import apply, resize_images, function_hsv, function_gray
from apply_processing import function_label, blur_images, adjust_contrast_images, rotated_labels
from apply_processing import filter_images, edge_images, contour_images, rotate_images

In [None]:
# Load the dataset table (its `Image` and `Label` columns are used below).
# A forward-slash path resolves on Windows, Linux and macOS alike, whereas the
# backslash separator is only understood as a directory separator on Windows.
df = pd.read_csv("Dataset/images_mushrooms.csv")

df.head()

In [None]:
# Column names, dtypes and non-null counts of the loaded table.
df.info()

In [None]:
# Number of rows per distinct label value (class balance check).
df['Label'].value_counts()

# Processing the images

## No Augmentation

In [None]:
# Fixed contrast, brightness, sharpness

def processing_noAug(df):
    """Run the fixed, non-augmented preprocessing chain over ``df['Image']``.

    The images are passed, in order, through ``function_hsv``,
    ``resize_images`` (size ``(150, 150)``), ``filter_images``,
    ``adjust_contrast_images`` (contrast 1.75, brightness 50) and
    ``blur_images``.

    Args:
        df: table providing the ``Image`` and ``Label`` columns.

    Returns:
        tuple: ``(processed_images, df["Label"])``.
    """
    raw_images, labels = df['Image'], df["Label"]
    target_size = (150, 150)

    # Rebind a single name at each stage so earlier intermediates can be freed.
    processed = function_hsv(raw_images)
    processed = resize_images(processed, target_size)
    processed = filter_images(processed)
    processed = adjust_contrast_images(processed, contrast=1.75, brightness=50)
    processed = blur_images(processed)

    return processed, labels

# Run the non-augmented pipeline once; `images_noAug` feeds the
# "Modelling with no augmentation" section further down.
images_noAug, labels_noAug = processing_noAug(df)

## Augmentation function

In [None]:
# With Contour + Fixed contrast, brightness, sharpness

def processing_contour(df):
    """Build contour-augmented images from ``df['Image']``.

    An HSV copy (``function_hsv``) and a grayscale copy (``function_gray``) of
    the images are each sent through the same resize -> filter -> contrast ->
    blur chain. The grayscale result is handed to ``edge_images``, and the
    edges together with the HSV images are handed to ``contour_images``.

    Args:
        df: table providing the ``Image`` column.

    Returns:
        list: the items produced by ``contour_images``.
    """
    size = (150, 150)
    contrast = 1.75
    brightness = 50

    def _prepare(converted):
        # Shared chain applied identically to both colour-space variants.
        converted = resize_images(converted, size)
        converted = filter_images(converted)
        converted = adjust_contrast_images(converted, contrast=contrast, brightness=brightness)
        return blur_images(converted)

    images = df['Image']

    images_hsv = _prepare(function_hsv(images))
    images_gray = _prepare(function_gray(images))

    # Edges come from the grayscale variant; contours are combined with the HSV one.
    edged_images = edge_images(images_gray)
    images_contour = contour_images(edged_images, images_hsv)

    return list(images_contour)

In [None]:
# Random Contrast, Brightness, Sharpness

def processing_random(df, rng=None):
    """Preprocess ``df['Image']`` with randomly drawn contrast and brightness.

    One contrast value (integer in ``[1, 5)``) and one brightness value
    (integer in ``[-100, 100)``) are drawn per call and applied to every image
    in that call.

    Args:
        df: table providing the ``Image`` column.
        rng: optional ``numpy.random.Generator``. When given, both values are
            drawn from it so the augmentation can be reproduced with a seeded
            generator. When ``None`` (the default) the global ``np.random``
            state is used, exactly as before.

    Returns:
        list: the processed images.
    """
    size = (150, 150)

    if rng is None:
        contrast = np.random.randint(1, 5)
        brightness = np.random.randint(-100, 100)
    else:
        # Cast to plain Python ints so downstream helpers receive the same
        # scalar type as in the default (global-state) branch.
        contrast = int(rng.integers(1, 5))
        brightness = int(rng.integers(-100, 100))

    images_hsv = function_hsv(df['Image'])
    images_hsv = resize_images(images_hsv, size)
    images_hsv = filter_images(images_hsv)
    images_hsv = adjust_contrast_images(images_hsv, contrast=contrast, brightness=brightness)
    images_hsv = blur_images(images_hsv)

    return list(images_hsv)

In [None]:
# Fixed contrast, brightness, sharpness

def processing_fixed(df):
    """Preprocess ``df['Image']`` with fixed contrast and brightness.

    The images are passed through ``function_hsv``, ``resize_images`` (size
    ``(150, 150)``), ``filter_images``, ``adjust_contrast_images`` (contrast
    1.75, brightness 50) and ``blur_images``.

    Args:
        df: table providing the ``Image`` column.

    Returns:
        list: the processed images.
    """
    size = (150, 150)
    contrast = 1.75
    brightness = 50

    images_hsv = function_hsv(df['Image'])
    images_hsv = resize_images(images_hsv, size)
    images_hsv = filter_images(images_hsv)
    images_hsv = adjust_contrast_images(images_hsv, contrast=contrast, brightness=brightness)
    images_hsv = blur_images(images_hsv)

    return list(images_hsv)

In [None]:
# Rotated and flipped images

def processing_rotated(df):
    """Preprocess ``df['Image']`` with the fixed chain, then ``rotate_images``.

    The images are passed through ``function_hsv``, ``resize_images`` (size
    ``(150, 150)``), ``filter_images``, ``adjust_contrast_images`` (contrast
    1.75, brightness 50), ``blur_images`` and finally ``rotate_images``.

    Args:
        df: table providing the ``Image`` column.

    Returns:
        list: the items produced by ``rotate_images``.
        NOTE(review): ``rotate_images`` may yield more items than input rows
        (a ``rotated_labels`` helper is imported alongside it) -- confirm.
    """
    size = (150, 150)
    contrast = 1.75
    brightness = 50

    images_hsv = function_hsv(df['Image'])
    images_hsv = resize_images(images_hsv, size)
    images_hsv = filter_images(images_hsv)
    images_hsv = adjust_contrast_images(images_hsv, contrast=contrast, brightness=brightness)
    images_hsv = blur_images(images_hsv)
    images_hsv = rotate_images(images_hsv)

    return list(images_hsv)

In [None]:
# Augmentation pipelines to run. Each one gets a label function; presumably the
# two lists are paired by position inside `apply` -- confirm in apply_processing.
# NOTE(review): `processing_rotated` is defined above but not included here.
functions = [processing_contour, processing_random, processing_fixed]
labels = [function_label] * 3

images_aug, labels_aug = apply(functions, labels, df)

# Images info

In [None]:
# Compare the dataset sizes without and with augmentation.
print(f"Length of images no augmentation {len(images_noAug)}")
print(f"Length of images with augmentation {len(images_aug)}")

## Images info no augmentation

In [None]:
# show_distribution(images_noAug)

## Images info augmentation

In [None]:
# show_distribution(images_aug)

# Modelling with no augmentation

## Reduction with LDA

In [None]:
# Fresh preprocessing objects for the non-augmented run.
scaler = MinMaxScaler()
le = LabelEncoder()

# The projection is stored in exactly three named columns and drawn as a 3-D
# scatter below, so request three components explicitly instead of relying on
# the default of min(n_classes - 1, n_features).
lda = LinearDiscriminantAnalysis(n_components=3)

In [None]:
# Flatten every processed image into a 1-D feature vector.
images = list(map(np.ravel, images_noAug))

# Min-max scale the features and integer-encode the labels.
images_scaled = scaler.fit_transform(images)
labels_encoded = le.fit_transform(df["Label"])

In [None]:
# Fit LDA on the scaled features, then project the same samples.
images_lda = lda.fit(images_scaled, labels_encoded).transform(images_scaled)

# Keep the projection next to the original (string) labels for plotting/modelling.
lda_columns = ["First LDA", "Second LDA", "Third LDA"]
images_lda_df = pd.DataFrame(images_lda, columns=lda_columns)
images_lda_df["Label"] = le.inverse_transform(labels_encoded)

In [None]:
# 3-D scatter of the LDA projection, one series per label.
fig = plt.figure(figsize=(15,9))
ax = fig.add_subplot(projection='3d')

for label in images_lda_df["Label"].unique():
    # Select this label's rows once and reuse the subset for all three axes.
    subset = images_lda_df[images_lda_df["Label"] == label]
    ax.scatter(
        subset["First LDA"],
        subset["Second LDA"],
        subset["Third LDA"],
        label=label,
    )

ax.legend()

ax.set_xlabel("First LDA")
ax.set_ylabel("Second LDA")
ax.set_zlabel("Third LDA")

plt.show()

## Modelling

In [None]:
# Features are the LDA component columns; the target is the integer-encoded label.
# `images_lda_df` is the frame built in the reduction step; `images_pca_df` is
# never assigned in the visible notebook and raises NameError on a fresh kernel.
X = images_lda_df.drop(columns=["Label"])
y = pd.Series(le.fit_transform(images_lda_df["Label"]))

### Using train_test_split

In [None]:
# Candidate classifiers, all with default hyper-parameters.
candidate_models = [
    RandomForestClassifier(),
    DecisionTreeClassifier(),
    MLPClassifier(),
    SVC(),
]
selection = ModelSelection(X, y, candidate_models)

In [None]:
# Compare the candidates using a train/test split.
# NOTE(review): the evaluation itself lives in model_testing.ModelSelection, not visible here.
selection.use_train_test_split()

### Using StratifiedKFold

In [None]:
# Fresh, unfitted candidates so this run does not share estimators with the previous one.
candidate_models = [
    RandomForestClassifier(),
    DecisionTreeClassifier(),
    MLPClassifier(),
    SVC(),
]
selection = ModelSelection(X, y, candidate_models)

In [None]:
# Compare the candidates using stratified k-fold cross-validation.
# NOTE(review): the evaluation itself lives in model_testing.ModelSelection, not visible here.
selection.use_stratifiedkfold()

## Evaluation

In [None]:
# Hold out 33% of the samples for evaluation; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [None]:
# All four estimators are stochastic (SVC only through its probability
# calibration), so seed them with the same value used for the train/test split
# to make the ROC curves and confusion matrices reproducible across re-runs.
# probability=True is required for SVC.predict_proba, used for the ROC curves.
svm = SVC(probability=True, random_state=42)
dtc = DecisionTreeClassifier(random_state=42)
mlpc = MLPClassifier(random_state=42)
rfc = RandomForestClassifier(random_state=42)

In [None]:
# Train every classifier on the same training split.
svm.fit(X_train, y_train)
dtc.fit(X_train, y_train)
mlpc.fit(X_train, y_train)
rfc.fit(X_train, y_train)

### ROC Curve and AUC

In [None]:
# Class-probability estimates on the held-out set, one array per fitted model.
svm_proba, dtc_proba, mlpc_proba, rfc_proba = (
    model.predict_proba(X_test) for model in (svm, dtc, mlpc, rfc)
)

In [None]:
# ROC inputs per model: calc_roc_curve returns three values, unpacked here as
# false-positive rates, true-positive rates and AUCs.
# NOTE(review): presumably one entry per class -- confirm in plot_val.calc_roc_curve.
fprs_svm, tprs_svm, aucs_svm = calc_roc_curve(y_test, svm_proba)
fprs_dtc, tprs_dtc, aucs_dtc = calc_roc_curve(y_test, dtc_proba)
fprs_mlpc, tprs_mlpc, aucs_mlpc = calc_roc_curve(y_test, mlpc_proba)
fprs_rfc, tprs_rfc, aucs_rfc = calc_roc_curve(y_test, rfc_proba)

In [None]:
# ROC plot for the SVC; presumably plot_roc_curve draws on the figure created here.
fig = plt.figure(figsize=(15, 9))

plot_roc_curve(fprs_svm, tprs_svm, aucs_svm, "SVC")

In [None]:
# ROC plot for the random forest; presumably plot_roc_curve draws on the figure created here.
fig = plt.figure(figsize=(15, 9))

plot_roc_curve(fprs_rfc, tprs_rfc, aucs_rfc, "RF Classifier")

In [None]:
# ROC plot for the MLP; presumably plot_roc_curve draws on the figure created here.
fig = plt.figure(figsize=(15, 9))

plot_roc_curve(fprs_mlpc, tprs_mlpc, aucs_mlpc, "MLP Classifier")

In [None]:
# ROC plot for the decision tree; presumably plot_roc_curve draws on the figure created here.
fig = plt.figure(figsize=(15, 9))

plot_roc_curve(fprs_dtc, tprs_dtc, aucs_dtc, "DT Classifier")

### Confusion Matrix

In [None]:
# Hard class predictions on the held-out set, one array per fitted model.
svm_pred, dtc_pred, mlpc_pred, rfc_pred = (
    model.predict(X_test) for model in (svm, dtc, mlpc, rfc)
)

In [None]:
# Confusion matrix for the SVC on the held-out set.
# NOTE(review): df["Label"] is presumably used only for the class names -- confirm in plot_val.
fig = plt.figure(figsize=(15, 9))

plot_confusion_matrix(df["Label"], y_test, svm_pred, 'SVC')

In [None]:
# Confusion matrix for the decision tree on the held-out set.
# NOTE(review): df["Label"] is presumably used only for the class names -- confirm in plot_val.
fig = plt.figure(figsize=(15, 9))

plot_confusion_matrix(df["Label"], y_test, dtc_pred, 'DT Classifier')

In [None]:
# Confusion matrix for the MLP on the held-out set.
# NOTE(review): df["Label"] is presumably used only for the class names -- confirm in plot_val.
fig = plt.figure(figsize=(15, 9))

plot_confusion_matrix(df["Label"], y_test, mlpc_pred, 'MLP Classifier')

In [None]:
# Confusion matrix for the random forest on the held-out set.
# NOTE(review): df["Label"] is presumably used only for the class names -- confirm in plot_val.
fig = plt.figure(figsize=(15, 9))

plot_confusion_matrix(df["Label"], y_test, rfc_pred, 'RF Classifier')

# Modelling with augmentation

## Reduction with LDA

In [None]:
# Fresh preprocessing objects so nothing fitted on the non-augmented data leaks in.
scaler = MinMaxScaler()
le = LabelEncoder()

# The projection is stored in exactly three named columns and drawn as a 3-D
# scatter below, so request three components explicitly instead of relying on
# the default of min(n_classes - 1, n_features).
lda = LinearDiscriminantAnalysis(n_components=3)

In [None]:
# Flatten every augmented image into a 1-D feature vector (re-running is harmless:
# ravelling an already flat array leaves it flat).
images_aug = list(map(np.ravel, images_aug))

# Min-max scale the features and integer-encode the augmented labels.
images_scaled = scaler.fit_transform(images_aug)
labels_encoded = le.fit_transform(labels_aug)

In [None]:
# Fit LDA on the scaled augmented features, then project the same samples.
images_lda = lda.fit(images_scaled, labels_encoded).transform(images_scaled)

# Keep the projection next to the original (string) labels for plotting/modelling.
lda_columns = ["First LDA", "Second LDA", "Third LDA"]
images_lda_df = pd.DataFrame(images_lda, columns=lda_columns)
images_lda_df["Label"] = le.inverse_transform(labels_encoded)

In [None]:
# 3-D scatter of the LDA projection of the augmented data, one series per label.
fig = plt.figure(figsize=(15,9))
ax = fig.add_subplot(projection='3d')

for label in images_lda_df["Label"].unique():
    # Select this label's rows once and reuse the subset for all three axes.
    subset = images_lda_df[images_lda_df["Label"] == label]
    ax.scatter(
        subset["First LDA"],
        subset["Second LDA"],
        subset["Third LDA"],
        label=label,
    )

ax.legend()

ax.set_xlabel("First LDA")
ax.set_ylabel("Second LDA")
ax.set_zlabel("Third LDA")

plt.show()

## Modelling

In [None]:
# Features are the LDA component columns; the target is the integer-encoded label.
# `images_lda_df` is the frame built in the reduction step; `images_pca_df` is
# never assigned in the visible notebook and raises NameError on a fresh kernel.
X = images_lda_df.drop(columns=["Label"])
y = pd.Series(le.fit_transform(images_lda_df["Label"]))

### Using train_test_split

In [None]:
# Candidate classifiers, all with default hyper-parameters.
candidate_models = [
    RandomForestClassifier(),
    DecisionTreeClassifier(),
    MLPClassifier(),
    SVC(),
]
selection = ModelSelection(X, y, candidate_models)

In [None]:
# Compare the candidates on the augmented data using a train/test split.
# NOTE(review): the evaluation itself lives in model_testing.ModelSelection, not visible here.
selection.use_train_test_split()

### Using StratifiedKFold

In [None]:
# Fresh, unfitted candidates so this run does not share estimators with the previous one.
candidate_models = [
    RandomForestClassifier(),
    DecisionTreeClassifier(),
    MLPClassifier(),
    SVC(),
]
selection = ModelSelection(X, y, candidate_models)

In [None]:
# Compare the candidates on the augmented data using stratified k-fold cross-validation.
# NOTE(review): the evaluation itself lives in model_testing.ModelSelection, not visible here.
selection.use_stratifiedkfold()

## Evaluation

In [None]:
# Hold out 33% of the augmented samples for evaluation; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [None]:
# All four estimators are stochastic (SVC only through its probability
# calibration), so seed them with the same value used for the train/test split
# to make the ROC curves and confusion matrices reproducible across re-runs.
# probability=True is required for SVC.predict_proba, used for the ROC curves.
svm = SVC(probability=True, random_state=42)
dtc = DecisionTreeClassifier(random_state=42)
mlpc = MLPClassifier(random_state=42)
rfc = RandomForestClassifier(random_state=42)

In [None]:
# Train every classifier on the same (augmented) training split.
svm.fit(X_train, y_train)
dtc.fit(X_train, y_train)
mlpc.fit(X_train, y_train)
rfc.fit(X_train, y_train)

### ROC Curve and AUC

In [None]:
# Class-probability estimates on the held-out set, one array per fitted model.
svm_proba, dtc_proba, mlpc_proba, rfc_proba = (
    model.predict_proba(X_test) for model in (svm, dtc, mlpc, rfc)
)

In [None]:
# ROC inputs per model: calc_roc_curve returns three values, unpacked here as
# false-positive rates, true-positive rates and AUCs.
# NOTE(review): presumably one entry per class -- confirm in plot_val.calc_roc_curve.
fprs_svm, tprs_svm, aucs_svm = calc_roc_curve(y_test, svm_proba)
fprs_dtc, tprs_dtc, aucs_dtc = calc_roc_curve(y_test, dtc_proba)
fprs_mlpc, tprs_mlpc, aucs_mlpc = calc_roc_curve(y_test, mlpc_proba)
fprs_rfc, tprs_rfc, aucs_rfc = calc_roc_curve(y_test, rfc_proba)

In [None]:
# ROC plot for the SVC (augmented data); presumably plot_roc_curve draws on the figure created here.
fig = plt.figure(figsize=(15, 9))

plot_roc_curve(fprs_svm, tprs_svm, aucs_svm, "SVC")

In [None]:
# ROC plot for the decision tree (augmented data); presumably plot_roc_curve draws on the figure created here.
fig = plt.figure(figsize=(15, 9))

plot_roc_curve(fprs_dtc, tprs_dtc, aucs_dtc, "DT Classifier")

In [None]:
# ROC plot for the MLP (augmented data); presumably plot_roc_curve draws on the figure created here.
fig = plt.figure(figsize=(15, 9))

plot_roc_curve(fprs_mlpc, tprs_mlpc, aucs_mlpc, "MLP Classifier")

In [None]:
# ROC plot for the random forest (augmented data); presumably plot_roc_curve draws on the figure created here.
fig = plt.figure(figsize=(15, 9))

plot_roc_curve(fprs_rfc, tprs_rfc, aucs_rfc, "RF Classifier")

### Confusion Matrix

In [None]:
# Hard class predictions on the held-out set, one array per fitted model.
svm_pred, dtc_pred, mlpc_pred, rfc_pred = (
    model.predict(X_test) for model in (svm, dtc, mlpc, rfc)
)

In [None]:
# Confusion matrix for the SVC on the augmented held-out set.
# NOTE(review): the first argument is df["Label"] (the un-augmented labels) while
# y_test comes from the augmented split; presumably only the class names are used -- confirm in plot_val.
fig = plt.figure(figsize=(15, 9))

plot_confusion_matrix(df["Label"], y_test, svm_pred, 'SVC')

In [None]:
# Confusion matrix for the decision tree on the augmented held-out set.
# NOTE(review): the first argument is df["Label"] (the un-augmented labels) while
# y_test comes from the augmented split; presumably only the class names are used -- confirm in plot_val.
fig = plt.figure(figsize=(15, 9))

plot_confusion_matrix(df["Label"], y_test, dtc_pred, 'DT Classifier')

In [None]:
# Confusion matrix for the MLP on the augmented held-out set.
# NOTE(review): the first argument is df["Label"] (the un-augmented labels) while
# y_test comes from the augmented split; presumably only the class names are used -- confirm in plot_val.
fig = plt.figure(figsize=(15, 9))

plot_confusion_matrix(df["Label"], y_test, mlpc_pred, 'MLP Classifier')

In [None]:
# Confusion matrix for the random forest on the augmented held-out set.
# NOTE(review): the first argument is df["Label"] (the un-augmented labels) while
# y_test comes from the augmented split; presumably only the class names are used -- confirm in plot_val.
fig = plt.figure(figsize=(15, 9))

plot_confusion_matrix(df["Label"], y_test, rfc_pred, 'RF Classifier')