# Final document with all the plots

In [None]:
import dt 
#import cnn 
import nn 
from load_data import load_data
import os
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Load the train/test features and labels through the project-local load_data helper.
# NOTE(review): later cells reshape per-class pixel statistics to (28, 28), so each
# row of X is presumably a flattened 784-pixel image — confirm against load_data.
X_train, y_train, X_test, y_test = load_data()

# Exploratory data analysis

In [None]:
# Toggle for writing the generated figures to disk (read by the plotting cells below).
savefigs = True
if savefigs:
    # exist_ok=True makes the call a no-op when the folder is already there,
    # replacing the separate isdir() check with a single call.
    os.makedirs('figs', exist_ok=True)


#if savefigs: plt.savefig('../figs/DegreeDistribution.png', bbox_inches = 'tight')

In [None]:
# Apply a global seaborn theme (white grid, "flare" palette) to the figures drawn below.
sns.set_theme(style='whitegrid', palette="flare")
# Human-readable class names; the plotting helpers index this list by class label
# (NAMES[c]) for tick labels, subplot titles and legends.
NAMES = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Shirt']

In [None]:

def plot_y_distribution(y, t):
    ''' Plot the distribution of the labels as a bar chart.

    Parameters
    ----------
    y : array-like
        Class labels; each distinct value must be a valid index into NAMES.
    t : str
        Name of the data set, inserted into the figure title.

    The figure is created as a side effect; nothing is returned.
    '''
    classes, classes_counts = np.unique(y, return_counts=True)

    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=classes, y=classes_counts, ax=ax)
    ax.set_title(f'Class distribution of {t}', fontsize=16, fontweight='bold')
    ax.set_ylabel('Count')

    # The bars sit at categorical positions 0..k-1. Pin the ticks there and
    # derive the labels from the classes that are actually present, so a split
    # that lacks some class is neither mislabelled nor rejected by matplotlib
    # for a tick/label count mismatch.
    ax.set_xticks(range(len(classes)))
    ax.set_xticklabels([NAMES[int(c)] for c in classes])



# Bar chart of the label counts in the training split.
plot_y_distribution(y_train, "Training data set")
#if savefigs: plt.savefig('figs/distributiontrainingdataset.png', bbox_inches = 'tight')

In [None]:
# Bar chart of the label counts in the test split.
plot_y_distribution(y_test, "Test data set")
#if savefigs: plt.savefig('figs/distributiontestdataset.png', bbox_inches = 'tight')

In [None]:
def plot_images(X, y, measure='mean'):
    ''' Plot the pixel-wise mean, median, or std image of every class.

    Parameters
    ----------
    X : np.ndarray
        One flattened image per row; each row must hold 28 * 28 values.
    y : np.ndarray
        Class label of every row of X; each distinct value must be a valid
        index into NAMES.
    measure : {'mean', 'median', 'std'}
        Statistic computed per pixel over all images of a class.

    Raises
    ------
    ValueError
        If `measure` is not one of the supported statistics.
    '''
    reducers = {'mean': np.mean, 'median': np.median, 'std': np.std}
    # Validate before any figure is created; an unknown measure used to leave
    # the image variable unbound and fail only inside the plotting loop.
    if measure not in reducers:
        raise ValueError(
            f"measure must be one of {sorted(reducers)}, got {measure!r}")
    reduce_pixels = reducers[measure]

    classes = np.unique(y)
    # squeeze=False always yields a 2-D array of axes, so a single class works too.
    fig, axes = plt.subplots(1, len(classes), figsize=(20, 8), squeeze=False)
    # Pair axes with classes by position instead of indexing axes by label
    # value, so labels need not be exactly 0..k-1.
    for axis, c in zip(axes[0], classes):
        av_img = reduce_pixels(X[y == c], axis=0).reshape(28, 28)
        axis.imshow(av_img, cmap='gray')
        axis.set_title(NAMES[int(c)], fontsize=16, fontweight='bold')


# Per-class summary images of the training set: pixel-wise mean (the default
# measure), then standard deviation, then median.
plot_images(X_train, y_train)
#if savefigs: plt.savefig('figs/mean.png', bbox_inches = 'tight')
plot_images(X_train, y_train, measure='std')
#if savefigs: plt.savefig('figs/std.png', bbox_inches = 'tight')
plot_images(X_train, y_train, measure='median')
#if savefigs: plt.savefig('figs/median.png', bbox_inches = 'tight')

In [None]:
def plot_color_distribution(X, y):
    ''' Plot, for every class, a histogram of the pixel values of its mean image '''
    labels = np.unique(y)

    # Mean image of every class, stored at the position given by its label.
    mean_images = np.zeros((len(labels), 28, 28))
    for label in labels:
        class_rows = X[y == label]
        mean_images[label] = np.mean(class_rows, axis=0).reshape(28, 28)

    # One histogram (with a KDE overlay) per class, laid out side by side.
    fig, axes = plt.subplots(1, len(labels), figsize=(30, 6))
    for label in labels:
        panel = axes[label]
        pixel_values = mean_images[label].flatten()
        sns.histplot(pixel_values, ax=panel, kde=True)
        panel.set_title(NAMES[label], fontsize=16, fontweight='bold')
        panel.set_xlabel('Pixel value')
        panel.set_ylabel('Count')

# Histograms of the pixel values of each class's mean training image.
plot_color_distribution(X_train, y_train)
#if savefigs: plt.savefig('figs/colordistribution.png', bbox_inches = 'tight')

# Neural Network

In [None]:
def make_plots(data, labels, activationfunction):
    ''' Plot the history of each metric in `labels` against the epoch.

    Parameters
    ----------
    data : table-like accepted by seaborn (e.g. a DataFrame)
        Must contain an 'epoch' column and one column per entry of `labels`.
    labels : list of str
        Column names to plot; one subplot is drawn per label.
    activationfunction : str
        Name of the activation function, inserted into each subplot title.

    The figure is created as a side effect; nothing is returned.
    '''
    # Defined once, outside the loop: one colour per subplot, cycled if needed.
    colors_taratt = ['#761878', '#826fc9', '#b1c2f0']

    # One subplot per label (at least one, so an empty list still yields a
    # figure). squeeze=False keeps `axes` an array even for a single subplot.
    fig, axes = plt.subplots(1, max(len(labels), 1), figsize=(20, 5), squeeze=False)
    for i, (axis, label) in enumerate(zip(axes.flat, labels)):
        # `palette` only takes effect together with `hue`; with a single line
        # per subplot the colour has to be passed through `color`.
        line_color = colors_taratt[i % len(colors_taratt)]
        sns.lineplot(x='epoch', y=label, color=line_color, data=data, ax=axis)
        axis.set_title(f' History of {label} with {activationfunction}', size=13)
        axis.set(ylabel=f'{label}', xlabel='Number of epochs')

In [None]:
# Train the project's NeuralNetwork with the leaky ReLU activation, evaluate it
# on the test split, then plot the training history and optionally save it.
# NOTE(review): the value returned by TRAIN is handed to make_plots, which reads
# the columns 'epoch', 'accuracy' and 'loss' from it — presumably a per-epoch
# DataFrame; confirm in nn.py.
nn_leakyrelu = nn.NeuralNetwork(test=True, activation_name='leaky_relu')
df_leakyrelu = nn_leakyrelu.TRAIN(X_train, y_train, epochs=200, testing=True)
nn_leakyrelu.TEST(X_test, y_test)
make_plots(df_leakyrelu, ["accuracy", "loss"], "leaky_relu")
if savefigs: plt.savefig('figs/nn_leakyrelu.png', bbox_inches = 'tight')

In [None]:
# Train the project's NeuralNetwork with the sigmoid activation, evaluate it on
# the test split, then plot the training history and optionally save it.
nn_sigmoid = nn.NeuralNetwork(test=True, activation_name='sigmoid')
df_sigmoid = nn_sigmoid.TRAIN(X_train, y_train, epochs=200, testing=True)
# Evaluate the sigmoid network itself; this cell used to re-test the leaky ReLU
# model from the previous cell, so the sigmoid model was never scored.
nn_sigmoid.TEST(X_test, y_test)
make_plots(df_sigmoid, ["accuracy", "loss"], "sigmoid")
if savefigs:
    plt.savefig('figs/nn_sigmoid.png', bbox_inches = 'tight')

# Decision Tree

In [None]:
# Build 3-component PCA features for the decision tree.
#
# Scaler and PCA are fitted on the training data only and then *applied* to the
# test data. Re-fitting them on the test split would project the test samples
# onto a different set of axes (different scaling, component directions and
# signs), so a tree trained on the training projection would see test features
# in another coordinate system.
scaler = StandardScaler()
x = scaler.fit_transform(X_train)
pca = PCA(n_components=3)
pcax = pca.fit_transform(x)
df = pd.DataFrame(data = pcax, columns= ['pc 1', 'pc 2', 'pc 3'])
# Append the labels as the last column 'y'.
df = pd.concat([df, pd.DataFrame(y_train, columns = ['y'])], axis = 1)

# Same table for the test split, reusing the fitted scaler and PCA.
x_t = scaler.transform(X_test)
pcax = pca.transform(x_t)
df_t = pd.DataFrame(data = pcax, columns= ['pc 1', 'pc 2', 'pc 3'])
df_t = pd.concat([df_t, pd.DataFrame(y_test, columns = ['y'])], axis = 1)

#### PCA for Decision Tree

In [None]:
# Preview the first rows of the training table (columns 'pc 1', 'pc 2', 'pc 3', 'y').
df.head()

In [None]:
# 2-D scatter of two of the three principal components, one colour per class.
# `colours` and `targets` are reused by the 3-D scatter cell below.
colours = ['#F59FB8', '#FFCD6D', '#B84543', '#D98E4D', '#8F7CB2']
targets = [0, 1, 2, 3, 4]

fig, ax = plt.subplots(figsize = (6,6))
# The axis labels name the components that are actually plotted ('pc 2' on x,
# 'pc 3' on y); they previously claimed components 1 and 2.
ax.set_xlabel('Principal Component 2', fontsize = 15)
ax.set_ylabel('Principal Component 3', fontsize = 15)
ax.set_title('2 component PCA', fontsize = 20)
for target, color in zip(targets,colours):
    indicesToKeep = df['y'] == target
    ax.scatter(df.loc[indicesToKeep, 'pc 2']
               , df.loc[indicesToKeep, 'pc 3']
               , c = color
               , s = 50)
# One scatter call per target, in order, so the legend entries line up with NAMES.
ax.legend(NAMES)
# Request the grid explicitly: a bare ax.grid() *toggles* visibility, which
# switches the grid off when the active theme has already enabled it.
ax.grid(True)

In [None]:
# 3-D scatter of all three principal components of the training data.
# Relies on `targets` and `colours` defined in the 2-D scatter cell above.
fig = plt.figure(figsize = (10,10))
ax = plt.axes(projection='3d')

# One scatter call per class so each class gets its own colour and legend entry.
for target, color in zip(targets,colours):
    indicesToKeep = df['y'] == target
    ax.scatter(df.loc[indicesToKeep, 'pc 1'], 
                df.loc[indicesToKeep, 'pc 2'],
                df.loc[indicesToKeep, 'pc 3'],
                c = color)

ax.set_xlabel('Principal Component 1', labelpad=15);
ax.set_ylabel('Principal Component 2', labelpad = 15);
ax.set_zlabel('Principal Component 3');
# NOTE(review): tight_layout() below recomputes the subplot parameters, so this
# manual adjustment may have no lasting effect — confirm whether it is needed.
plt.subplots_adjust(right = 0.2)
ax.legend(NAMES);
plt.tight_layout();

if savefigs:plt.savefig('figs/pca.png', bbox_inches = 'tight')

In [None]:
# Inputs for the decision tree, taken from the PCA tables.
# Note: X_train / X_test are rebound here, replacing the arrays returned by
# load_data(); cells above that use them must not be re-run after this one.

# Features: every column except the trailing label column.
X_train = df.iloc[:, :-1].to_numpy()
X_test = df_t.iloc[:, :-1].to_numpy()

# Targets: the trailing label column, shaped as a single-column 2-D array.
Y_train = df.iloc[:, -1].to_numpy().reshape(-1, 1)
Y_test = df_t.iloc[:, -1].to_numpy().reshape(-1, 1)

In [None]:
# Fit the project's own decision tree (module `dt`, not the scikit-learn class
# imported at the top) on the PCA features and predict the test split.
classifier = dt.DecisionTreeClassifier(min_samples_split=3, max_depth=3)
classifier.fit(X_train,Y_train)
Y_pred = classifier.predict(X_test)

In [None]:
# Test accuracy of the tree fitted in the previous cell.
print(accuracy_score(Y_test, Y_pred))

In [None]:

# Sweep the maximum tree depth from 1 to 19 and record the test accuracy of
# each fitted tree as a [depth, accuracy] row.
l = list()
for i in range(1,20):
    # A fresh tree per depth; `classifier` and `Y_pred` end up holding the
    # results of the last (deepest) tree after the loop.
    classifier = dt.DecisionTreeClassifier(min_samples_split=3, max_depth=i)
    classifier.fit(X_train,Y_train)
    Y_pred = classifier.predict(X_test)
    
    l.append([i, accuracy_score(Y_test, Y_pred)])

# NOTE(review): the column key 'depht' looks like a misspelling of 'depth'; it
# is left unchanged because later code may look the column up by this name.
dt_dataframe =pd.DataFrame(data=l, columns=['depht', 'accuracy'])
    