## Multiclass classification

### The Iris dataset

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam, SGD

In [2]:
dataset_path = 'datasets/glass.csv'
df = pd.read_csv(dataset_path)

In [3]:
import seaborn as sns
# sns.pairplot(df, hue="stock_symbol")

In [4]:
df.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1


In [5]:
X = df.drop('Type', axis=1)
X.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0


In [6]:
target_names = df['Type'].unique()
target_names

array([1, 2, 3, 5, 6, 7], dtype=int64)

In [7]:
target_dict = {n:i for i, n in enumerate(target_names)}
target_dict

{1: 0, 2: 1, 3: 2, 5: 3, 6: 4, 7: 5}

In [8]:
y= df['Type'].map(target_dict)
y.head()

0    0
1    0
2    0
3    0
4    0
Name: Type, dtype: int64

In [9]:
from tensorflow.keras.utils import to_categorical

In [10]:
y_cat = to_categorical(y)

In [11]:
y_cat[:1000]

array([[1., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0.],
       ...,
       [0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 1.]])

In [12]:
from sklearn.model_selection import train_test_split

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X.values, y_cat,
                                                   test_size=0.5)

In [14]:
model = Sequential()
model.add(Dense(6, input_shape=(9,), activation='softmax'))
model.compile(Adam(learning_rate=0.1),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [15]:
model.fit(X_train, y_train, epochs=50, validation_split=0.1)

Epoch 1/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 71ms/step - accuracy: 0.2096 - loss: 18.2279 - val_accuracy: 0.1818 - val_loss: 12.1601
Epoch 2/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.3789 - loss: 7.6920 - val_accuracy: 0.2727 - val_loss: 7.4378
Epoch 3/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.3398 - loss: 6.3806 - val_accuracy: 0.2727 - val_loss: 8.3023
Epoch 4/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.3203 - loss: 4.0919 - val_accuracy: 0.1818 - val_loss: 13.1967
Epoch 5/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.4271 - loss: 4.4197 - val_accuracy: 0.4545 - val_loss: 13.9435
Epoch 6/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.4635 - loss: 5.2740 - val_accuracy: 0.1818 - val_loss: 12.9983
Epoch 7/50
[1m3/3[0m [32m━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1f7125f1940>

In [16]:
y_pred = model.predict(X_test)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 


In [17]:
y_pred[:5]

array([[7.0780134e-01, 1.9195671e-01, 3.7242610e-02, 1.3357910e-02,
        2.5831670e-02, 2.3809688e-02],
       [8.8905375e-07, 4.1374108e-03, 1.6566688e-05, 2.9288523e-04,
        3.9934897e-04, 9.9515295e-01],
       [6.4990407e-01, 1.6194125e-01, 5.1659323e-02, 9.5757917e-03,
        7.8710988e-02, 4.8208572e-02],
       [5.5536915e-02, 4.6122256e-01, 1.4469168e-02, 4.3518507e-01,
        3.3404347e-02, 1.8191358e-04],
       [2.3110013e-01, 3.8753697e-01, 2.5315922e-01, 7.9865120e-03,
        5.8316808e-02, 6.1900336e-02]], dtype=float32)

In [18]:
y_test_class = np.argmax(y_test, axis=1)
y_pred_class = np.argmax(y_pred, axis=1)

In [19]:
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, confusion_matrix

In [20]:
print(classification_report(y_test_class, y_pred_class))

              precision    recall  f1-score   support

           0       0.51      0.97      0.67        37
           1       0.55      0.15      0.24        40
           2       0.00      0.00      0.00         5
           3       0.50      0.20      0.29        10
           4       0.83      1.00      0.91         5
           5       0.67      1.00      0.80        10

    accuracy                           0.55       107
   macro avg       0.51      0.55      0.48       107
weighted avg       0.53      0.55      0.46       107



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [21]:
confusion_matrix(y_test_class, y_pred_class)

array([[36,  1,  0,  0,  0,  0],
       [30,  6,  0,  2,  1,  1],
       [ 5,  0,  0,  0,  0,  0],
       [ 0,  4,  0,  2,  0,  4],
       [ 0,  0,  0,  0,  5,  0],
       [ 0,  0,  0,  0,  0, 10]], dtype=int64)

### Conclusie 
# tot welke type glas behoord de data? 


wij hebben veel verschillende layers meerdere neurons dit blijkt het beste resultaat te zijn. de accuracy is niet hoger te krijgen dan 60% aangezien de waardes te dicht op elkaar liggen. En dit beinvloed de accuracy ontzettend. 
