In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

print(f'Tensorflow version: {tf.__version__}')

## Get the Data

In [None]:
# Read the glass data set from the relative input path into a DataFrame.
glass_data = pd.read_csv(
    '../input/glass/glass.csv',
    parse_dates=True,
    encoding="cp1252",
)
# Show the first rows as a quick sanity check of the parsed columns.
glass_data.head()

In [None]:
# Count the non-null entries of every column per `Type` value, with `Type`
# turned back into a regular column for display.
(
    glass_data
    .groupby('Type')
    .count()
    .reset_index()
)

In [None]:
# Re-index the class labels from 1..7 to 0..6 so they can be used as
# zero-based class indices (the network below ends in a 7-unit softmax trained
# with `sparse_categorical_crossentropy`).
#
# The result is assigned back to the column instead of calling
# `replace(..., inplace=True)` on `glass_data['Type']`: that chained in-place
# form mutates a temporary Series, emits a FutureWarning on recent pandas and
# no longer updates `glass_data` once Copy-on-Write is active.
#
# NOTE: this cell is not idempotent -- running it twice shifts the labels
# twice, so re-run it only after reloading `glass_data`.
glass_data['Type'] = glass_data['Type'].replace(
    to_replace={1: 0, 2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6}
)

In [None]:
# Bar chart of the number of rows per class label (class-balance check).
fig = plt.figure(figsize=(10, 6))
sns.countplot(x='Type', data=glass_data)

### Test for correlation

In [None]:
# Pairwise correlation between all columns of `glass_data`.
# `method` can be switched to "spearman" or "kendall" for rank-based measures.
corr = glass_data.corr(method="pearson")

f, ax = plt.subplots(figsize=(10, 10))

# The mask is all False, i.e. no cell is hidden and the full matrix is drawn.
# The builtin `bool` is used as dtype because the `np.bool` alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24, where accessing it
# raises AttributeError.
sns.heatmap(
    corr,
    mask=np.zeros_like(corr, dtype=bool),
    cmap=sns.diverging_palette(220, 10, as_cmap=True),
    square=True,
    ax=ax,
    annot=True,
)

### Split the data into a training set and a test set

In [None]:
# Feature matrix: the nine measurement columns used as model inputs.
X = glass_data[[
    'RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe',
]]

# Target vector: the class label column.
y = glass_data['Type']

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed `random_state` makes the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

### Normalization of the data

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted transformation to both splits so no test-set information leaks
# into the preprocessing.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## Train the model

In [None]:
# Fully connected classifier: ten ReLU hidden layers, regularised with dropout
# (plus one batch-normalisation step), ending in a 7-way softmax.
model = tf.keras.models.Sequential([
  # Input layer sized from the number of feature columns in X_train.
  tf.keras.layers.Dense(150, activation='relu', input_shape=(X_train.shape[1],)),

  # First widening stack.
  tf.keras.layers.Dense(300, activation='relu'),
  tf.keras.layers.Dropout(rate=0.5),
  tf.keras.layers.Dense(750, activation='relu'),
  tf.keras.layers.Dropout(rate=0.7),
  tf.keras.layers.Dense(1350, activation='relu'),
  tf.keras.layers.Dropout(rate=0.5),

  # Narrow layer followed by batch normalisation.
  tf.keras.layers.Dense(250, activation='relu'),
  tf.keras.layers.BatchNormalization(),

  # Second widening stack, tapering back down.
  tf.keras.layers.Dense(350, activation='relu'),
  tf.keras.layers.Dropout(rate=0.5),
  tf.keras.layers.Dense(1500, activation='relu'),
  tf.keras.layers.Dropout(rate=0.7),
  tf.keras.layers.Dense(1300, activation='relu'),
  tf.keras.layers.Dropout(rate=0.7),
  tf.keras.layers.Dense(750, activation='relu'),
  tf.keras.layers.Dropout(rate=0.5),
  tf.keras.layers.Dense(250, activation='relu'),
  tf.keras.layers.Dropout(rate=0.5),

  # One output unit per class index 0..6.
  tf.keras.layers.Dense(7, activation='softmax'),
])

# Integer class labels -> sparse categorical cross-entropy; accuracy is
# tracked as the only extra metric.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Train for 90 epochs. The held-out split is passed as validation data so the
# returned History object also records per-epoch validation metrics.
cl = model.fit(
    X_train,
    y_train,
    epochs=90,
    validation_data=(X_test, y_test),
)

In [None]:
fig, ax = plt.subplots(figsize=(15, 5))

# History key -> extra line styling. Training curves use the default line
# style, validation curves are dashed. The order fixes the colour cycle.
curves = (
    ('accuracy', {}),
    ('val_accuracy', {'linestyle': '--'}),
    ('loss', {}),
    ('val_loss', {'linestyle': '--'}),
)
for key, line_kwargs in curves:
    ax.plot(cl.history[key], label=key, **line_kwargs)
ax.legend()

In [None]:
# Score the trained network on the held-out split. With the single
# 'accuracy' metric set at compile time, evaluate() yields loss then accuracy.
ModelLoss, ModelAccuracy = model.evaluate(x=X_test, y=y_test)

print(f'Test Loss is {ModelLoss}')
print(f'Test Accuracy is {ModelAccuracy}')

In [None]:
# Cross-check the accuracy by hand: count how often the most probable
# predicted class equals the true label.
y_pred = model.predict(X_test)
y_test_list = list(y_test)
total = len(y_test_list)

correct = sum(
    1
    for class_probs, true_label in zip(y_pred, y_test_list)
    if np.argmax(class_probs) == true_label
)

print(f'{correct}/{total}')
print(correct/total)

### Confusion matrix

In [None]:
# Predicted class = index of the largest softmax output for each test row.
p_test = model.predict(X_test).argmax(axis=1)
# Rows correspond to the true labels, columns to the predicted labels.
cm = tf.math.confusion_matrix(y_test, p_test)

f, ax = plt.subplots(figsize=(7, 5))
# Draw on the axes created above explicitly instead of relying on pyplot's
# implicit "current axes" state.
sns.heatmap(cm, annot=True, cmap='Blues', square=True, linewidths=0.01, linecolor='grey', ax=ax)
ax.set_title('Confusion matrix')
ax.set_ylabel('True label')
ax.set_xlabel('Predicted label')

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier, ExtraTreesClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import cross_validate, cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score
import warnings

# NOTE: this silences *every* warning for the rest of the kernel session,
# including convergence warnings from the estimators below.
warnings.filterwarnings("ignore")

# Baseline scikit-learn classifiers, all with default hyper-parameters, paired
# with the label printed in the results.
models=[("Logistic Regression",LogisticRegression()),
        ("Linear Discriminant Analysis",LinearDiscriminantAnalysis()),
        ("Decision Tree",DecisionTreeClassifier()),
        ("Random Forest",RandomForestClassifier()),
        ("Extra Trees",ExtraTreesClassifier()),
        ("Gradient Boosting",GradientBoostingClassifier()),
        ("KNeighbors",KNeighborsClassifier()),
        ("SVM",SVC()),
        ("Gaussian Naive Bayes",GaussianNB()),
        ("Ada Boost",AdaBoostClassifier())]

# 10-fold cross-validated accuracy on the (scaled) training split.
# The loop variable is deliberately not named `model`: that name holds the
# trained Keras network, and rebinding it here would make every earlier cell
# that uses `model` operate on the last scikit-learn estimator when re-run.
for name, clf in models:
    results = cross_val_score(clf, X_train, y_train.values.ravel(), cv=10, scoring='accuracy')
    # ANSI escape codes colour the estimator name and the mean score.
    print(f"\x1b[94m{name}\x1b[0m: \x1b[95m{results.mean():.4f}\x1b[0m ± {results.std():.4f}")