In [None]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn import decomposition

In [None]:
# Load the preprocessed table and cast every column to a single float dtype.
# float32 is used instead of float16: half precision keeps only about three
# significant decimal digits and overflows to inf above 65504, which can
# silently distort feature values and the values of the target column.
df = pd.read_csv('preprocess.csv').astype('float32')

In [None]:
# The 'prob' column is the target; every remaining column is a feature.
y = df['prob']
X = df.drop(columns=['prob'])

In [None]:
# One-hot encode the target: one indicator column per distinct value of `y`.
# An explicit float dtype keeps the encoded targets numeric regardless of the
# installed pandas version (the default indicator dtype is bool from pandas 2.0
# onwards and uint8 before that).
y_one = pd.get_dummies(y, dtype=np.float32)
# Preview the first rows only instead of rendering the whole frame.
y_one.head()

## Original Data ##

In [None]:
# Hold out 30% of the rows for evaluation. A fixed random_state makes the
# shuffle, and therefore the train/test partition, identical on every re-run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_one, test_size=0.3, shuffle=True, random_state=42
)

In [None]:
# Fully connected softmax classifier for the original (unscaled) feature set.
model = tf.keras.Sequential()
# An explicit Input object declares the feature count; passing `input_shape`
# to the first Dense layer is discouraged by recent Keras releases.
model.add(tf.keras.Input(shape=(X.shape[1],)))
# Hidden stack: same widths and order as before, all ReLU.
for hidden_units in (1024, 1024, 512, 512, 256, 128, 64, 32, 16):
    model.add(Dense(hidden_units, activation='relu'))
# Size the output layer from the one-hot target rather than a hard-coded 5 so
# it always matches the number of classes present in y_one.
model.add(Dense(y_one.shape[1], activation='softmax'))
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=[tf.keras.metrics.CategoricalAccuracy()],
)
model.summary()

In [None]:
# Train for 100 epochs in mini-batches of 20. The held-out split is passed as
# validation data so per-epoch validation metrics are recorded in `history`.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=20,
    epochs=100,
    verbose=2,
    validation_data=(X_test, y_test),
)

In [None]:
# Per-epoch accuracy on the training split and on the held-out split for the
# fit stored in `history`.
fig, ax = plt.subplots()
for metric_key in ('categorical_accuracy', 'val_categorical_accuracy'):
    ax.plot(history.history[metric_key])
ax.set_title('model accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(['train', 'test'], loc='lower right')
plt.show()

## Values Scaled ##

In [None]:
# Standardise every feature column (zero mean, unit variance) and wrap the
# result in a DataFrame that keeps the original column labels.
# NOTE(review): the scaler is fitted on every row of X, i.e. before any
# train/test split, so held-out rows contribute to the scaling statistics.
scaler = StandardScaler()
scaled_values = scaler.fit(X).transform(X)
X2 = pd.DataFrame(data=scaled_values, columns=X.columns)
X2

In [None]:
# Split the unscaled features first, then fit the scaler on the training rows
# only, so that no statistics from the held-out rows leak into the model
# inputs. A fixed random_state makes the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_one, test_size=0.3, shuffle=True, random_state=42
)
train_scaler = StandardScaler().fit(X_train)
# Re-wrap as DataFrames so column labels and row indices are preserved.
X_train = pd.DataFrame(
    train_scaler.transform(X_train), columns=X.columns, index=X_train.index
)
X_test = pd.DataFrame(
    train_scaler.transform(X_test), columns=X.columns, index=X_test.index
)

In [None]:
# Fully connected softmax classifier for the standardised feature set
# (same column count as X).
modelS = tf.keras.Sequential()
# An explicit Input object declares the feature count; passing `input_shape`
# to the first Dense layer is discouraged by recent Keras releases.
modelS.add(tf.keras.Input(shape=(X.shape[1],)))
# Hidden stack: same widths and order as before, all ReLU.
for hidden_units in (1024, 1024, 512, 512, 256, 128, 64, 32, 16):
    modelS.add(Dense(hidden_units, activation='relu'))
# Size the output layer from the one-hot target rather than a hard-coded 5 so
# it always matches the number of classes present in y_one.
modelS.add(Dense(y_one.shape[1], activation='softmax'))
modelS.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=[tf.keras.metrics.CategoricalAccuracy()],
)
modelS.summary()

In [None]:
# Train the scaled-input model for 100 epochs in mini-batches of 20. The
# held-out split is passed as validation data so per-epoch validation metrics
# are recorded in `history`.
history = modelS.fit(
    x=X_train,
    y=y_train,
    batch_size=20,
    epochs=100,
    verbose=2,
    validation_data=(X_test, y_test),
)

In [None]:
# Per-epoch accuracy on the training split and on the held-out split for the
# fit stored in `history`.
fig, ax = plt.subplots()
for metric_key in ('categorical_accuracy', 'val_categorical_accuracy'):
    ax.plot(history.history[metric_key])
ax.set_title('model accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(['train', 'test'], loc='lower right')
plt.show()

## Values Scaled and PCA applied ##

In [None]:
# Project the standardised features onto their leading principal components.
# NOTE(review): the PCA is fitted on every row of X2, so rows that are later
# held out for testing also shape the projection.
n_components = 5
pca = decomposition.PCA(n_components=n_components)
pca.fit(X2)

# Share of the total variance captured by each component, largest first.
# These are variance *ratios*, not raw eigenvalues; the variable name is kept
# so existing references keep working.
eigenvalues = sorted(pca.explained_variance_ratio_, reverse=True)
n_pcs = pca.components_.shape[0]
# For each component, the position of the original feature with the largest
# absolute loading.
most_important = list(np.abs(pca.components_).argmax(axis=1))
initial_feature_names = X.columns
most_important_names = [initial_feature_names[idx] for idx in most_important]

# Label each column with its component rank plus its dominant feature. Using
# the feature name alone produces duplicate column labels whenever the same
# feature dominates several components, and wrongly suggests that the column
# *is* that feature rather than a linear combination of all features.
pc_labels = [
    f'PC{rank}_{name}'
    for rank, name in enumerate(most_important_names, start=1)
]
X_pca = pd.DataFrame(pca.transform(X2), columns=pc_labels)
X_pca

In [None]:
# Split the unscaled features first, then fit both the scaler and the PCA on
# the training rows only, so that neither the scaling statistics nor the
# projection are influenced by the held-out rows. A fixed random_state makes
# the partition reproducible.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_one, test_size=0.3, shuffle=True, random_state=42
)
train_scaler = StandardScaler().fit(X_train_raw)
train_pca = decomposition.PCA(n_components=n_components).fit(
    train_scaler.transform(X_train_raw)
)
# One column per retained component; the count equals n_components, which is
# also the column count of X_pca.
pc_columns = [f'PC{i + 1}' for i in range(train_pca.n_components_)]
X_train = pd.DataFrame(
    train_pca.transform(train_scaler.transform(X_train_raw)),
    columns=pc_columns,
    index=X_train_raw.index,
)
X_test = pd.DataFrame(
    train_pca.transform(train_scaler.transform(X_test_raw)),
    columns=pc_columns,
    index=X_test_raw.index,
)

In [None]:
# Fully connected softmax classifier for the PCA-reduced feature set.
modelSP = tf.keras.Sequential()
# An explicit Input object declares the feature count; passing `input_shape`
# to the first Dense layer is discouraged by recent Keras releases.
modelSP.add(tf.keras.Input(shape=(X_pca.shape[1],)))
# Hidden stack: same widths and order as before, all ReLU.
for hidden_units in (1024, 1024, 512, 512, 256, 128, 64, 32, 16):
    modelSP.add(Dense(hidden_units, activation='relu'))
# Size the output layer from the one-hot target rather than a hard-coded 5 so
# it always matches the number of classes present in y_one.
modelSP.add(Dense(y_one.shape[1], activation='softmax'))
modelSP.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=[tf.keras.metrics.CategoricalAccuracy()],
)
modelSP.summary()

In [None]:
# Train the PCA-input model for 200 epochs in mini-batches of 20. The held-out
# split is passed as validation data so per-epoch validation metrics are
# recorded in `history`.
history = modelSP.fit(
    x=X_train,
    y=y_train,
    batch_size=20,
    epochs=200,
    verbose=2,
    validation_data=(X_test, y_test),
)

In [None]:
# Per-epoch accuracy on the training split and on the held-out split for the
# fit stored in `history`.
fig, ax = plt.subplots()
for metric_key in ('categorical_accuracy', 'val_categorical_accuracy'):
    ax.plot(history.history[metric_key])
ax.set_title('model accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(['train', 'test'], loc='lower right')
plt.show()