Krantas Konstantinos, 9975  
Strikos Konstantinos, 9517

# **Question 4 - NN Testing**

In [8]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_decision_regions
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout
from keras.regularizers import l2
from keras.optimizers import RMSprop
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Load the labelled dataset: every column except the last is a feature,
# the last column is the class label.
df = pd.read_csv('datasetC.csv')
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values - 1  # Subtract 1 to make the range [0, 4]

# Split BEFORE scaling so the hold-out rows cannot influence the scaler's
# mean/std (fitting on the full dataset leaks validation statistics).
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardise using statistics from the training rows only, then apply the
# same fitted transform to the hold-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Keep X standardised too (with the train-fitted scaler), as it was before,
# in case later code reads it.
X = scaler.transform(X)

# Build the MLP as a single layer stack:
#   32-unit ReLU layer (L2-regularised kernel) -> 20% dropout
#   -> 16-unit ReLU layer -> 5-unit softmax output (one unit per class).
model = Sequential([
    Dense(
        units=32,
        activation='relu',
        input_dim=X_train.shape[1],  # one input per feature column
        kernel_regularizer=l2(0.03),
    ),
    Dropout(0.2),
    Dense(units=16, activation='relu'),
    Dense(units=5, activation='softmax'),
])

# Compile with the sparse cross-entropy loss (labels are integer class ids,
# not one-hot vectors) and print the layer/parameter summary.
model.compile(
    optimizer=RMSprop(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

# Stop training once val_loss has not improved for 3 epochs (restoring the
# best weights seen), and persist the best-val_loss model to 'best_model'.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model', monitor='val_loss', save_best_only=True)

# Validate on the explicit hold-out split. Previously X_val/y_val were never
# used and validation_split carved a further 20% out of X_train, so 30% of the
# labelled data took no part in training or validation.
history = model.fit(X_train, y_train, epochs=20, batch_size=32,
                    validation_data=(X_val, y_val), shuffle=True,
                    callbacks=[early_stopping, model_checkpoint])

# Read the unlabelled test set and standardise it with the already-fitted scaler.
test_data = pd.read_csv('datasetCTest.csv')
X_test = scaler.transform(test_data.values)

# Reload the checkpoint written by ModelCheckpoint and compute the per-class
# probabilities for every test row.
best_model = load_model('best_model')
predictions = best_model.predict(X_test)

# Pick the most probable class per row and shift back to the original
# 1-based label range [1, 5] before saving.
labels5 = predictions.argmax(axis=1) + 1
np.save('labels5.npy', labels5)

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_15 (Dense)            (None, 32)                12832     
                                                                 
 dropout_5 (Dropout)         (None, 32)                0         
                                                                 
 dense_16 (Dense)            (None, 16)                528       
                                                                 
 dense_17 (Dense)            (None, 5)                 85        
                                                                 
Total params: 13445 (52.52 KB)
Trainable params: 13445 (52.52 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20


# Other classifiers tried (worse accuracy than the neural network)

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

# Rebuild features/labels from the dataframe loaded earlier in the notebook.
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values - 1  # Subtract 1 to make the range [0, 4]
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Manage feature values: scale AFTER building and splitting X. Previously the
# scaler ran first and its output was immediately overwritten by the raw
# features, so every classifier below was trained on unscaled data.
# The scaler is fitted on the training rows only to avoid leaking hold-out
# statistics.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

def _fit_and_report(label, estimator, X_fit, y_fit, X_eval, y_eval):
    """Fit `estimator`, score it on the evaluation split and print the accuracy.

    Parameters
    ----------
    label : str
        Human-readable classifier name used as the prefix of the printed line.
    estimator : object
        Classifier exposing `fit(X, y)` and `predict(X)`.
    X_fit, y_fit : array-like
        Training features and labels.
    X_eval, y_eval : array-like
        Evaluation features and labels.

    Returns
    -------
    tuple
        `(predictions, accuracy)` for the evaluation split.
    """
    estimator.fit(X_fit, y_fit)
    eval_pred = estimator.predict(X_eval)
    eval_acc = accuracy_score(y_eval, eval_pred)
    print(f"{label} Accuracy: {eval_acc}")
    return eval_pred, eval_acc


# Decision tree with default hyperparameters (seeded).
dt = DecisionTreeClassifier(random_state=42)
pred_dt, accuracy_dt = _fit_and_report(
    "Decision Tree Classifier", dt, X_train, y_train, X_val, y_val)

# XGBoost: 400 depth-3 trees with learning rate (eta) 0.6.
xgb = XGBClassifier(n_estimators=400, eta=0.6, random_state=42, max_depth=3)
pred_xgb, accuracy_xgb = _fit_and_report(
    "XGBoost Classifier", xgb, X_train, y_train, X_val, y_val)

# Random forest with default hyperparameters (seeded).
rf = RandomForestClassifier(random_state=42)
pred_rf, accuracy_rf = _fit_and_report(
    "Random Forest Classifier", rf, X_train, y_train, X_val, y_val)

# L2-penalised logistic regression using the saga solver.
# The solver still emitted a max-iteration (convergence) warning at max_iter=300.
# NOTE(review): saga is sensitive to feature scale -- confirm the inputs are
# standardised before raising max_iter further.
logistic = LogisticRegression(random_state=42, solver='saga', penalty='l2',
                              max_iter=300)
pred_logistic, accuracy_logistic = _fit_and_report(
    "Logistic Regression Classifier", logistic, X_train, y_train, X_val, y_val)

# k-nearest neighbours with k=3.
knn = KNeighborsClassifier(n_neighbors=3)
pred_knn, accuracy_knn = _fit_and_report(
    "k-NN Classifier", knn, X_train, y_train, X_val, y_val)