<a href="https://colab.research.google.com/github/Jeremy1999de/MachineLearningBRNO/blob/main/Machine_Learning_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Import the data



In [16]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [17]:
import pandas as pd

# Load the training and test data into pandas DataFrames
x_train = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/ML Project/x_train.csv')
x_test = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/ML Project/x_test.csv')
y_train = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/ML Project/y_train.csv')

# Print the data types of each column in the training data
print(x_train.dtypes)


# Print the data type of the target column in the training data
print(y_train.dtypes)


Unnamed: 0      int64
cfo_demod     float64
gain_imb      float64
iq_imb        float64
or_off        float64
quadr_err     float64
m_power       float64
ph_err        float64
mag_err       float64
evm           float64
Tosc          float64
Tmix          float64
dtype: object
Unnamed: 0    int64
target        int64
dtype: object


#Preprocessing 


In [18]:
from sklearn.preprocessing import StandardScaler
x_train = x_train.drop(['m_power', 'Tosc', 'Tmix'], axis=1)
x_test = x_test.drop(['m_power', 'Tosc', 'Tmix'], axis=1)

scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)


In [19]:
from sklearn.model_selection import train_test_split
import tensorflow as tf
# One-hot encode the target values
y_train_one_hot = tf.keras.utils.to_categorical(y_train['target'] - 1)


x_train_split, x_val_split, y_train_split, y_val_split = train_test_split(x_train_scaled, y_train_one_hot, test_size=0.2, random_state=42)



#Construction of the model

In [20]:
def build_mlp_model(input_dim, num_classes, hidden_layers, activation_function="relu", output_activation="softmax", dropout_rate=0.5, l1_coeff=0.001, l2_coeff=0.001):
    model = Sequential()
    
    for i, layer in enumerate(hidden_layers):
        if i == 0:
            model.add(Dense(layer, input_dim=input_dim, activation=activation_function, kernel_regularizer=l1_l2(l1=l1_coeff, l2=l2_coeff)))
        else:
            model.add(Dense(layer, activation=activation_function, kernel_regularizer=l1_l2(l1=l1_coeff, l2=l2_coeff)))
        model.add(Dropout(dropout_rate))
            
    model.add(Dense(num_classes, activation=output_activation))
    
    return model

In [21]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dropout
from tensorflow.keras.regularizers import l1_l2
from tensorflow.keras.callbacks import EarlyStopping

hidden_layers_list = [
    [128, 64, 32],
    [64, 64],
    [32, 32, 32, 32]
]

for hidden_layers in hidden_layers_list:
    print(f"Training MLP model with hidden layers: {hidden_layers}")
    model = build_mlp_model(input_dim=x_train_scaled.shape[1], num_classes=len(np.unique(y_train['target'])), hidden_layers=hidden_layers)
    
    model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    
    history = model.fit(x_train_split, y_train_split, validation_data=(x_val_split, y_val_split), epochs=100, batch_size=128, verbose=0, callbacks=[early_stopping])
    
    print("Training accuracy: {:.4f}".format(history.history['accuracy'][-1]))
    print("Validation accuracy: {:.4f}".format(history.history['val_accuracy'][-1]))
    print("\n")


Training MLP model with hidden layers: [128, 64, 32]
Training accuracy: 0.9762
Validation accuracy: 0.9987


Training MLP model with hidden layers: [64, 64]
Training accuracy: 0.9921
Validation accuracy: 0.9987


Training MLP model with hidden layers: [32, 32, 32, 32]
Training accuracy: 0.9355
Validation accuracy: 0.9964




In [22]:
# This code trains three different MLP models with varying numbers of hidden layers, neurons in the hidden layers,  and regularization techniques. The output will show the training and validation accuracy for each model. 
# The model uses L1 and L2 regularization, dropout regularization, and early stopping to improve performance and prevent overfitting.


'\nThis code trains three different MLP models with varying numbers of hidden layers, neurons in the hidden layers, \nand regularization techniques. The output will show the training and validation accuracy for each model. \nThe model uses L1 and L2 regularization, dropout regularization, and early stopping to improve performance and prevent overfitting.\n'

#Test and submission


In [23]:

best_hidden_layers = [64, 64]  # Replace this with the best model architecture you found earlier

print(f"Training MLP model with hidden layers: {best_hidden_layers}")
best_model = build_mlp_model(input_dim=x_train_scaled.shape[1], num_classes=len(np.unique(y_train['target'])), hidden_layers=best_hidden_layers)

best_model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

history = best_model.fit(x_train_scaled, y_train_one_hot, epochs=100, batch_size=128, verbose=0, callbacks=[early_stopping])
print("Training accuracy: {:.4f}".format(history.history['accuracy'][-1]))

y_test_pred = best_model.predict(x_test_scaled)
y_test_pred_labels = np.argmax(y_test_pred, axis=1) + 1  # Convert the predictions back to original class labels
submission = pd.DataFrame({'id': x_test['Unnamed: 0'], 'target': y_test_pred_labels})
submission.to_csv('submission.csv', index=False)

Training MLP model with hidden layers: [64, 64]




Training accuracy: 0.9895


#SVM

In [24]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
import time
from sklearn.metrics import accuracy_score, classification_report

x_train_split, x_val_split, y_train_split, y_val_split = train_test_split(x_train_scaled, y_train['target'], test_size=0.2, random_state=42)

param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto']
}

svm = SVC()
grid_search = GridSearchCV(svm, param_grid, cv=5, verbose=2, n_jobs=-1)
grid_search.fit(x_train_split, y_train_split)

best_svm = grid_search.best_estimator_
y_val_pred = best_svm.predict(x_val_split)
val_accuracy = accuracy_score(y_val_split, y_val_pred)
print(f"SVM Validation accuracy: {val_accuracy:.4f}")

start_time_svm = time.time()
best_svm.fit(x_train_scaled, y_train['target'])
end_time_svm = time.time()
svm_train_time = end_time_svm - start_time_svm

start_time_mlp = time.time()
best_model.fit(x_train_scaled, y_train_one_hot, epochs=100, batch_size=128, verbose=0)
end_time_mlp = time.time()
mlp_train_time = end_time_mlp - start_time_mlp

print(f"SVM training time: {svm_train_time:.4f} seconds")
print(f"MLP training time: {mlp_train_time:.4f} seconds")

Fitting 5 folds for each of 12 candidates, totalling 60 fits
SVM Validation accuracy: 1.0000
SVM training time: 0.2375 seconds
MLP training time: 26.8288 seconds
