### Imports


In [2]:
import pandas as pd
import xgboost as xgb
#import lightgbm as lgb
import os
from importlib import reload
from sklearn.metrics import accuracy_score

from sklearn.model_selection import cross_val_score
from sklearn.metrics import recall_score, precision_score, f1_score, accuracy_score, mean_squared_error, mean_absolute_error, r2_score
import numpy as np
from sklearn.neural_network import MLPClassifier
import xgboost as xgb
from sklearn.linear_model import LogisticRegression

import ipywidgets as widgets
from IPython.display import clear_output, display

### Load Data

In [3]:
# Set the working directory
os.chdir(r'c:/Users/kamil/Documents/PredictModel/WineRatePrediction/wine_rank')

processed_train= r'c:/Users/kamil/Documents/PredictModel/WineRatePrediction/wine_rank/data/processed/processed_data_train.csv'
processed_test= r'c:/Users/kamil/Documents/PredictModel/WineRatePrediction/wine_rank/data/processed/processed_data_test.csv'

In [4]:
train_data = pd.read_csv(processed_train)
test_data = pd.read_csv(processed_test)

# MAP QUALITY TO 0-6
mapping = {3: 0, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6}
train_data['quality'] = train_data['quality'].map(mapping)
test_data['quality'] = test_data['quality'].map(mapping)

X_train = train_data.drop('quality', axis=1)
y_train = train_data['quality']

X_test = test_data.drop('quality', axis=1)
y_test = test_data['quality']

### Selection of hyperparameters

In [32]:
import functions.LR_hyperparams_search
import functions.xgboost_hyperparams_search
import functions.MLPC_hyperparams_search
import functions.MLP_model

reload(functions.LR_hyperparams_search)  # To ensure the latest version is loaded
reload(functions.xgboost_hyperparams_search) 
reload(functions.MLPC_hyperparams_search)
reload(functions.MLP_model)

from functions.xgboost_hyperparams_search import xgboost_hyperparams_search
from functions.LR_hyperparams_search import LR_hyperparams_search
from functions.MLPC_hyperparams_search import MLP_hyperparams_search
from functions.MLP_model import create_mlp_functional, MLP_model_hyperparams_search


In [33]:
# widgets 

model_widget = widgets.Dropdown(
    options=['Logistic Regression', 'XGBoost', 'MLP Classifier', 'MLP Model'],
    value='Logistic Regression',
    description='Model:'
)

search_type_widget = widgets.Dropdown(
    options=['grid', 'random', 'optuna'],
    value='random',
    description='Search type:'
)

n_trials_widget = widgets.IntSlider(
    value=10,
    min=1,
    max=100,
    step=1,
    description='n_trials:',
    continuous_update=False
)

# Button widget
search_button = widgets.Button(description="Start Search")

# Definition of the function that will be called when the button is clicked
def hyperparameter_search(model_type, type_of_search, n_trials):
    if model_type == 'Logistic Regression':
        LR_best_model = LR_hyperparams_search(X_train, y_train, X_test, y_test, n_trials=n_trials, type_of_search=type_of_search)
        print(f"Best Logistic Regression Model: {LR_best_model}")
    elif model_type == 'XGBoost':
        XGBoost_best_model = xgboost_hyperparams_search(X_train, y_train, X_test, y_test, n_trials=n_trials, type_of_search=type_of_search)
        print(f"Best XGBoost Model: {XGBoost_best_model}")

    elif model_type == 'MLP Classifier':
        MLP_best_model = MLP_hyperparams_search(X_train, y_train, X_test, y_test, n_trials=n_trials, type_of_search=type_of_search)
        print(f"Best MLP Classifier Model: {MLP_best_model}")

    elif model_type == 'MLP Model':
        MLP_model= MLP_model_hyperparams_search(X_train, y_train, X_test, y_test, n_trials=n_trials, type_of_search=type_of_search)
        print(f'Best MLP Model: {MLP_model}')
    else:
        print("Unknown model type")


def on_search_button_click(b):
    clear_output(wait=True)  # Clear the output of the current output cell receiving output
    display(model_widget, search_type_widget, n_trials_widget, search_button) # show again the widgets
    model_type = model_widget.value
    search_type = search_type_widget.value
    n_trials = n_trials_widget.value
    print(f"Searching for the best hyperparameters for the {model_type} model using {search_type} search.\n Wait a moment...")
    
    # Start the hyperparameter search
    hyperparameter_search(model_type, search_type, n_trials)

# Assign the event handler to the button
search_button.on_click(on_search_button_click)

# Display the widgets
display(model_widget, search_type_widget, n_trials_widget, search_button)

Dropdown(description='Model:', index=3, options=('Logistic Regression', 'XGBoost', 'MLP Classifier', 'MLP Mode…

Dropdown(description='Search type:', options=('grid', 'random', 'optuna'), value='grid')

IntSlider(value=2, continuous_update=False, description='n_trials:', min=1)

Button(description='Start Search', style=ButtonStyle())

Searching for the best hyperparameters for the MLP Model model using grid search.
 Wait a moment...


### Train model

In [6]:
# Definition of models to train
models = {
    "XGBoost": xgb.XGBClassifier(),
    "MLPC": MLPClassifier(hidden_layer_sizes=(100,), max_iter=500, random_state=42),
    "Logistic_Regression": LogisticRegression(max_iter=1000),
    "MLP": create_mlp_functional(input_dim=X_train.shape[1], output_dim=7)
}

# Training and evaluation of models
for model_name, model in models.items():
    
    if model_name == 'MLP':
        model.fit(X_train, y_train, epochs=10, verbose=0)
        y_pred_probs = model.predict(X_test, verbose=0)
        y_pred = np.argmax(y_pred_probs, axis=1) # Wybór klasy z największym prawdopodobieństwem
    else:
        model.fit(X_train, y_train)
        # Predicting on the test set
        y_pred = model.predict(X_test)
        

    data= pd.DataFrame({"predict": y_pred, "real": y_test})
    # Calculating metrics
    precision = precision_score(y_test, y_pred, average='micro')
    recall = recall_score(y_test, y_pred, average='micro')
    f1 = f1_score(y_test, y_pred, average='micro')
    accuracy = accuracy_score(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f"Model: {model_name}")
    
    display(data.head(10))

    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Accuracy: {accuracy:.4f}")
    
    print(f"Mean Squared Error (MSE): {mse:.4f}")
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"R-squared (R2): {r2:.4f}\n")

    # # Cross-validation
    # cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    # print(f"Cross-validation Scores (Accuracy): {[score for score in cv_scores]}\n")
    # print(f"Mean Accuracy from Cross-validation: {cv_scores.mean():.4f}\n")

Model: XGBoost


Unnamed: 0,predict,real
0,3,2
1,3,2
2,3,3
3,2,2
4,3,3
5,2,3
6,2,2
7,3,3
8,2,2
9,2,2


Precision: 0.6569
Recall: 0.6569
F1 Score: 0.6569
Accuracy: 0.6569
Mean Squared Error (MSE): 0.4838
Mean Absolute Error (MAE): 0.3869
R-squared (R2): 0.3706

Model: MLPC




Unnamed: 0,predict,real
0,2,2
1,2,2
2,2,3
3,2,2
4,3,3
5,2,3
6,2,2
7,3,3
8,2,2
9,2,2


Precision: 0.5677
Recall: 0.5677
F1 Score: 0.5677
Accuracy: 0.5677
Mean Squared Error (MSE): 0.5977
Mean Absolute Error (MAE): 0.4838
R-squared (R2): 0.2225

Model: Logistic_Regression


Unnamed: 0,predict,real
0,2,2
1,3,2
2,2,3
3,2,2
4,3,3
5,2,3
6,2,2
7,3,3
8,2,2
9,2,2


Precision: 0.5192
Recall: 0.5192
F1 Score: 0.5192
Accuracy: 0.5192
Mean Squared Error (MSE): 0.6908
Mean Absolute Error (MAE): 0.5462
R-squared (R2): 0.1014

Model: MLP


Unnamed: 0,predict,real
0,2,2
1,3,2
2,2,3
3,2,2
4,3,3
5,2,3
6,2,2
7,3,3
8,2,2
9,2,2


Precision: 0.5262
Recall: 0.5262
F1 Score: 0.5262
Accuracy: 0.5262
Mean Squared Error (MSE): 0.6762
Mean Absolute Error (MAE): 0.5377
R-squared (R2): 0.1204



In [10]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import xgboost as xgb

# Train the XGBRegressor model
xgb_regressor = xgb.XGBRegressor()
xgb_regressor.fit(X_train, y_train)

# Predict on the test set
y_pred = xgb_regressor.predict(X_test)

# Calculate metrics
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"R-squared (R2): {r2:.4f}")

Mean Squared Error (MSE): 0.4314
Mean Absolute Error (MAE): 0.4705
R-squared (R2): 0.4388
