## IMPORTS

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from joblib import dump, load
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.metrics import classification_report

from sklearn.metrics import confusion_matrix
import math
import numpy as np
from sklearn.metrics import accuracy_score
from tqdm import tqdm
from sklearn.metrics import mean_squared_error
import random

### Setting Up Files

In [8]:
drugsx = pd.read_csv(r'drugs_x_train.csv')
drugsy = pd.read_csv(r'drugs_y_train.csv')

redx = pd.read_csv(r'winequality_red_x_train.csv')
redy = pd.read_csv(r'winequality_red_y_train.csv')

whitex = pd.read_csv(r'winequality_white_x_train.csv')
whitey = pd.read_csv(r'winequality_white_y_train.csv')

## Splitting and creating the model for drugs:

In [9]:
X_train, X_test, y_train, y_test = train_test_split(drugsx, drugsy, random_state=0, train_size=0.8)
X_train = X_train.values
y_train = y_train.values
clf = MLPClassifier(random_state=0, max_iter=1000)
clf.fit(X_train, y_train)
model_file = "drugmodel.joblib"
dump(clf, model_file)



['drugmodel.joblib']

Loading the model for drugs:

In [10]:
X_test = X_test.values
y_test = y_test.values

In [11]:
def one_hot_encoding(nd_array):
    one_hot_encoding_predictions = nd_array

    for i in range(len(nd_array)):
        max_pred = max(nd_array[i])

        for j in range(len(nd_array[i])):
            one_hot_encoding_predictions[i][j] = 1 if nd_array[i][j] == max_pred else 0

    return one_hot_encoding_predictions

predictions = clf.predict_proba(X_test)
predictions = one_hot_encoding(predictions)
benchmarks = multilabel_confusion_matrix(y_test, predictions)
report = classification_report(y_test, predictions, zero_division=0)
print(report)

              precision    recall  f1-score   support

           0       0.68      1.00      0.81        13
           1       0.75      1.00      0.86         3
           2       1.00      1.00      1.00         3
           3       1.00      1.00      1.00         1

   micro avg       0.74      1.00      0.85        20
   macro avg       0.86      1.00      0.92        20
weighted avg       0.76      1.00      0.86        20
 samples avg       0.74      0.74      0.74        20



## Benchmarking for drugs:

In [12]:
#x_samples = []
#y_samples = []
#X_train, X_test, y_train, y_test = train_test_split(drugsx, drugsy, random_state=0, train_size=0.8)
#def objective_function(x, y, x_validation, y_validation, params):
#    model = MLPClassifier(
#        random_state=1, 
#        max_iter=300,
#        alpha=params['alpha'],
#        learning_rate_init=params['learning_rate']
#    )

#    model.fit(x, y)

#    predictions = model.predict(x_validation)

#    return mean_squared_error(predictions, y_validation)

#num_samples = 10
#for i in range(num_samples):
#    params = {
#        'alpha': random.randrange(0, 100) * 0.0001,
#        'learning_rate': random.randrange(0, 100) * 0.0001
#    }

#    loss = objective_function(X_train.values, y_train.values, X_test.values, y_test.values, params)
#    print("Loss: {}".format(loss))

#    x1 = params['alpha']
#    x2 = params['learning_rate']
#    y = loss

#    x_samples.append([x1, x2])
#    y_samples.append([y])

## Splitting and creating a model for red wine quality:

In [13]:
X_train, X_test, y_train, y_test = train_test_split(redx, redy, random_state=0, train_size=0.8)
X_train = X_train.values
y_train = y_train.values
clf = MLPClassifier(random_state=0, max_iter=1000)
clf.fit(X_train, y_train)
model_file = "redwinemodel.joblib"
dump(clf, model_file)



['redwinemodel.joblib']

Loading the model:

In [14]:
X_test = X_test.values
y_test = y_test.values

In [15]:
def one_hot_encoding(nd_array):
    one_hot_encoding_predictions = nd_array

    for i in range(len(nd_array)):
        max_pred = max(nd_array[i])

        for j in range(len(nd_array[i])):
            one_hot_encoding_predictions[i][j] = 1 if nd_array[i][j] == max_pred else 0

    return one_hot_encoding_predictions

predictions = clf.predict_proba(X_test)
predictions = one_hot_encoding(predictions)
benchmarks = multilabel_confusion_matrix(y_test, predictions)
report = classification_report(y_test, predictions, zero_division = 0)
print(report)

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.00      0.00      0.00         6
           2       0.74      0.70      0.72       104
           3       0.54      0.74      0.62        76
           4       0.75      0.32      0.45        28
           5       0.00      0.00      0.00         0

   micro avg       0.64      0.64      0.64       215
   macro avg       0.34      0.29      0.30       215
weighted avg       0.64      0.64      0.63       215
 samples avg       0.64      0.64      0.64       215



## Splitting and creating a model for white wine quality:

In [16]:
X_train, X_test, y_train, y_test = train_test_split(whitex, whitey, random_state=0, train_size=0.8)
X_train = X_train.values
y_train = y_train.values
clf = MLPClassifier(random_state=0, max_iter=1000)
clf.fit(X_train, y_train)
model_file = "whitewinemodel.joblib"
dump(clf, model_file)

['whitewinemodel.joblib']

Loading the model:

In [17]:
X_test = X_test.values
y_test = y_test.values

In [18]:
def one_hot_encoding(nd_array):
    one_hot_encoding_predictions = nd_array

    for i in range(len(nd_array)):
        max_pred = max(nd_array[i])

        for j in range(len(nd_array[i])):
            one_hot_encoding_predictions[i][j] = 1 if nd_array[i][j] == max_pred else 0

    return one_hot_encoding_predictions

predictions = clf.predict_proba(X_test)
predictions = one_hot_encoding(predictions)
benchmarks = multilabel_confusion_matrix(y_test, predictions)
report = classification_report(y_test, predictions, zero_division = 0)
print(report)

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.60      0.12      0.20        25
           2       0.62      0.57      0.59       198
           3       0.57      0.75      0.65       304
           4       0.53      0.35      0.42       103
           5       1.00      0.04      0.08        24
           6       0.00      0.00      0.00         1

   micro avg       0.58      0.58      0.58       657
   macro avg       0.47      0.26      0.28       657
weighted avg       0.59      0.58      0.56       657
 samples avg       0.58      0.58      0.58       657

