In [None]:
# Just using Standard Scaler here

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn.datasets as skd
import sklearn.model_selection as skm
import sklearn.datasets
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn import preprocessing


# Load the semicolon-delimited wine table; the first row supplies the column names.
df = pd.read_csv('winequality-red.csv', delimiter=';', header=0)
headers, data = df.columns, df.values

# All columns except the last are features; the target is taken from column index 11.
feature_names = headers[:-1]
output_names = headers[11]
X = data[:, :-1]
y = data[:, 11]

# Hold out 30% of the rows for testing (fixed seed so the split is repeatable).
X_train, X_test, y_train, y_test = skm.train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Learn mean/variance on the training rows only, then apply the same
# standardization to both partitions.
scaler = preprocessing.StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Class balance of the full target vector, shown as a pie chart.
unique_values, counts = np.unique(y, return_counts=True)
plt.pie(counts, autopct='%.1f%%', shadow=True)
plt.legend(unique_values)
plt.show()

In [None]:
# Hidden-layer widths to evaluate for a single-hidden-layer MLP.
# The range is currently narrowed to the single width 160; widen it to sweep.
hidden_node_sizes = range(160, 161, 1)

# Test-set accuracy for each entry of hidden_node_sizes, in the same order
accuracy_list = []

# Iterating through the range of hidden node sizes
for hidden_node in hidden_node_sizes:

    # One hidden layer with `hidden_node` units; fixed seed for repeatability
    mlp = MLPClassifier(hidden_layer_sizes=(hidden_node,),
                        max_iter=10000,
                        random_state=42)

    # Fitting the MLP on the training data set
    mlp.fit(X_train, y_train)

    # Making predictions on the test set
    y_pred = mlp.predict(X_test)

    # Calculating the accuracy
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_list.append(accuracy)

    # Progress indicator: the width that has just been evaluated
    print(hidden_node)

# Converting accuracy_list in terms of percent
accuracy_list = [i*100 for i in accuracy_list]

# Reporting the best # of hidden nodes with its accuracy
max_index = np.argmax(accuracy_list)
print(f'The number of hidden nodes that give the best\
 accuracy on the test data set is {hidden_node_sizes[max_index]}\
 with an accuracy of {accuracy_list[max_index]}%')

# Plotting accuracy vs. # of hidden nodes.
# The marker keeps the result visible when only one width was evaluated
# (a line plot through a single point draws nothing).
plt.plot(hidden_node_sizes, accuracy_list, marker='o')
plt.xlabel('# of hidden nodes')
plt.ylabel('Accuracy (%)')
plt.ylim(20,100)
plt.title('Accuracy vs. # of hidden nodes')
plt.show()



In [None]:
# Using min max normalization

# Load the semicolon-delimited wine table; the first row supplies the column names.
df = pd.read_csv('winequality-red.csv', delimiter=';', header=0)
headers, data = df.columns, df.values

# All columns except the last are features; the target is taken from column index 11.
feature_names = headers[:-1]
output_names = headers[11]
X = data[:, :-1]
y = data[:, 11]

# Hold out 30% of the rows for testing (fixed seed so the split is repeatable).
X_train, X_test, y_train, y_test = skm.train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Learn per-feature min/max on the training rows only, then rescale both
# partitions with those same bounds.
scaler = preprocessing.MinMaxScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Class balance of the full target vector, shown as a pie chart.
unique_values, counts = np.unique(y, return_counts=True)
plt.pie(counts, autopct='%.1f%%', shadow=True)
plt.legend(unique_values)
plt.show()

In [None]:
# Running bests. The node counts start at 155 and are replaced as soon as a
# model scores above the initial accuracy of 0.0.
best_test_nodes = best_train_nodes = 155
best_test_acc = best_train_acc = 0.0

# Search space: hidden-layer width and number of hidden layers
# (currently a single configuration: 4 layers of 158 nodes).
hidden_node_sizes = range(158, 159)
layer_size = range(4, 5)

# Kept for the (disabled) per-configuration reporting; not filled in this cell.
accuracy_list = []

for layer in layer_size:
    for hidden_node in hidden_node_sizes:

        # `layer` hidden layers, each `hidden_node` units wide
        mlp = MLPClassifier(
            hidden_layer_sizes=layer * (hidden_node,),
            activation='relu',
            max_iter=10000,
            random_state=42,
        )
        mlp.fit(X_train, y_train)

        # Score the fitted model on both partitions
        y_pred_test = mlp.predict(X_test)
        y_pred_train = mlp.predict(X_train)
        accuracy_test = accuracy_score(y_test, y_pred_test)
        accuracy_train = accuracy_score(y_train, y_pred_train)

        # Strict '>' keeps the first configuration on ties
        if accuracy_test > best_test_acc:
            best_test_acc, best_test_nodes = accuracy_test, hidden_node
        if accuracy_train > best_train_acc:
            best_train_acc, best_train_nodes = accuracy_train, hidden_node

# Summary of the best widths found for each partition
print(f'best test nodes ={best_test_nodes}')
print(f'best test accuracy ={best_test_acc}')
print(f'best train nodes ={best_train_nodes}')
print(f'best train accuracy ={best_train_acc}')

In [None]:
# Sweep over hidden-layer width (155..164 nodes) and depth (4..5 hidden layers)
hidden_node_sizes = range(155, 165, 1)
layer_size = range(4,6,1)

# Test accuracy of every evaluated configuration, in loop order
accuracy_list = []
# (layer, hidden_node) pair that produced the accuracy at the same index
config_list = []

for layer in layer_size:

    for hidden_node in hidden_node_sizes:

        # `layer` hidden layers, each `hidden_node` units wide
        mlp = MLPClassifier(hidden_layer_sizes=(hidden_node,)*layer,
                            max_iter=10000,
                            random_state=42)

        # Fitting the MLP on the training data set
        mlp.fit(X_train, y_train)

        # Making predictions on the test and training set
        y_pred_test = mlp.predict(X_test)
        y_pred_train = mlp.predict(X_train)

        # Calculating the accuracy
        accuracy_test = accuracy_score(y_test, y_pred_test)
        accuracy_train = accuracy_score(y_train,y_pred_train)

        # Record the result; without this the argmax/plot below have no data
        accuracy_list.append(accuracy_test)
        config_list.append((layer, hidden_node))

        print(f'The hidden node size is {hidden_node}')
        print(f'The hidden layer size is {layer}')
        print(f'Accuracy train is {accuracy_train}')
        print(f'Accuracy test is {accuracy_test}')


# Converting accuracy_list in terms of percent
accuracy_list = [i*100 for i in accuracy_list]

# Reporting the best configuration. The index refers to the flattened
# (layer, hidden_node) sweep, so it is looked up in config_list rather than
# in hidden_node_sizes.
max_index = np.argmax(accuracy_list)
best_layer, best_nodes = config_list[max_index]
print(f'The number of hidden nodes that give the best'
      f' accuracy on the test data set is {best_nodes}'
      f' ({best_layer} hidden layers)'
      f' with an accuracy of {accuracy_list[max_index]}%')

# Plotting accuracy vs. # of hidden nodes, one curve per depth.
# accuracy_list is laid out depth-major: one run of len(hidden_node_sizes)
# entries for each value in layer_size.
n_sizes = len(hidden_node_sizes)
for depth_index, depth in enumerate(layer_size):
    depth_scores = accuracy_list[depth_index*n_sizes:(depth_index + 1)*n_sizes]
    plt.plot(hidden_node_sizes, depth_scores, marker='o',
             label=f'{depth} hidden layers')
plt.xlabel('# of hidden nodes')
plt.ylabel('Accuracy (%)')
plt.ylim(20,100)
plt.title('Accuracy vs. # of hidden nodes')
plt.legend()
plt.show()



In [None]:
# Training-set accuracy of whichever `mlp` was fitted last
y_pred = mlp.predict(X_train)
accuracy = accuracy_score(y_true=y_train, y_pred=y_pred)
print(accuracy)

In [None]:
from sklearn.model_selection import GridSearchCV

# Define the parameter grid for GridSearchCV.
# hidden_layer_sizes lists the width of each hidden layer, so (n, 4) would be
# an n-unit layer followed by a 4-unit layer. The manual sweeps in this
# notebook build networks as (n,)*layers, so 4 equal-width layers are used.
param_grid = {
    'hidden_layer_sizes': [(n_nodes,)*4 for n_nodes in range(157, 161, 1)],
    'activation': ['relu','tanh'],
    'solver': ['adam','lbfgs'],
    'alpha': [0.001,0.01,0.1],
    # Only consulted by solver='sgd'; listed to pin the default explicitly
    'learning_rate': ['constant']
}

mlp = MLPClassifier(max_iter=10000, random_state=42)

# Perform a grid search using 5-fold cross-validation on the (already scaled)
# training partition; n_jobs=-1 uses all available cores
grid_search = GridSearchCV(mlp, param_grid, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(X_train,y_train)

# Print the best parameters and the corresponding mean cross-validated accuracy
print(f"Best parameters found: {grid_search.best_params_}")
print(f"Best accuracy: {grid_search.best_score_}")

# Evaluate the best model (refit on the whole training partition) on the test set
y_pred = grid_search.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test set accuracy: {accuracy}")

In [None]:
# Training-set accuracy of the estimator selected by the most recent grid search
y_pred = grid_search.predict(X_train)
accuracy = accuracy_score(y_true=y_train, y_pred=y_pred)
print(accuracy)

In [None]:
from sklearn.model_selection import GridSearchCV

# hidden_layer_sizes lists the width of each hidden layer, so (n, 5) would be
# an n-unit layer followed by a 5-unit layer. The manual sweeps in this
# notebook build networks as (n,)*layers, so 5 equal-width layers are used.
hidden_layer_options = [(n_nodes,)*5 for n_nodes in range(155, 165, 1)]

# Settings shared by every solver
common_grid = {
    'hidden_layer_sizes': hidden_layer_options,
    'activation': ['logistic', 'tanh', 'relu'],
    'alpha': [0.0001, 0.001, 0.01, 0.1],
}

# Define the parameter grid for GridSearchCV.
# The learning_rate schedule is only used by solver='sgd', so it is varied for
# that solver alone; crossing it with lbfgs/adam would refit identical models.
param_grid = [
    {**common_grid, 'solver': ['lbfgs', 'adam']},
    {**common_grid, 'solver': ['sgd'],
     'learning_rate': ['constant', 'invscaling', 'adaptive']},
]

mlp = MLPClassifier(max_iter=10000, random_state=42)

# Perform a grid search using 5-fold cross-validation on the training
# partition; n_jobs=-1 uses all available cores
grid_search = GridSearchCV(mlp, param_grid, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Print the best parameters and the corresponding mean cross-validated accuracy
print(f"Best parameters found: {grid_search.best_params_}")
print(f"Best accuracy: {grid_search.best_score_}")

# Evaluate the best model (refit on the whole training partition) on the test set
y_pred = grid_search.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test set accuracy: {accuracy}")

In [49]:
# Trying minmax normalization into StandardDevNormalization
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn.datasets as skd
import sklearn.model_selection as skm
import sklearn.datasets
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn import preprocessing

from sklearn.pipeline import make_pipeline

# Importing the data into a data variable
df = pd.read_csv('winequality-red.csv', header = 0, delimiter=';')
headers = df.columns
data = df.values

# Feature matrix: every column except two discarded features and the target.
X = df.drop(['density','fixed acidity','quality'], axis=1)
# Other feature subsets that were tried:
#X = df[['volatile acidity', 'citric acid', 'residual sugar',
#       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
#       'pH', 'sulphates', 'alcohol']]
#X = df[['residual sugar','fixed acidity','alcohol','sulphates','citric acid']]
y = df['quality']

# Splitting the data set to training and test sets
# with test_size = 30%, stratified so both partitions keep the class mix of y
X_train, X_test, y_train, y_test = skm.train_test_split(X,y,test_size = 0.3, random_state=42,stratify=y)

# Min-max scaling followed by standardization, fitted on the training rows only.
# Both stages live in one pipeline so that `scaler` represents the complete
# transformation; previously the name was rebound to the second stage only,
# which would mis-scale raw data passed to scaler.transform later on.
# NOTE: per-feature min-max scaling is an affine map, so standardizing
# afterwards yields the same values as StandardScaler alone (up to
# floating-point rounding) for any non-constant feature.
scaler = make_pipeline(preprocessing.MinMaxScaler(),
                       preprocessing.StandardScaler()).fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [50]:
# Running bests. The node counts start at 155 and are replaced as soon as a
# model scores above the initial accuracy of 0.0.
best_test_nodes = best_train_nodes = 155
best_test_acc = best_train_acc = 0.0

# Search space: widths 155..164, with a single depth of 4 hidden layers.
hidden_node_sizes = range(155, 165)
layer_size = range(4, 5)

# Kept for the (disabled) per-configuration reporting; not filled in this cell.
accuracy_list = []

for layer in layer_size:
    for hidden_node in hidden_node_sizes:

        # `layer` hidden layers, each `hidden_node` units wide, with L2 penalty alpha=1
        mlp = MLPClassifier(
            hidden_layer_sizes=layer * (hidden_node,),
            activation='relu',
            alpha=1,
            max_iter=10000,
            random_state=30,
        )
        mlp.fit(X_train, y_train)

        # Score the fitted model on both partitions
        y_pred_test = mlp.predict(X_test)
        y_pred_train = mlp.predict(X_train)
        accuracy_test = accuracy_score(y_test, y_pred_test)
        accuracy_train = accuracy_score(y_train, y_pred_train)

        # Strict '>' keeps the first configuration on ties
        if accuracy_test > best_test_acc:
            best_test_acc, best_test_nodes = accuracy_test, hidden_node
        if accuracy_train > best_train_acc:
            best_train_acc, best_train_nodes = accuracy_train, hidden_node

# Summary of the best widths found for each partition
print(f'best test nodes ={best_test_nodes}')
print(f'best test accuracy ={best_test_acc}')
print(f'best train nodes ={best_train_nodes}')
print(f'best train accuracy ={best_train_acc}')

best test nodes =164
best test accuracy =0.6729166666666667
best train nodes =161
best train accuracy =0.9419124218051832


In [16]:
# Drop the listed columns and show the shape of what remains
dropped_columns = [
    'alcohol', 'sulphates', 'fixed acidity', 'pH',
    'citric acid', 'density', 'total sulfur dioxide', 'chlorides',
]
test = df.drop(columns=dropped_columns)
test.shape

(1599, 4)