In [4]:
# Implement dependencies
import pandas as pd
import numpy as np

In [5]:
# Preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

dataset_red=pd.read_csv("Data/winequality-red.csv",sep=";",index_col=False)
dataset_white=pd.read_csv("Data/winequality-white.csv",sep=";",index_col=False)

red=["red"]*len(dataset_red)
dataset_red.insert(0,"Label",red)
white=["white"]*len(dataset_white)
dataset_white.insert(0,"Label",white)
dataset=pd.concat([dataset_red,dataset_white],axis=0)
dataset.set_index("Label")

le=LabelEncoder()
dataset["Label"]=le.fit_transform(dataset["Label"])
y=dataset["Label"]
X=dataset.drop("Label",axis=1)
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=1,stratify=y)

In [6]:
# Normalization
from sklearn.preprocessing import Normalizer
norm = Normalizer()
Xn_train=norm.fit_transform(X_train)
Xn_test=norm.transform(X_test)

In [7]:
# Feature selection
from sklearn.feature_selection import SelectKBest,f_classif
select = SelectKBest(f_classif, k=4)
Xnf_train=select.fit_transform(Xn_train,y_train)
Xnf_test=select.transform(Xn_test)

In [14]:
# Logistic Regression
from sklearn.model_selection import cross_val_score
from sklearn import linear_model
lm = linear_model.LogisticRegression(penalty='l2', C=1.0)
lm.fit(Xnf_train,y_train)
score = cross_val_score(lm, Xnf_train, y_train, cv=5, scoring='f1_macro')
print(score.mean())

0.8428438147470543


In [15]:
# SVM classficiation
from sklearn.svm import SVC
clf=SVC(C=1.0,kernel="rbf",degree=3,gamma='auto',probability=True)
clf.fit(Xnf_train,y_train)
score = cross_val_score(clf, Xnf_train, y_train, cv=5, scoring='f1_macro')
print(score.mean())

0.8554992492336606


In [11]:
# Decision Tree
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.tree import DecisionTreeClassifier

clf_fs_cv=DecisionTreeClassifier(criterion="entropy")
p_grid={"splitter":["best","random"],"max_features":[1,2,3,4],"max_depth":[2,3,4,5,6,7,8]}

inner_cv=KFold(n_splits=3,shuffle=True)
outer_cv=KFold(n_splits=5,shuffle=True)

clf = GridSearchCV(estimator=clf_fs_cv, param_grid=p_grid, cv=inner_cv)
nested_score = cross_val_score(clf, Xn_train, y_train, cv=outer_cv)

print(nested_score.mean())

0.972950833544893


In [13]:
# ANN
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
clf = MLPClassifier(hidden_layer_sizes=(256,256,256,256), activation='relu', solver='adam', max_iter=400,   
    batch_size=50)
score = cross_val_score(clf, Xnf_train, y_train, cv=5, scoring='f1_macro')
print(score.mean())

0.9236616683005074
