# Wine Quality Prediction

### Load Dataset

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier,GradientBoostingClassifier
from sklearn.metrics import classification_report

In [None]:
# Read the wine-quality table from the CSV in the working directory and
# preview its first five rows.
df = pd.read_csv(filepath_or_buffer="wine_quality.csv")
df.head(n=5)

In [None]:
# Print the frame's summary (column names, non-null counts, dtypes).
df.info()

In [None]:
# Count the missing values in each column.
df.isna().sum()

### Visualization

In [None]:
# Histogram grid of the frame's columns: 20 bins each, on a 10x10-inch figure.
df.hist(figsize=(10, 10), bins=20)
plt.show()

In [None]:
# Annotated correlation heatmap of the numeric columns.
#
# DataFrame.corr() raises on non-numeric columns in pandas >= 2.0 (the
# `numeric_only` default became False). This frame has a 'type' column that
# is excluded from the model features -- presumably because it holds strings --
# so restrict to numeric dtypes first. On older pandas this matches the old
# behaviour, where non-numeric columns were dropped silently.
numeric_df = df.select_dtypes(include="number")

plt.figure(figsize=(16, 12))
sns.heatmap(numeric_df.corr(), annot=True, fmt='.2f')
plt.show()  # render explicitly, as the other plotting cells do

### Preprocess

In [None]:
# Fill missing values with the mean of their column.
#
# numeric_only=True stops df.mean() from raising TypeError on non-numeric
# columns under pandas >= 2.0; such columns get no fill value and are left
# unchanged, which matches what older pandas did implicitly.
#
# NOTE(review): the means are computed over the whole dataset, before the
# train/test split, so test rows influence the imputed values. Consider
# imputing after the split using training-set statistics only.
df = df.fillna(df.mean(numeric_only=True))

In [None]:
def _quality_to_label(score):
    """Bucket a raw quality score: <= 5 -> 0, (5, 7] -> 1, anything else -> 2."""
    if score <= 5:
        return 0
    if score <= 7:
        return 1
    return 2


df['quality_label'] = df['quality'].apply(_quality_to_label)

# Features are every column except the wine type, the raw score and the
# label derived from it; the label is the prediction target.
X = df.drop(columns=['type', 'quality', 'quality_label'])
y = df['quality_label']

# Hold out 20% of the rows for evaluation, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=2
)

In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same transform to both splits.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

### Model Training + Evaluation

In [None]:
# Four tree-based classifiers, each constructed with random_state=1.
models = [
    DecisionTreeClassifier(random_state=1),
    RandomForestClassifier(random_state=1),
    AdaBoostClassifier(random_state=1),
    GradientBoostingClassifier(random_state=1),
]

for model in models:
    # Fit on the scaled training split and predict the held-out split.
    y_pred = model.fit(X_train, y_train).predict(X_test)
    print(type(model).__name__)
    print(classification_report(y_test, y_pred))

    # Label the importances with the column names of X (the unscaled
    # feature frame), then chart the 25 largest as horizontal bars.
    feat_import = pd.Series(data=model.feature_importances_, index=X.columns)
    top_features = feat_import.nlargest(25)
    top_features.plot(kind='barh', figsize=(10, 10))
    plt.show()

In [None]:
# Reference links that inspired this notebook, kept as a bare string literal.
'''
Inspiration
1. https://github.com/dipanjanS/practical-machine-learning-with-python/tree/master/notebooks/Ch09_Analyzing_Wine_Types_and_Quality
2. https://towardsdatascience.com/predicting-wine-quality-with-several-classification-techniques-179038ea6434
'''