Importing the dependencies

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

Data Collection

In [None]:
# Read the wine-quality CSV (path is relative to the working directory) into a DataFrame.
wine_dataset = pd.read_csv(filepath_or_buffer='winequality-red.csv')

In [None]:
# dimensions of the dataset as a (number of rows, number of columns) tuple
wine_dataset.shape

In [None]:
# Preview the first five rows of the dataset.
wine_dataset.head(n=5)

In [None]:
# Count the missing entries in each column (isna is the canonical name of isnull).
wine_dataset.isna().sum()

Data analysis and visualization

In [None]:
# summary statistics (count, mean, std, min, quartiles, max) for each numeric column
wine_dataset.describe()

In [None]:
# Count plot: number of rows for each value of the 'quality' column.
# NOTE(review): recent seaborn releases emit a deprecation warning when `palette`
# is passed without `hue` -- confirm against the installed seaborn version.
sns.catplot(
    data=wine_dataset,
    x="quality",
    kind="count",
    palette="ch:.25",
)

In [None]:
# Bar plot of 'citric acid' against 'quality' on a 5x5 inch figure.
# The figure handle is kept in `plot`, as before.
plot = plt.figure(figsize=(5, 5))
sns.barplot(data=wine_dataset, x='quality', y='citric acid')

Correlation

In [None]:
# Pairwise Pearson correlation between the columns (pandas' default method, spelled out).
correlation = wine_dataset.corr(method='pearson')


In [None]:
# Annotated heatmap of the correlation matrix: one square cell per column pair,
# values shown with one decimal place.
plt.figure(figsize=(10, 10))
sns.heatmap(
    correlation,
    cmap='Greens',
    annot=True,
    fmt='.1f',
    annot_kws={'size': 8},
    square=True,
    cbar=True,
)

Data Preprocessing

In [None]:
# Feature matrix: every column of the dataset except the 'quality' label.
X = wine_dataset.drop(columns='quality')


In [None]:
# Preview the feature matrix via the notebook's rich display instead of
# print()-ing the whole frame as plain text.
X.head()

Label Binarization

In [None]:
# Binary target: 1 when quality is 7 or higher, otherwise 0
# (1 is reported as "good" and 0 as "bad" by the predictive system below).
Y = (wine_dataset['quality'] >= 7).astype('int64')
print(Y.value_counts())

Train & Test Split

In [None]:
# Hold out 20% of the rows for testing; random_state fixes the shuffle.
# Stratify on Y so the train and test sets keep the same 0/1 label ratio -- this
# matters when the classes are imbalanced (check the Y.value_counts() output).
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=2
)

Model Training:
Random Forest Classifier

In [None]:
# Random forests are stochastic (bootstrap sampling, random feature subsets).
# Fixing random_state makes the fitted model, and therefore the reported accuracy,
# reproducible across Restart & Run All; 2 matches the seed used for the split.
model = RandomForestClassifier(random_state=2)

In [None]:
# Fit the classifier on the training features and their labels.
model.fit(X=X_train, y=Y_train)

Model Evaluation

In [None]:
# accuracy on test data
# X_test_pred holds the labels predicted for X_test (name kept for later cells).
X_test_pred = model.predict(X_test)
# accuracy_score's signature is (y_true, y_pred): ground truth first, predictions second.
test_data_accuracy = accuracy_score(Y_test, X_test_pred)

In [None]:
# Report the fraction of test rows that were classified correctly.
print(f'Accuracy: {test_data_accuracy}')

Building a Predictive System

In [None]:
# Feature values for a single wine.
# NOTE(review): presumably listed in the same order as the columns of X -- confirm.
input_data = (7.3,0.65,0.0,1.2,0.065,15.0,21.0,0.9946,3.39,0.47,10.0)

# changing the input data to numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the array to (1, n_features) as we are predicting for one instance
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)

# The model was fitted on a DataFrame, so give the sample the same column names.
# Predicting on a bare ndarray makes scikit-learn warn that X has no valid feature
# names; building the frame also fails fast if the number of values does not match.
input_data_frame = pd.DataFrame(input_data_reshaped, columns=X.columns)

prediction = model.predict(input_data_frame)
print(prediction)

# Label 1 was assigned to quality >= 7 during binarization.
if prediction[0] == 1:
    print("The wine is of good quality")
else:
    print("The wine is of bad quality")