Mount Drive

In [1]:
# Attach Google Drive to this Colab runtime; the dataset is read from the
# mounted folder further down.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


Import Libraries

In [2]:
import numpy as np
import pandas as pd

Load Data

In [3]:
# Read the wine-quality CSV from the mounted Drive folder and preview the
# first five rows.
DATASET_PATH = "/content/drive/MyDrive/AI_data/Winequality_dataset_final.csv"
data = pd.read_csv(DATASET_PATH)
data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,0.56,9.4,0
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,0.68,9.8,0
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,0.65,9.8,0
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,0.58,9.8,1
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,0.56,9.4,0


Missing Values Check

In [4]:
# Count the missing entries in every column and print the per-column totals.
missing_per_column = data.isna().sum()
print(missing_per_column)

fixed acidity           0
volatile acidity        0
citric acid             0
residual sugar          0
chlorides               0
free sulfur dioxide     0
total sulfur dioxide    0
density                 0
sulphates               0
alcohol                 0
quality                 0
dtype: int64


Min and Max Values

In [5]:
def _column_range(column_name):
    """Return the (minimum, maximum) pair of one column of ``data``."""
    column = data[column_name]
    return column.min(), column.max()


# Record the raw range of every column. These values are reused later to scale
# user-entered samples by hand, so this cell must run before the normalisation
# cell overwrites ``data`` with scaled values.
fixed_min, fixed_max = _column_range('fixed acidity')
volatile_min, volatile_max = _column_range('volatile acidity')
citric_min, citric_max = _column_range('citric acid')
residual_min, residual_max = _column_range('residual sugar')
chlorides_min, chlorides_max = _column_range('chlorides')
free_min, free_max = _column_range('free sulfur dioxide')
total_min, total_max = _column_range('total sulfur dioxide')
density_min, density_max = _column_range('density')
sulphates_min, sulphates_max = _column_range('sulphates')
alcohol_min, alcohol_max = _column_range('alcohol')
quality_min, quality_max = _column_range('quality')


Normalisation

In [6]:
from sklearn.preprocessing import MinMaxScaler

# Columns to min-max scale: the ten feature columns plus the 'quality' label.
num_vars = [
    'fixed acidity',
    'volatile acidity',
    'citric acid',
    'residual sugar',
    'chlorides',
    'free sulfur dioxide',
    'total sulfur dioxide',
    'density',
    'sulphates',
    'alcohol',
    'quality',
]

# NOTE(review): the scaler is fitted on the full dataset, before the train/test
# split, so the test rows influence the scaling. Consider fitting on the
# training rows only.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(data[num_vars])
data[num_vars] = scaled_values

Data Split

In [7]:
from sklearn.model_selection import train_test_split

# 70/30 train/test partition; the fixed seed makes the split repeatable.
SPLIT_SEED = 100
df_train, df_test = train_test_split(
    data,
    train_size=0.7,
    test_size=0.3,
    random_state=SPLIT_SEED,
)

In [8]:
# Separate the target column from the features without mutating df_train or
# df_test. The previous pop() removed 'quality' in place, so running this cell
# a second time raised KeyError; selecting and dropping keeps it re-runnable.
TARGET_COLUMN = 'quality'

y_train = df_train[TARGET_COLUMN]
X_train = df_train.drop(columns=TARGET_COLUMN)

y_test = df_test[TARGET_COLUMN]
X_test = df_test.drop(columns=TARGET_COLUMN)

Train the Model

In [9]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier with library-default settings on the
# training partition. The fit call stays last so the cell still shows the
# estimator.
lr_model = LogisticRegression()
lr_model.fit(X=X_train, y=y_train)

LogisticRegression()

In [10]:
# Class predictions for the training and the test partition, in that order.
y_pred_train, y_pred_test = (
    lr_model.predict(features) for features in (X_train, X_test)
)

In [11]:
# Mean accuracy of the fitted model on each partition.
for label, features, target in (
    (" Training Accuracy", X_train, y_train),
    (" Testing Accuracy", X_test, y_test),
):
    print(label, lr_model.score(features, target))

 Training Accuracy 0.7479892761394102
 Testing Accuracy 0.7354166666666667


In [12]:
from sklearn.metrics import confusion_matrix

# Confusion matrix for the held-out test partition
# (rows: true class, columns: predicted class).
confusion_matrix(y_true=y_test, y_pred=y_pred_test)

array([[157,  67],
       [ 60, 196]])

In [13]:
# Confusion matrix for the training partition
# (rows: true class, columns: predicted class).
confusion_matrix(y_true=y_train, y_pred=y_pred_train)

array([[389, 131],
       [151, 448]])

Deployment

In [14]:
# Collect one wine sample from the user and store it as a single-row frame
# whose columns follow the dataset's feature order (every column but 'quality').
list_of_columns = data.columns
feature_columns = list_of_columns.drop('quality')

# Prompt shown for each feature, asked in this order. The prompt texts are
# kept exactly as they were.
feature_prompts = {
    'fixed acidity': 'Enter Fixed Acidity Content ',
    'volatile acidity': 'Enter Volatile Acidity Content ',
    'citric acid': 'Enter Citric Acid Content ',
    'residual sugar': 'Enter residual Sugar Content ',
    'chlorides': 'Enter Chlorides Content ',
    'free sulfur dioxide': 'Enter Free Sulphur Dioxide Content ',
    'total sulfur dioxide': 'Enter total Sulphur Dioxide Content',
    'density': 'Enter Density Content',
    'sulphates': 'Enter Sulphates Content',
    'alcohol': 'Enter Alcohol Content',
}

# Read every value first, then build the row in one step. This replaces the
# cell-by-cell enlargement of an empty frame (and the in-place drop) and gives
# numeric columns directly. A non-numeric entry still raises ValueError.
answers = {
    column: float(input(prompt))
    for column, prompt in feature_prompts.items()
}
input_data = pd.DataFrame([answers], columns=feature_columns)

Enter Fixed Acidity Content 0.5
Enter Volatile Acidity Content 0.98
Enter Citric Acid Content 1.2
Enter residual Sugar Content .2
Enter Chlorides Content 0.321
Enter Free Sulphur Dioxide Content 0.3659
Enter total Sulphur Dioxide Content0985
Enter Density Content1.32
Enter Sulphates Content1.65
Enter Alcohol Content0.15


In [15]:
# Normalisation of the user input (not de-normalisation): apply the min-max
# formula (x - min) / (max - min) with the per-column bounds recorded from the
# raw dataset, so the sample is on the same scale as the training features.
# Run this cell once per entered sample: it overwrites input_data, so a second
# run would rescale values that are already scaled.
feature_bounds = {
    'fixed acidity': (fixed_min, fixed_max),
    'volatile acidity': (volatile_min, volatile_max),
    'citric acid': (citric_min, citric_max),
    'residual sugar': (residual_min, residual_max),
    'chlorides': (chlorides_min, chlorides_max),
    'free sulfur dioxide': (free_min, free_max),
    'total sulfur dioxide': (total_min, total_max),
    'density': (density_min, density_max),
    'sulphates': (sulphates_min, sulphates_max),
    'alcohol': (alcohol_min, alcohol_max),
}

for column, (lower, upper) in feature_bounds.items():
    # A constant column has zero range; divide by 1 instead (as MinMaxScaler
    # does) so the result is not inf/NaN.
    span = (upper - lower) or 1
    input_data[column] = (input_data[column] - lower) / span

In [16]:
# Predict the class of the entered sample and map it back to the original
# label scale (the label column was min-max scaled along with the features).
y_pred = lr_model.predict(input_data)
quality = y_pred * (quality_max - quality_min) + quality_min

# Report each prediction on its own line. Testing the whole array in a single
# `if` only works when there is exactly one row, and the rescaled label is the
# result of float arithmetic, so compare with a tolerance instead of `==`.
for predicted_quality in np.atleast_1d(quality):
    if np.isclose(predicted_quality, 1):
        print('The wine quality is good..')
    else:
        print('The quality of wine is bad..')

The quality of wine is bad..
