# RED WINE QUALITY

##### IMPORTING LIBRARIES

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import f1_score, confusion_matrix, accuracy_score, recall_score, precision_score
from sklearn.metrics import mean_squared_error
from math import sqrt
%matplotlib inline

In [None]:
# Load the wine-quality CSV into a DataFrame.
# NOTE: the path is absolute and specific to one machine; adjust it before running elsewhere.
rw = pd.read_csv(filepath_or_buffer=r'C:\Users\ASUS\Downloads\datasets_4458_8204_winequality-red.csv')

## Data Information:

fixed acidity: most acids involved with wine are fixed or nonvolatile (do not evaporate readily).

volatile acidity: the amount of acetic acid in wine, which at excessively high levels can lead to an unpleasant, vinegar taste.

citric acid: found in small quantities, citric acid can add 'freshness' and flavor to wines.

residual sugar: the amount of sugar remaining after fermentation stops; it's rare to find wines with less than 1 gram/liter, and wines with greater than 45 grams/liter are considered sweet.

chlorides: the amount of salt in the wine.

free sulfur dioxide: the free form of SO2 exists in equilibrium between molecular SO2 (as a dissolved gas) and bisulfite ion; it prevents microbial growth and the oxidation of wine.

total sulfur dioxide: amount of free and bound forms of SO2; in low concentrations, SO2 is mostly undetectable in wine, but at free SO2 concentrations over 50 ppm, SO2 becomes evident in the nose and taste of wine.

density: the density of wine is close to that of water depending on the percent alcohol and sugar content.

pH: describes how acidic or basic a wine is on a scale from 0 (very acidic) to 14 (very basic); most wines are between 3-4 on the pH scale.

sulphates: a wine additive which can contribute to sulfur dioxide gas (SO2) levels, which acts as an antimicrobial and antioxidant.

alcohol: the percent alcohol content of the wine.

quality: output variable (based on sensory data, score between 0 and 10).

##### DATA PREPROCESSING

In [None]:
# Preview the first five rows of the dataset.
rw.head(n=5)

In [None]:
# Normalise the column headers so later cells can refer to them with
# underscores (e.g. 'volatile_acidity'). Both '.' and ' ' separators are
# replaced, and the pattern is an explicit regex so the result does not depend
# on the pandas version's default for `regex` (where a bare '.' is ambiguous).
rw.columns = rw.columns.str.replace(r'[. ]', '_', regex=True)

In [None]:
# Print a concise summary of the DataFrame (columns, non-null counts, dtypes).
rw.info()

In [None]:
# Report, per column, whether it contains any missing value.
rw.isna().any()

In [None]:
# Bar chart of how many rows carry each quality score. The column is passed by
# keyword because newer seaborn releases no longer accept the plotted variable
# as the first positional argument.
sb.countplot(x='quality', data=rw)
# The same distribution as a table (shown as the cell output).
rw['quality'].value_counts()

In [None]:
# Correlation of every column with 'quality', sorted from lowest to highest.
corr = rw.corr().loc[:, 'quality'].sort_values()
corr

In [None]:
# Annotated heatmap of the pairwise correlations, formatted as percentages.
sb.heatmap(data=rw.corr(), annot=True, fmt='.0%')

In [None]:
# Show, per column, whether its absolute correlation with quality exceeds 0.25.
print(corr.abs() > 0.25)

##### FINAL SELECTION OF FACTORS THAT HIGHLY INFLUENCE THE QUALITY OF WINE

In [None]:
# Keep only the entries whose absolute correlation with quality exceeds 0.25.
corr.loc[corr.abs().gt(0.25)]

##### REGRESSION MODELS FOR PREDICTION

In [None]:
# Separate the independent variables (features) from the dependent variable
# (target), then split both into train and test sets.
X = rw.loc[:, ['alcohol', 'sulphates', 'volatile_acidity']]
# Select the target by name rather than by position, so a different column
# order in the CSV cannot silently pick the wrong column.
Y = rw['quality']
# Hold out 30% of the rows for testing; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.30, random_state=35)





##### LINEAR REGRESSION MODEL 

In [None]:
# Fit a linear regression on the training split and predict the test split.
# Predictions are rounded to whole numbers so they can be compared with the
# quality scores in the confusion matrix.
regressor = LinearRegression().fit(X_train, y_train)
prediction = np.round(regressor.predict(X_test))

In [None]:
# Actual quality (x-axis) against rounded linear-regression prediction (y-axis).
plt.scatter(x=y_test, y=prediction)

In [None]:
# Confusion matrix of actual vs. rounded predicted quality (linear regression).
cm_lr = confusion_matrix(y_true=y_test, y_pred=prediction)
cm_lr

##### DECISION TREE REGRESSOR

In [None]:
# Fit a decision-tree regressor on the training split. The tree is seeded
# because scikit-learn permutes the features randomly at each split, so an
# unseeded tree can give different results from one run to the next.
regressor = DecisionTreeRegressor(random_state=35)
regressor.fit(X_train, y_train)
# Round the predictions to whole numbers so they can be compared with the
# quality scores in the confusion matrix.
prediction_dt = np.round(regressor.predict(X_test))

In [None]:
# Actual quality (x-axis) against rounded decision-tree prediction (y-axis).
plt.scatter(x=y_test, y=prediction_dt)

In [None]:
# Confusion matrix of actual vs. rounded predicted quality (decision tree).
cm_dtr = confusion_matrix(y_true=y_test, y_pred=prediction_dt)
cm_dtr

##### RMSE OF MODELS

###### RMSE OF LINEAR REGRESSION MODEL

In [None]:
# Root-mean-square error of the rounded linear-regression predictions on the test split.
RSME = sqrt(metrics.mean_squared_error(y_true=y_test, y_pred=prediction))
print(RSME)

###### RMSE OF DECISION TREE REGRESSOR MODEL

In [None]:
# Root-mean-square error of the rounded decision-tree predictions on the test split.
RSME = sqrt(metrics.mean_squared_error(y_true=y_test, y_pred=prediction_dt))
print(RSME)