# Title:
## Wine Quality Prediction


<h1>Objective</h1>
<p>
The objective of wine quality prediction is to develop a model that can accurately predict the quality of wine based on its chemical properties. This can help winemakers, distributors, and consumers assess the quality of wine without the need for extensive sensory evaluation, saving time and resources. Here are the main objectives and aspects of wine quality prediction:

<ol>
  <li>Data Collection</li>
  <li>Preprocessing</li>
  <li>Feature Extraction</li>
  <li>Model Selection</li>
  <li>Training</li>
  <li>Evaluation</li>
  <li>Optimization</li>
  <li>Deployment</li>
  <li>Continuous Improvement</li>
</ol>
</p>

### Importing Libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from warnings import filterwarnings
filterwarnings(action='ignore')

### Loading Dataset

In [2]:
# Load the wine-quality table. The path is relative to the kernel's working
# directory; pd.read_csv raises FileNotFoundError when the file is not there.
csv_path = "winequality-red.csv"
wine = pd.read_csv(csv_path)
print("Successfully Imported Data!")

# Preview the first rows to sanity-check the parsed columns.
wine.head()

FileNotFoundError: [Errno 2] No such file or directory: 'winequality-red.csv'

In [None]:
# Show the frame's dimensions as a (rows, columns) tuple.
print(wine.shape)

### Description

In [None]:
# Summary statistics per column; include='all' also reports non-numeric
# columns instead of silently skipping them.
wine.describe(include='all')

### Finding Null Values

In [None]:
# Number of missing (NaN) entries in each column.
print(wine.isna().sum())

In [None]:
# Pairwise correlation between columns (pandas' default method is Pearson).
wine.corr()

In [None]:
# Average of every other column within each quality score.
wine.groupby('quality').mean()

## Data Analysis
### Countplot:

In [None]:
# Bar chart of how many wines received each quality score.
# The column is given by keyword: passing a bare Series positionally is
# deprecated in seaborn 0.11 and is bound to `data` (not `x`) from 0.12 on.
sns.countplot(x='quality', data=wine)
plt.show()

In [None]:
# Number of wines at each distinct pH value.
# Keyword form is required: a positional Series is deprecated in seaborn 0.11
# and is bound to `data` (not `x`) from 0.12 on.
# NOTE(review): pH looks like a continuous measurement, so this draws one bar
# per distinct value; a histogram may read better - confirm intent.
sns.countplot(x='pH', data=wine)
plt.show()

In [None]:
# Number of wines at each distinct alcohol value.
# Keyword form is required: a positional Series is deprecated in seaborn 0.11
# and is bound to `data` (not `x`) from 0.12 on.
# NOTE(review): alcohol looks continuous, so expect one bar per distinct
# value; a histogram may read better - confirm intent.
sns.countplot(x='alcohol', data=wine)
plt.show()

In [None]:
# Number of wines at each distinct fixed-acidity value.
# Keyword form is required: a positional Series is deprecated in seaborn 0.11
# and is bound to `data` (not `x`) from 0.12 on.
sns.countplot(x='fixed acidity', data=wine)
plt.show()

In [None]:
# Number of wines at each distinct volatile-acidity value.
# Keyword form is required: a positional Series is deprecated in seaborn 0.11
# and is bound to `data` (not `x`) from 0.12 on.
sns.countplot(x='volatile acidity', data=wine)
plt.show()

In [None]:
# Number of wines at each distinct citric-acid value.
# Keyword form is required: a positional Series is deprecated in seaborn 0.11
# and is bound to `data` (not `x`) from 0.12 on.
sns.countplot(x='citric acid', data=wine)
plt.show()

In [None]:
# Number of wines at each distinct density value.
# Keyword form is required: a positional Series is deprecated in seaborn 0.11
# and is bound to `data` (not `x`) from 0.12 on.
# NOTE(review): density looks continuous, so expect one bar per distinct
# value; a histogram may read better - confirm intent.
sns.countplot(x='density', data=wine)
plt.show()

### KDE plot:

In [None]:
# Kernel density estimate of the quality score, restricted to rows whose
# quality is greater than 2.
quality_above_two = wine.query('quality > 2')['quality']
sns.kdeplot(quality_above_two)

### Distplot:

In [None]:
# Distribution of alcohol content: density-normalised histogram with a KDE
# overlay. sns.distplot is deprecated and scheduled for removal, so the
# documented histplot equivalent is used (cut=3 mirrors distplot's KDE tails).
sns.histplot(data=wine, x='alcohol', kde=True, stat='density', kde_kws=dict(cut=3))

In [None]:
# One box plot per column, each in its own panel of a 5x5 grid with
# independent x axes.
box_options = dict(kind='box', subplots=True, layout=(5, 5), sharex=False)
wine.plot(**box_options)

In [None]:
# One density curve per column, each in its own panel of a 4x4 grid with
# independent x axes.
density_options = dict(kind='density', subplots=True, layout=(4, 4), sharex=False)
wine.plot(**density_options)

### Histogram

In [None]:
# Grid of histograms (25 bins each), one per column, on a 15x15 inch figure.
hist_options = dict(bins=25, figsize=(15, 15))
wine.hist(**hist_options)
plt.show()

### Heatmap for expressing the correlation

In [None]:
# Pairwise column correlations, drawn as a heatmap with the coefficient
# written inside every cell.
corr = wine.corr()
sns.heatmap(data=corr, annot=True)

### Pair Plot:

In [None]:
# Scatter-plot matrix over the columns of the frame.
sns.pairplot(data=wine)

### Violinplot:

In [None]:
# Distribution of alcohol content within each quality score.
sns.violinplot(data=wine, x='quality', y='alcohol')

### Feature Selection

In [None]:
# Binary classification target: 1 when the quality score is 7 or higher,
# otherwise 0.
is_good = wine['quality'] >= 7
wine['goodquality'] = is_good.astype('int64')

# Features are every column except the raw score and the label derived from it.
target_columns = ['quality', 'goodquality']
X = wine.drop(columns=target_columns)
y = wine['goodquality']

In [None]:
# Count how many wines fall in each class (1 = quality >= 7, 0 = otherwise).
# These are absolute counts, not proportions.
wine['goodquality'].value_counts()

In [None]:
# Display the feature matrix (every column except quality and goodquality).
X

In [None]:
# (rows, feature columns) of the feature matrix.
X.shape

In [None]:
# Display the binary target (the goodquality column).
y

In [None]:
# Length of the target vector, shown as a 1-tuple.
y.shape

### Feature Importance

In [None]:
from sklearn.ensemble import ExtraTreesClassifier

# Fit a randomized tree ensemble only to rank the features. The seed is pinned
# because an unseeded ExtraTrees model gives different importances each run.
classifiern = ExtraTreesClassifier(random_state=7)
classifiern.fit(X, y)

# Raw importances, in the same order as X's columns.
score = classifiern.feature_importances_

# Label each value with its column name and list the strongest feature first,
# so the output can be read without cross-referencing column positions.
print(pd.Series(score, index=X.columns).sort_values(ascending=False))

### Train test split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed seed makes the split
# repeatable across runs.
split_options = dict(test_size=0.3, random_state=7)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

### Model selection (LogisticRegression) and model fit

In [None]:
from sklearn.linear_model import LogisticRegression

# The default cap of 100 solver iterations can be too low for unscaled
# features, and this notebook silences all warnings, so a non-converged fit
# would go unnoticed. Raising the cap has no effect on a fit that already
# converges within 100 iterations.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

In [None]:
# Fitted intercept (bias) term of the logistic-regression model.
model.intercept_

In [None]:
# Fitted coefficients, one per feature column in X_train's column order.
model.coef_

### Model prediction

In [None]:
# Predict the class label for every held-out row and print the label array.
y_pred = model.predict(X_test)
print(y_pred)

### Model evaluation

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Fraction of held-out rows whose predicted label matches the true label.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy Score:", accuracy)

In [None]:
# Confusion matrix for the current y_pred: rows are true classes, columns are
# predicted classes (scikit-learn's convention).
confusion_mat = confusion_matrix(y_test,y_pred)
print(confusion_mat)

### Using KNN algorithm:

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier that votes over the 3 closest training rows.
model = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
y_pred = model.predict(X_test)

knn_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy Score:", knn_accuracy)

### Using SVC algorithm:

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

# Support-vector classifier with scikit-learn's default settings.
model = SVC().fit(X_train, y_train)
y_pred = model.predict(X_test)

svc_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy Score:", svc_accuracy)

### Using Decision Tree Algorithm:

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

# Tree construction permutes features and breaks ties between equally good
# splits at random, so the seed is pinned to make the accuracy repeatable.
model = DecisionTreeClassifier(random_state=7)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Accuracy Score:", accuracy_score(y_test, y_pred))

### Using GaussianNB algorithm:

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes classifier with default settings.
model = GaussianNB().fit(X_train, y_train)
y_pred = model.predict(X_test)

nb_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy Score:", nb_accuracy)

### Using Random Forest Algorithm:

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Random forest with a fixed seed so the fitted trees are repeatable.
model = RandomForestClassifier(random_state=1).fit(X_train, y_train)
y_pred = model.predict(X_test)

forest_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy Score:", forest_accuracy)

### Using Xgboost Algorithm:

In [None]:
import xgboost as xgb
from sklearn.metrics import accuracy_score

# Gradient-boosted trees via XGBoost's scikit-learn wrapper, seeded for
# repeatability.
model = xgb.XGBClassifier(random_state=1).fit(X_train, y_train)
y_pred = model.predict(X_test)

xgb_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy Score:", xgb_accuracy)

In [None]:
# Side-by-side comparison of the classifiers tried in this notebook.
# NOTE(review): the scores are literals typed into this cell, not values read
# from the accuracy_score calls, so re-check them after re-running the models.
score_rows = [
    ('Logistic Regression', 0.870),
    ('KNN', 0.872),
    ('SVC', 0.868),
    ('Decision Tree', 0.864),
    ('GaussianNB', 0.833),
    ('Random Forest', 0.893),
    ('Xgboost', 0.879),
]
results = pd.DataFrame(score_rows, columns=['Model', 'Score'])

# Best score first; the score itself becomes the row index of the table.
result_df = results.sort_values(by='Score', ascending=False).set_index('Score')
result_df