===============================================================================================================

<div style="margin-left: -60px;">
    <img src="../images/logos/vinometrics_logo.jpg" width="300" height="200" />
</div>


<div style="text-align: center;">
    <h1 style="font-size: 32px; color: #2E86C1;">Wine Quality Predictor</h1>
</div>


===============================================================================================================

To predict wine quality, first run the cells in the "Set up (run first)" section further down this notebook, then run the function below:

In [7]:
# Interactive prediction: `predict_wine_quality` is imported from the project's
# `functions` module, and both arguments are loaded with joblib in the
# "Set up (run first)" section, so that section must be executed beforehand.
# Per the recorded output below, the call prompts for each feature value and
# then prints the predicted quality.
predict_wine_quality(trained_wine_model, preprocessing_pipeline)

Enter value for fixed_acidity (g/L, Scale: 0-20): 7.7
Enter value for volatile_acidity (g/L, Scale: 0-2): 0.620
Enter value for citric_acid (g/L, Scale: 0-1): 0.04
Enter value for residual_sugar (g/L, Scale: 0-200): 3.8
Enter value for chlorides (g/L, Scale: 0-0.2): 0.084
Enter value for total_sulfur_dioxide (ppm, Scale: 0-300): 45
Enter value for density (g/mL, Scale: 0.98-1.04): 0.99780
Enter value for pH (pH scale, Scale: 2.8-4.0): 3.34
Enter value for sulphates (g/L, Scale: 0-2): 0.53
Enter value for alcohol (% ABV, Scale: 5-15): 9.5
Predicted wine quality: 5.1


===============================================================================================================

#### Sample data

###  Casal Garcia Vinho Verde

#### Known information

Sourced Casal Garcia Vinho Verde information from https://www.vinello.de/en/vinho-verde-casal-garcia

Wine acidity in g/l: 6.2

Residual sugar (approx.) in g/l: 12.4

Alcohol % ABV: 9.5

In [4]:
# Identify Casal Garcia Vinho Verde in the anonymous white-wine dataset by
# narrowing on the three published values: alcohol, residual sugar and acidity.
# Requires `white_df`, which is loaded in the "Set up (run first)" section.
w_alcohol_filtered_df = white_df[white_df['alcohol'] == 9.5]
w_sugar_filtered_df = w_alcohol_filtered_df[w_alcohol_filtered_df['residual_sugar'] == 12.4]
casal_garcia_white = w_sugar_filtered_df[w_sugar_filtered_df['fixed_acidity'] == 6.2]

# Split the matched row(s) into features and target, dropping the same columns
# that are dropped when the feature matrix `X` is built in the set-up section.
casal_garcia_white_x = casal_garcia_white.drop(['quality', 'free_sulfur_dioxide'], axis=1)
casal_garcia_white_y = casal_garcia_white[['quality']]

# Display the full matched row(s), including the known quality score.
casal_garcia_white

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality
3131,6.2,0.22,0.3,12.4,0.054,108.0,152.0,0.99728,3.1,0.47,9.5,6


In [6]:
 # Preview the first five held-out feature rows (X_test comes from the split
 # performed in the "Set up (run first)" section).
 X_test.head(n=5)

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,total_sulfur_dioxide,density,pH,sulphates,alcohol
51,7.7,0.62,0.04,3.8,0.084,45.0,0.9978,3.34,0.53,9.5
1100,8.2,0.635,0.1,2.1,0.073,60.0,0.99638,3.29,0.75,10.9
1312,8.4,0.37,0.43,2.3,0.063,19.0,0.9955,3.17,0.81,11.2
514,9.9,0.49,0.58,3.5,0.094,43.0,1.0004,3.29,0.58,9.0
1074,6.3,1.02,0.0,2.0,0.083,24.0,0.99437,3.59,0.55,11.2


In [8]:
# Preview the quality labels for the first five held-out rows (y_test comes
# from the split performed in the "Set up (run first)" section).
y_test.head(n=5)

Unnamed: 0,quality
51,5
1100,6
1312,7
514,5
1074,4


===============================================================================================================
<br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br> <!-- Vertical spacer separating the demo above from the set-up section below -->

===============================================================================================================
### Set up (run first)

#### Importing Functions and Libraries

In [1]:
import pandas as pd
import numpy as np
from joblib import load
from sklearn.model_selection import train_test_split
import importlib

#Importing our own functions from our repo
import functions
importlib.reload(functions)

from functions import handle_outliers, apply_outlier_thresholds, predict_wine_quality

#### Importing model and preprocessing pipeline

In [2]:
# Loading the trained model from the repo (the path is relative to the current
# working directory). joblib's `load` unpickles the file, so only load
# artefacts from a trusted source.
trained_wine_model = load('wine_quality_predictor_model.joblib')
# Loading the preprocessing pipeline from the repo.
# NOTE(review): the pickled pipeline presumably references helpers imported
# from `functions` above (e.g. the outlier functions) - confirm before removing
# any of those imports.
preprocessing_pipeline = load('preprocessing_pipeline.joblib')

#### Importing test data

In [3]:
# Relative locations of the cleaned red and white wine datasets.
red_file_path = '../data/cleaned/winequality_red_cleaned.csv'
white_file_path = '../data/cleaned/winequality_white_cleaned.csv'

# The first CSV column is read as the row index rather than as a feature.
red_df = pd.read_csv(red_file_path, index_col=0)
white_df = pd.read_csv(white_file_path, index_col=0)

# Features and target are built from the red dataset only; `quality` is the
# target and `free_sulfur_dioxide` is excluded from the features.
X = red_df.drop(columns=['quality', 'free_sulfur_dioxide'])
y = red_df[['quality']]

# Fixed random_state keeps the train/test split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

## Identifying Cultura Vini Vinho Verde in anonymous dataset

In [5]:
# Narrow the anonymous red-wine dataset to the rows whose alcohol content is
# 10.5, the known value used to look for Cultura Vini Vinho Verde.
r_alcohol_filtered_df = red_df.loc[red_df['alcohol'].eq(10.5)]
r_alcohol_filtered_df

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality
8,7.5,0.5,0.36,6.1,0.071,17.0,102.0,0.9978,3.35,0.8,10.5,5
14,8.5,0.28,0.56,1.8,0.092,35.0,103.0,0.9969,3.3,0.75,10.5,7
36,7.3,0.45,0.36,5.9,0.074,12.0,87.0,0.9978,3.33,0.83,10.5,5
38,7.5,0.49,0.2,2.6,0.332,8.0,14.0,0.9968,3.21,0.9,10.5,6
64,8.0,0.705,0.05,1.9,0.074,8.0,19.0,0.9962,3.34,0.95,10.5,6
70,8.8,0.41,0.64,2.2,0.093,9.0,42.0,0.9986,3.54,0.66,10.5,5
86,5.0,1.02,0.04,1.4,0.045,41.0,85.0,0.9938,3.75,0.48,10.5,4
104,9.4,0.4,0.31,2.2,0.09,13.0,62.0,0.9966,3.07,0.63,10.5,6
116,8.0,0.59,0.16,1.8,0.065,3.0,16.0,0.9962,3.42,0.92,10.5,7
133,8.2,0.4,0.44,2.8,0.089,11.0,43.0,0.9975,3.53,0.61,10.5,6
