# Import Modules

In [16]:
import pandas as pd
import numpy as np
import sklearn
import seaborn as sns
import matplotlib.pyplot as plt
import xgboost as xgb
from collections import Counter
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import SGDClassifier

In [17]:
# Warnings are ignored in the following for better readability
import warnings
warnings.filterwarnings('ignore')

# Import Datasets

In [18]:
redwine = pd.read_csv("winequality-red (1).csv", delimiter=';')
redwine_3 = redwine.copy()
redwine_3['quality'] = redwine_3['quality'].apply(lambda x: 0 if x <= 4 else 1 if x <= 6 else 2)

whitewine = pd.read_csv("winequality-white (2).csv", delimiter=';')
whitewine_3 = whitewine.copy()
whitewine_3['quality'] = whitewine_3['quality'].apply(lambda x: 0 if x <= 4 else 1 if x <= 6 else 2)

# Final Model: Random Forest

In [19]:
# Redwine - Final Model

x = redwine_3[['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'total sulfur dioxide', 'sulphates', 'alcohol']]
y = redwine_3['quality']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25,random_state=299)

class_weights = {
    0: 1 / 63,
    1: 1 / 1319,
    2: 1 / 217
}

red = RandomForestClassifier(class_weight=class_weights)
red.fit(x_train, y_train)
rf_predict = red.predict(x_test)
rf_conf_matrix = confusion_matrix(y_test, rf_predict)
rf_f1_score = f1_score(y_test, rf_predict, average='weighted')
print(rf_f1_score * 100)
rf_acc_score = accuracy_score(y_test, rf_predict)
print(rf_acc_score*100)


86.74938383174499
89.25


In [20]:
# Whitewine Final Model
x = whitewine_3[['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'total sulfur dioxide', 'sulphates', 'alcohol']]
y = whitewine_3['quality']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25,random_state=242)

class_weights = {
    0: 1 / 183,
    1: 1 / 3655,
    2: 1 / 1060    
}

white = RandomForestClassifier()
white.fit(x_train, y_train)
rf_predict=white.predict(x_test)
rf_conf_matrix = confusion_matrix(y_test, rf_predict)
rf_f1_score = f1_score(y_test, rf_predict, average='weighted')
print(rf_f1_score * 100)
rf_acc_score = accuracy_score(y_test, rf_predict)
print(rf_acc_score*100)

81.26238498354105
82.85714285714286


## Random Forest - prediction

In [None]:
redWhite = input("Geben Sie weiß für Weißwein und rot für Rotwein ein: ")

if redWhite == "weiß" or redWhite == "white": # User wants to predict Whitewine
    print("Qualitätsanalyse für Weißwein")
    example_data = []
    values = input("Bitte geben Sie die Werte für den Array ein, getrennt durch Kommas: ")
    values_list = values.split(',')
    if len(values_list) != 8:
        print("Fehler: Es müssen genau 8 Werte eingegeben werden!")
    else:
        try:
            example_data.append([float(value) for value in values_list])
            print("Folgende Eingabe erfolgreich:")
            print(example_data)
        except ValueError:
            print("Fehler: Ungültige Eingabe! Es dürfen nur Zahlen eingegeben werden.")
    predicted_class = white.predict(example_data)
    print(predicted_class)
elif redWhite == "rot" or redWhite == "red": # User wants to predict Redwine
    print("Qualitätsanalyse für Rotwein")
    example_data = []
    values = input("Bitte geben Sie die Werte für den Array ein, getrennt durch Kommas: ")
    values_list = values.split(',')
    if len(values_list) != 8:
        print("Fehler: Es müssen genau 8 Werte eingegeben werden!")
    else:
        try:
            example_data.append([float(value) for value in values_list])
            print("Folgende Eingabe erfolgreich:")
            print(example_data)
        except ValueError:
            print("Fehler: Ungültige Eingabe! Es dürfen nur Zahlen eingegeben werden.")
    predicted_class = red.predict(example_data)
    print(predicted_class)

In [None]:
"""""
7.871429,0.724206,0.173651,2.684921,0.09573,34.444444,0.592222,10.215873    # Rot 0
8.254284,0.538560,0.258264,2.503867,0.088973,48.946929,0.647263,10.252717   # Rot 1
8.847005,0.405530,0.376498,2.708756,0.075912,34.889401,0.743456,11.518049   # Rot 2

7.180874,0.375984,0.307705,4.821038,0.050557,130.232240,0.475956,10.173497  # Weiß 0
6.876060,0.277086,0.337877,6.797729,0.047740,142.571272,0.487557,10.269808  # Weiß 1
6.725142,0.265349,0.326057,5.261509,0.038160,125.245283,0.500142,11.416022  # Weiß 2
"""""