# Project 7 - Logistic Regression and Support Vector Machines

## Notebook imports and settings

In [1]:
# Python imports

# Numerical imports
import numpy as np
import pandas as pd

# SkLearn imports
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_recall_fscore_support
from sklearn.svm import SVC

# Plotting imports
import matplotlib.pyplot as plt
import seaborn as sns

# Plotting settings
sns.set()
sns.set_palette("colorblind")
sns.set_style("ticks")

## Load and format data

In [2]:
red_wine = pd.read_csv("./data/winequality-red.csv", sep=";")
white_wine = pd.read_csv("./data/winequality-white.csv", sep=";")

# Threshold for 1 = good, 0 = bad; threshold value chosen for 50/50 dataset split
red_wine[red_wine["quality"] < 6] = 0
red_wine[red_wine["quality"] >= 6] = 1

white_wine[white_wine["quality"] < 5] = 0
white_wine[white_wine["quality"] >= 5] = 1

combined_wine = pd.concat([red_wine, white_wine])

# Split into test / train
split = int(0.8 * len(red_wine))
red_wine_train = red_wine[:split]
red_wine_test = red_wine[split:]

split = int(0.8 * len(white_wine))
white_wine_train = white_wine[:split]
white_wine_test = white_wine[split:]

split = int(0.8 * len(combined_wine))
combined_wine_train = combined_wine[:split]
combined_wine_test = combined_wine[split:]

print("____WHITE WINE____")
display(white_wine.tail(3))

print("____RED WINE____")
display(red_wine.tail(3))

print("____BOTH WINES____")
display(combined_wine.tail(3))

____WHITE WINE____


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
4895,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
4896,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
4897,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1


____RED WINE____


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
1596,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
1597,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1598,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1


____BOTH WINES____


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
4895,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
4896,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
4897,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1


## Logistic Regression

In [9]:
y = combined_wine_train[["quality"]].values.flatten()
X = combined_wine_train.drop("quality", axis=1)
clf = LogisticRegression(random_state=0).fit(X, y)

y_true = combined_wine_test[["quality"]].values.flatten()
X = combined_wine_test.drop("quality", axis=1)
y_pred = clf.predict(X)

precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred)

print(f"Precision with bad wine:  {precision[0]:.4f}")
print(f"Precision with good wine: {precision[1]:.4f}")
print()

print(f"Recall with bad wine:     {recall[0]:.4f}")
print(f"Recall with good wine:    {recall[1]:.4f}")
print()

print(f"F1 Score with bad wine:   {f1[0]:.4f}")
print(f"F1 Score with good wine:  {f1[1]:.4f}")

Precision with bad wine:  1.0000
Precision with good wine: 1.0000

Recall with bad wine:     1.0000
Recall with good wine:    1.0000

F1 Score with bad wine:   1.0000
F1 Score with good wine:  1.0000


## Support Vector Machine (SVM)

In [10]:
y = combined_wine_train[["quality"]].values.flatten()
X = combined_wine_train.drop("quality", axis=1)
clf = SVC(gamma="auto").fit(X, y)

y_true = combined_wine_test[["quality"]].values.flatten()
X = combined_wine_test.drop("quality", axis=1)
y_pred = clf.predict(X)

precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred)

print(f"Precision with bad wine:  {precision[0]:.4f}")
print(f"Precision with good wine: {precision[1]:.4f}")
print()

print(f"Recall with bad wine:     {recall[0]:.4f}")
print(f"Recall with good wine:    {recall[0]:.4f}")
print()

print(f"F1 Score with bad wine:   {f1[0]:.4f}")
print(f"F1 Score with good wine:  {f1[0]:.4f}")

Precision with bad wine:  1.0000
Precision with good wine: 1.0000

Recall with bad wine:     1.0000
Recall with good wine:    1.0000

F1 Score with bad wine:   1.0000
F1 Score with good wine:  1.0000
