In [None]:
# imports
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from sklearn import svm
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_recall_fscore_support
from sklearn.svm import SVC

In [None]:
# Read the combined red/white wine table and binarise two columns:
#   quality  -> 1 when the original score is above 5, otherwise 0
#   wineType -> 1 when the `type` column equals "white", otherwise 0
data = (
    pd.read_csv('wine-quality-white-and-red.csv')
    .assign(
        quality=lambda frame: np.where(frame['quality'] > 5, 1, 0),
        wineType=lambda frame: np.where(frame['type'] == "white", 1, 0),
    )
)

# Work on an independent copy holding only the modelling columns.
wineData = data.loc[
    :,
    [
        'fixed acidity',
        'wineType',
        'volatile acidity',
        'citric acid',
        'total sulfur dioxide',
        'pH',
        'quality',
    ],
].copy()

# Displayed as the cell output only; the result is not assigned back,
# so `wineData` itself keeps its existing index.
wineData.reset_index()

In [None]:
# Standardize input variables
# Each listed column is replaced by (value - column mean) / column std,
# processed in the same order as before.
for column in ['fixed acidity', 'volatile acidity', 'citric acid', 'total sulfur dioxide', 'pH']:
    values = wineData[column]
    wineData[column] = (values - values.mean()) / values.std()

In [None]:
# Keep only rows with total sulfur dioxide below 2 and volatile acidity
# below 4 (these are z-scores when the standardization cell has been run
# first), then show the remaining rows.
within_bounds = (wineData['total sulfur dioxide'] < 2) & (wineData['volatile acidity'] < 4)
wineData = wineData.loc[within_bounds]
wineData

In [None]:
def getPrecision(lm, X, y):
    """Score a fitted classifier on (X, y) and display the results.

    Parameters
    ----------
    lm : fitted estimator exposing ``predict(X)``.
    X : feature table passed to ``lm.predict``.
    y : true labels compared against the predictions.

    Returns
    -------
    list
        ``[precision, recall, fscore, support]`` exactly as produced by
        ``precision_recall_fscore_support`` (called with ``zero_division=1``).
        Precision, recall and f-score are also shown via ``display``.
    """
    precision, recall, fscore, support = precision_recall_fscore_support(
        y, lm.predict(X), zero_division=1
    )

    # Show the three headline metrics; support is returned but not displayed.
    for template, metric in (
        ('precision = {}', precision),
        ('recall = {}', recall),
        ('f-score = {}', fscore),
    ):
        display(template.format(metric))

    return [precision, recall, fscore, support]

In [None]:
# Fit a class-weighted logistic regression on two features.
X = wineData[['total sulfur dioxide', 'volatile acidity']]
y = wineData.quality

# class_weight: class 0 is given weight 0.1, class 1 weight 1.0.
lm = LogisticRegression(class_weight={0:0.1, 1:1.0})
lm.fit(X, y)

# Scatter the training rows: red = quality 0, blue = quality 1.
color = ['r' if y_ == 0 else 'b' for y_ in y]
plt.scatter(wineData['total sulfur dioxide'], wineData['volatile acidity'], c=color, s=3)
plt.xlabel('total sulfur dioxide')
plt.ylabel('volatile acidity')

# Draw random points and label them with the fitted model; the following
# cell plots them. The model coefficients are read in the cells that draw
# the decision boundary, so they are not extracted here.
# NOTE(review): the sampling ranges (-40..50, -40..70) are far wider than the
# region kept by the outlier filter (< 2 and < 4) -- confirm this is intended.
n = 5000
newx = [random.uniform(-40,50) for _ in range(n)]
newy = [random.uniform(-40,70) for _ in range(n)]
df_temp = pd.DataFrame({'total sulfur dioxide':newx, 'volatile acidity':newy})
newlabel = lm.predict(df_temp)


In [None]:
plt.figure()

# Background layer: the random points from the previous cell, colored by the
# class the model predicted for each (red = 0, blue = anything else).
color = ['b' if y_ != 0 else 'r' for y_ in newlabel]
plt.scatter(newx, newy, s=3, marker='o', c=color)

# Foreground layer: the original rows, colored by their true label.
color = ['b' if y_ != 0 else 'r' for y_ in y]
plt.scatter(wineData['total sulfur dioxide'], wineData['volatile acidity'], s=3, c=color)

# Decision line: points where b0 + b[0]*x + b[1]*y == 0, solved for y at the
# two x positions in Xdb.
b0 = lm.intercept_[0]
b = lm.coef_[0]
Xdb = [-2, 2]
ydb = [-(b0+b[0]*x)/b[1] for x in Xdb]
plt.plot(Xdb, ydb)

plt.xlabel('total sulfur dioxide')
plt.ylabel('volatile acidity')

In [None]:
# Training rows colored by true label (red = quality 0, blue = quality 1).
plt.figure()
color = ['r' if y_ == 0 else 'b' for y_ in y]
plt.scatter(wineData['total sulfur dioxide'], wineData['volatile acidity'], c=color, s=1)

# NOTE(review): the intercept is offset by -3 before the line is drawn, so
# this is NOT the fitted model's own decision boundary. The metrics reported
# below are still computed with the unshifted model `lm`. Confirm the offset
# is intentional.
b0 = lm.intercept_[0] - 3
b = lm.coef_[0]

# Line where b0 + b[0]*x + b[1]*y == 0, evaluated at the two x positions.
Xdb = [-3, 3]
ydb = [-(b0+b[0]*x)/b[1] for x in Xdb]
plt.plot(Xdb, ydb)
plt.xlabel('total sulfur dioxide')
plt.ylabel('volatile acidity')

# Display precision/recall/f-score for `lm` on the training data, then print
# the full [precision, recall, fscore, support] list.
value = getPrecision(lm, X, y)
print(value)

In [None]:
# Linear-kernel SVM on the two features, with class weights 9 (class 0) and
# 10 (class 1).
X = wineData[["total sulfur dioxide", "volatile acidity"]]
y = wineData.quality

clf = svm.SVC(kernel='linear', class_weight={0: 9, 1: 10})
clf.fit(X, y)

# Training-set precision / recall / f-score, displayed by the shared helper
# (same metric call and display strings as the other model cells).
p, r, f, s = getPrecision(clf, X, y)

# Sample random points over the plotted region and label them with the model
# so the predicted regions become visible.
n = 5000
newx = [random.uniform(-2,2) for _ in range(n)]
newy = [random.uniform(-2,4) for _ in range(n)]
df_temp = pd.DataFrame({'total sulfur dioxide':newx,'volatile acidity':newy})
newlabel = clf.predict(df_temp)

# Background: random points colored by predicted class (red = 0, blue = 1).
color = ['r' if y_ == 0 else 'b' for y_ in newlabel]
plt.scatter(newx, newy, c=color, marker='o', s=7)

# Foreground: training rows colored by true label.
color = ['r' if y_ == 0 else 'b' for y_ in y]
plt.scatter(wineData['total sulfur dioxide'], wineData['volatile acidity'], c=color, s=30, marker='x')
plt.xlabel('total sulfur dioxide')
plt.ylabel('volatile acidity');

In [None]:
# Degree-3 polynomial-kernel SVM on the two features, with class weights
# 3 (class 0) and 6 (class 1).
X = wineData[["total sulfur dioxide", "volatile acidity"]]
y = wineData.quality

clf = SVC(kernel='poly', degree=3 , class_weight={0: 3, 1: 6})
clf.fit(X, y)

# Training-set precision / recall / f-score, displayed by the shared helper
# (same metric call and display strings as the other model cells).
p, r, f, s = getPrecision(clf, X, y)

# Sample random points over the plotted region and label them with the model
# so the predicted regions become visible.
n = 5000
newx = [random.uniform(-2,4) for _ in range(n)]
newy = [random.uniform(-2,4.5) for _ in range(n)]
df_temp = pd.DataFrame({'total sulfur dioxide':newx,'volatile acidity':newy})
newlabel = clf.predict(df_temp)

# Background: random points colored by predicted class (red = 0, blue = 1).
color = ['r' if y_ == 0 else 'b' for y_ in newlabel]
plt.scatter(newx, newy, c=color, marker='o', s=7)

# Foreground: training rows colored by true label.
color = ['r' if y_ == 0 else 'b' for y_ in y]
plt.scatter(wineData['total sulfur dioxide'], wineData['volatile acidity'], c=color, s=30, marker='x')
plt.xlabel('total sulfur dioxide')
plt.ylabel('volatile acidity');

In [None]:
# RBF-kernel SVM on the two features.
X = wineData[["total sulfur dioxide", "volatile acidity"]]
y = wineData.quality

# No class_weight is passed here, unlike the other SVM cells.
# NOTE(review): gamma=5000 is very large, and the scores below are computed
# on the same rows the model was trained on -- confirm this is intended.
clf = svm.SVC(kernel='rbf', gamma=5000)

# Train the classifier on the data
clf.fit(X, y)

# Evaluate the classifier on the same data it was trained on
y_pred = clf.predict(X)
p, r, f, s = precision_recall_fscore_support(y, y_pred, zero_division=1)

print('Precision = {}, Recall = {}, F-score = {}'.format(p, r, f))

# Sample random points over the plotted region and label them with the model
# so the predicted regions become visible.
n = 5000
newx = [random.uniform(-2,2) for _ in range(n)]
newy = [random.uniform(-2,3) for _ in range(n)]
df_temp = pd.DataFrame({'total sulfur dioxide':newx,'volatile acidity':newy})
newlabel = clf.predict(df_temp)

# Background: random points colored by predicted class (red = 0, blue = 1).
color = ['r' if y_ == 0 else 'b' for y_ in newlabel]
plt.scatter(newx, newy, c=color, marker='o', s=7)

# Foreground: training rows colored by true label.
color = ['r' if y_ == 0 else 'b' for y_ in y]
plt.scatter(wineData['total sulfur dioxide'], wineData['volatile acidity'], c=color, s=30, marker='x')
plt.xlabel('total sulfur dioxide')
plt.ylabel('volatile acidity');

In [None]:
# Compare degree-3 polynomial SVMs trained with different class weights.
X = wineData[["total sulfur dioxide", "volatile acidity"]]
y = wineData.quality

# Class-weight settings to try, in the order they are reported
# ("class_weight value 1" .. "class_weight value 6").
class_weight_options = [
    {0: 1, 1: 2},
    {0: 3, 1: 6},
    {0: 4, 1: 6},
    {0: 5, 1: 8},
    {0: 6, 1: 9},
    {0: 7, 1: 8},
]

for trial, weights in enumerate(class_weight_options, start=1):
    clf = SVC(kernel='poly', degree=3, class_weight=weights)
    clf.fit(X, y)

    # Scores are computed on the same rows the model was trained on.
    y_pred = clf.predict(X)
    p, r, f, s = precision_recall_fscore_support(y, y_pred)
    print("class_weight value {}".format(trial))
    display('precision = {}'.format(p))
    display('recall = {}'.format(r))
    display('f-score = {}'.format(f))