In [14]:
#Importing the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from river import stream, optim, evaluate
from river.linear_model import LogisticRegression
from river.ensemble import AdaptiveRandomForestClassifier
from river.naive_bayes import GaussianNB
from river.neighbors import KNNClassifier
from river.multiclass import OneVsRestClassifier
from river.preprocessing import StandardScaler
from river.compose import Pipeline
from river.metrics import Accuracy
from sklearn.utils import shuffle

In [15]:
# Read the raw wine-quality CSV, shuffle its rows with a fixed seed, and renumber the index from zero.
# random_state=11 pins the permutation so the shuffled order is the same on every run.
df = (
    pd.read_csv('winequality-white.csv')
    .pipe(shuffle, random_state=11)
    .reset_index(drop=True)
)
# Write the shuffled rows to disk without the index column
df.to_csv("winequality-white-shuffled.csv", index=False)

In [16]:
# Build the converter map: CSV column name -> built-in type used to parse that column's values
print("[INFO] building column names...")
feature_names = (
    'fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides',
    'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates',
    'alcohol',
)
# Every feature column is parsed as a float ...
types = dict.fromkeys(feature_names, float)
# ... while the quality column is parsed as an int
types["quality"] = int

[INFO] building column names...


In [43]:
# Experiment grid: one full pass over the CSV stream is run for every (learning rate, loss) pair.
learning_rates = [0.001]
losses = [(optim.losses.Log(), 'Log')]  # each entry is (loss object, label used in logs and for bucketing results)

# Final accuracy of each run, in percent, bucketed by loss label ('Log' vs. anything else)
log_accuracies = []
hinge_accuracies = []

for rate in learning_rates:
    for loss in losses:

        #Getting the dataset
        #It's necessary to reload the dataset every cycle because it was previously emptied by simulating a stream line by line
        dataset = stream.iter_csv("winequality-white-shuffled.csv", target="quality", converters=types)

        #Constructing our pipeline (standardize features + model)
        # NOTE(review): the active KNN model is built without `rate` or `loss`; only the
        # commented-out logistic-regression variant uses them, so the parameters printed
        # below do not influence this run. Confirm which model is intended.
        model = (
            StandardScaler() |
            #OneVsRestClassifier(classifier=LogisticRegression(intercept_lr = rate, loss=loss[0]))
            KNNClassifier(n_neighbors=6)
        )

        #Initializing our metric
        print("[INFO] starting training...   [Parameters: learning rate = {}, loss = {}]".format(rate, loss[1]))
        metric = Accuracy()

        #Looping over the dataset one row at a time
        evaluate.progressive_val_score(dataset, model, metric)

        #Saving the results as raw numbers
        # Read the value from the metric itself instead of parsing its display string,
        # which breaks as soon as the repr format changes. get() yields a fraction, so it
        # is scaled to percent and rounded to two decimals to match the values that the
        # printed "NN.NN%" form used to provide.
        accuracy_pct = round(metric.get() * 100, 2)
        if loss[1] == 'Log':
            log_accuracies.append(accuracy_pct)
        else:
            hinge_accuracies.append(accuracy_pct)

        #Showing the accuracy of the model
        print("[INFO] final - {}".format(metric))

[INFO] starting training...   [Parameters: learning rate = 0.001, loss = Log]
[INFO] final - Accuracy: 52.87%


In [45]:
# Look up the KNN step of the pipeline by name and display its configured neighbour count
knn_step = model['KNNClassifier']
knn_step.n_neighbors

6