In [16]:
#Importing the libraries
import copy

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from river import stream, optim, evaluate
from river.linear_model import LogisticRegression
from river.ensemble import AdaptiveRandomForestClassifier
from river.naive_bayes import GaussianNB
from river.neighbors import KNNClassifier
from river.multiclass import OneVsRestClassifier
from river.preprocessing import StandardScaler
from river.compose import Pipeline
from river.metrics import Accuracy
from sklearn.utils import shuffle

In [17]:
# Read the wine-quality CSV, shuffle its rows with a fixed seed (11) so the
# order is reproducible, and renumber the index from zero.
df = (
    pd.read_csv('winequality-white.csv')
    .pipe(shuffle, random_state=11)
    .reset_index(drop=True)
)
# Persist the shuffled rows; the index column is not written to the file.
df.to_csv("winequality-white-shuffled.csv", index=False)

In [18]:
# Build the converters mapping for the CSV stream reader: every feature column
# is parsed with float, and the "quality" target column with int.
print("[INFO] building column names...")
types = dict.fromkeys(
    (
        'fixed acidity',
        'volatile acidity',
        'citric acid',
        'residual sugar',
        'chlorides',
        'free sulfur dioxide',
        'total sulfur dioxide',
        'density',
        'pH',
        'sulphates',
        'alcohol',
    ),
    float,
)
types["quality"] = int

[INFO] building column names...


In [25]:
#Getting the dataset
# NOTE(review): "4000.csv" is not written by any cell visible in this notebook;
# presumably it is a slice of the shuffled wine data - confirm where it is
# created so the notebook survives Restart & Run All.
dataset = stream.iter_csv("4000.csv", target="quality", converters=types)

#Constructing our pipeline (standardize features + model)
model = Pipeline(
    StandardScaler(),
    OneVsRestClassifier(classifier=LogisticRegression(loss = optim.losses.Log(), intercept_lr = 0.005)))

#Initializing our metric
print("[INFO] starting training...")
metric = Accuracy()

# One (row index, running accuracy in percent) pair is recorded per row.
lr_values = []

#Looping over the dataset one row at a time
for (i, (X, y)) in enumerate(dataset):
    # Predict before learning, so every row is scored by a model that has
    # not seen it yet.
    preds = model.predict_one(X)
    # learn_one() and update() modify their object in place. Their return
    # value is deliberately not rebound: recent river releases return None
    # from both, which would replace the model/metric with None after the
    # first row.
    model.learn_one(X, y)
    metric.update(y, preds)
    # Read the value from the metric directly rather than parsing its printed
    # form; this stores the unrounded percentage.
    lr_values.append((i, metric.get() * 100))
    if i%500 == 0:
        print("[INFO] update {} - {}".format(i, metric))

#Showing the accuracy of the model
print("[INFO] final - {}".format(metric))

[INFO] starting training...
[INFO] update 0 - Accuracy: 0.00%
[INFO] update 500 - Accuracy: 42.91%
[INFO] update 1000 - Accuracy: 42.86%
[INFO] update 1500 - Accuracy: 43.44%
[INFO] update 2000 - Accuracy: 45.33%
[INFO] update 2500 - Accuracy: 46.78%
[INFO] update 3000 - Accuracy: 47.05%
[INFO] update 3500 - Accuracy: 47.50%
[INFO] update 4000 - Accuracy: 48.09%
[INFO] final - Accuracy: 48.09%


In [26]:
#Getting the dataset
# NOTE(review): "900.csv" is not written by any cell visible in this notebook;
# presumably it holds rows that are not in "4000.csv" - confirm where it is
# created.
dataset = stream.iter_csv("900.csv", target="quality", converters=types)

print("[INFO] starting training...")
# Continue from the accuracy accumulated so far, but on an independent copy:
# a plain `metric1 = metric` would make both names refer to one object, so
# updating metric1 would also overwrite the result stored in `metric`.
metric1 = copy.deepcopy(metric)

#Looping over the dataset one row at a time
for (i, (X, y)) in enumerate(dataset):
    # Predict before learning, so every row is scored by a model that has
    # not seen it yet.
    preds = model.predict_one(X)
    # learn_one() and update() modify their object in place. Their return
    # value is deliberately not rebound: recent river releases return None
    # from both, which would replace the model/metric with None after the
    # first row.
    model.learn_one(X, y)
    metric1.update(y, preds)
    # Read the value from the metric directly rather than parsing its printed
    # form; this stores the unrounded percentage.
    # NOTE(review): `i` restarts at 0 here although lr_values already holds
    # the entries from the previous stream - confirm that is what the
    # consumer of lr_values expects.
    lr_values.append((i, metric1.get() * 100))
    if i%298 == 0:
        print("[INFO] update {} - {}".format(i, metric1))

#Showing the accuracy of the model
print("[INFO] final - {}".format(metric1))

[INFO] starting training...
[INFO] update 0 - Accuracy: 48.08%
[INFO] update 298 - Accuracy: 48.12%
[INFO] update 596 - Accuracy: 48.04%
[INFO] update 894 - Accuracy: 48.14%
[INFO] final - Accuracy: 48.14%
