In [1]:
from pprint import pprint
from river import datasets

# Toulouse bike-sharing stream. The recorded output shows it being downloaded
# and unpacked into the local river data directory on first use.
dataset = datasets.Bikes()

# Peek at a single (features, target) pair. next(iter(...)) says "first
# element only" directly, instead of a loop that always breaks after one pass.
x, y = next(iter(dataset))
pprint(x)
print(f'Number of available bikes: {y}')
    
from river import compose
from river import linear_model
from river import metrics
from river import evaluate
from river import preprocessing
from river import optim

# Baseline pipeline: keep only the weather features, standardise them, then
# fit a linear regression with plain SGD (learning rate 0.001).
# `|` chains the steps into a single pipeline.
model = (
    compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')
    | preprocessing.StandardScaler()
    | linear_model.LinearRegression(optimizer=optim.SGD(0.001))
)

metric = metrics.MAE()

# Kept as the cell's last expression so the final score is displayed.
evaluate.progressive_val_score(
    dataset=dataset,
    model=model,
    metric=metric,
    print_every=20_000,
)

Downloading https://maxhalford.github.io/files/datasets/toulouse_bikes.zip (1.12 MB)
Uncompressing into /home/jbris/river_data/Bikes
{'clouds': 75,
 'description': 'light rain',
 'humidity': 81,
 'moment': datetime.datetime(2016, 4, 1, 0, 0, 7),
 'pressure': 1017.0,
 'station': 'metro-canal-du-midi',
 'temperature': 6.54,
 'wind': 9.3}
Number of available bikes: 1
[20,000] MAE: 4.912763
[40,000] MAE: 5.333578
[60,000] MAE: 5.330969
[80,000] MAE: 5.392334
[100,000] MAE: 5.423078
[120,000] MAE: 5.541239
[140,000] MAE: 5.613038
[160,000] MAE: 5.622441
[180,000] MAE: 5.567836
[182,470] MAE: 5.563905


MAE: 5.563905

In [2]:
from river import feature_extraction
from river import stats

def get_hour(x: dict) -> dict:
    """Return a copy of the features with an added ``'hour'`` entry.

    Parameters
    ----------
    x
        Feature dict for one observation. It must contain a ``'moment'``
        value exposing an ``hour`` attribute (a ``datetime`` in this dataset).

    Returns
    -------
    dict
        A new dict holding every original key plus ``'hour'``, taken from
        ``x['moment'].hour``.

    Raises
    ------
    KeyError
        If ``x`` has no ``'moment'`` key.
    """
    # Build a new dict rather than writing into `x`: the same input dict is
    # handed to the other branches of the pipeline and to the evaluation loop,
    # so mutating it would leak the extra key to them.
    return {**x, 'hour': x['moment'].hour}

# Two feature branches are united with `+`:
#   * the raw weather features, and
#   * a running mean of the target grouped by (station, hour), where the
#     hour is derived from the timestamp by get_hour.
# The union is then standardised and fed to a linear regression.
model = (
    (
        compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')
        + (
            get_hour
            | feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean())
        )
    )
    | preprocessing.StandardScaler()
    | linear_model.LinearRegression(optimizer=optim.SGD(0.001))
)

metric = metrics.MAE()

# Kept as the cell's last expression so the final score is displayed.
evaluate.progressive_val_score(
    dataset=dataset,
    model=model,
    metric=metric,
    print_every=20_000,
)

[20,000] MAE: 3.720766
[40,000] MAE: 3.829739
[60,000] MAE: 3.844905
[80,000] MAE: 3.910137
[100,000] MAE: 3.888553
[120,000] MAE: 3.923644
[140,000] MAE: 3.980882
[160,000] MAE: 3.949972
[180,000] MAE: 3.934489
[182,470] MAE: 3.933442


MAE: 3.933442

In [3]:
import itertools

# Weather features united with the per-(station, hour) running target mean,
# then standardised and passed to a linear regression.
# NOTE(review): LinearRegression() is built here without the optim.SGD(0.001)
# optimizer that the earlier cells pass explicitly -- confirm this is intended.
model = (
    (
        compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')
        + (
            get_hour
            | feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean())
        )
    )
    | preprocessing.StandardScaler()
    | linear_model.LinearRegression()
)

# Warm the pipeline up on the first 10,000 observations, predicting before
# learning on each one.
for x, y in itertools.islice(dataset, 10_000):
    y_pred = model.predict_one(x)
    model.learn_one(x, y)

# Trace the first observation of the stream through every pipeline step.
x, y = next(iter(dataset))
print(model.debug_one(x))

0. Input
--------
clouds: 75 (int)
description: light rain (str)
humidity: 81 (int)
moment: 2016-04-01 00:00:07 (datetime)
pressure: 1,017.00000 (float)
station: metro-canal-du-midi (str)
temperature: 6.54000 (float)
wind: 9.30000 (float)

1. Transformer union
--------------------
    1.0 Select
    ----------
    clouds: 75 (int)
    humidity: 81 (int)
    pressure: 1,017.00000 (float)
    temperature: 6.54000 (float)
    wind: 9.30000 (float)

    1.1 get_hour | y_mean_by_station_and_hour
    -----------------------------------------
    y_mean_by_station_and_hour: 4.43243 (float)

clouds: 75 (int)
humidity: 81 (int)
pressure: 1,017.00000 (float)
temperature: 6.54000 (float)
wind: 9.30000 (float)
y_mean_by_station_and_hour: 4.43243 (float)

2. StandardScaler
-----------------
clouds: 0.47566 (float)
humidity: 0.42247 (float)
pressure: 1.05314 (float)
temperature: -1.22098 (float)
wind: 2.21104 (float)
y_mean_by_station_and_hour: -0.59098 (float)

3. LinearRegression
-----------------

In [4]:
import datetime as dt

# Progressive validation with delayed feedback: timestamps are read from the
# 'moment' field and each target is revealed 30 minutes after its features.
# The score is computed on model.clone() rather than on `model` itself
# (clone() is expected to give an unfitted copy with the same configuration).
evaluate.progressive_val_score(
    dataset,
    model.clone(),
    metrics.MAE(),
    moment='moment',
    delay=dt.timedelta(minutes=30),
    print_every=20_000,
)

[20,000] MAE: 20.198137
[40,000] MAE: 12.199763
[60,000] MAE: 9.468279
[80,000] MAE: 8.126625
[100,000] MAE: 7.273133
[120,000] MAE: 6.735469
[140,000] MAE: 6.376704
[160,000] MAE: 6.06156
[180,000] MAE: 5.806744
[182,470] MAE: 5.780772


MAE: 5.780772