In [4]:
import pandas as pd 
from utils.timeseries import make_lags, make_multistep_target

# Read the flu-trends CSV and take an independent copy of the target column.
flu_trends = pd.read_csv("data/flu-trends.csv")
y = flu_trends.loc[:, 'FluVisits'].copy()

# Feature matrix: make_lags is called with the target and 4; the result is
# displayed below with columns y_lag_1 .. y_lag_4.
X = make_lags(y, 4)
X

Unnamed: 0,y_lag_1,y_lag_2,y_lag_3,y_lag_4
0,0.0,0.0,0.0,0.0
1,180.0,0.0,0.0,0.0
2,115.0,180.0,0.0,0.0
3,132.0,115.0,180.0,0.0
4,109.0,132.0,115.0,180.0
...,...,...,...,...
361,124.0,156.0,211.0,305.0
362,90.0,124.0,156.0,211.0
363,55.0,90.0,124.0,156.0
364,33.0,55.0,90.0,124.0


In [5]:
# Build the 8-step target frame (displayed below as y_step_1 .. y_step_8).
# The input is taken from the raw `flu_trends` column instead of from `y`
# itself, so re-running this cell never feeds the already-expanded multistep
# frame back into make_multistep_target. The result is still bound to `y`
# because the following cells read it under that name.
y = make_multistep_target(flu_trends['FluVisits'].copy(), steps=8)
y

Unnamed: 0,y_step_1,y_step_2,y_step_3,y_step_4,y_step_5,y_step_6,y_step_7,y_step_8
0,180,115.0,132.0,109.0,120.0,115.0,123.0,205.0
1,115,132.0,109.0,120.0,115.0,123.0,205.0,454.0
2,132,109.0,120.0,115.0,123.0,205.0,454.0,628.0
3,109,120.0,115.0,123.0,205.0,454.0,628.0,687.0
4,120,115.0,123.0,205.0,454.0,628.0,687.0,821.0
...,...,...,...,...,...,...,...,...
354,616,578.0,455.0,305.0,211.0,156.0,124.0,90.0
355,578,455.0,305.0,211.0,156.0,124.0,90.0,55.0
356,455,305.0,211.0,156.0,124.0,90.0,55.0,33.0
357,305,211.0,156.0,124.0,90.0,55.0,33.0,27.0


In [6]:
# Restrict the target and the features to the row labels they have in common
# (inner join along axis 0), then report both shapes as a tuple.
y, X = y.align(X, axis=0, join="inner")
(y.shape, X.shape)

((359, 8), (359, 4))

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt 
from utils.timeseries import plot_multistep

# Hold-out split without shuffling: row order is preserved and a quarter of
# the rows is reserved for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, shuffle=False
)

# A single LinearRegression fitted against every target column at once.
model = LinearRegression().fit(X_train, y_train)

# Wrap the raw prediction arrays in DataFrames that carry the target's column
# labels and the row index of the features they were predicted from.
y_fit = pd.DataFrame(
    data=model.predict(X_train), columns=y.columns, index=X_train.index
)
y_pred = pd.DataFrame(
    data=model.predict(X_test), columns=y.columns, index=X_test.index
)

y_fit

Unnamed: 0,y_step_1,y_step_2,y_step_3,y_step_4,y_step_5,y_step_6,y_step_7,y_step_8
0,28.079850,67.535191,110.102710,153.156716,197.280630,240.687856,276.813034,307.382720
1,296.928680,360.793442,425.366943,463.901064,489.023041,476.204228,465.969402,465.721786
2,90.912369,131.237771,149.578386,178.011476,182.835672,228.378204,273.812963,297.607147
3,188.758284,208.777014,233.320469,241.995135,290.235135,327.032434,345.184592,368.982347
4,105.120911,128.966953,155.169063,189.992365,213.930748,243.852517,272.948251,294.152355
...,...,...,...,...,...,...,...,...
264,38.685780,74.887184,114.423051,154.924177,197.829080,240.669620,276.021452,305.954080
265,40.709083,77.755855,118.593302,160.115966,202.995585,244.374974,278.762047,308.126104
266,46.214173,84.608335,126.127891,167.901989,209.565602,249.641389,283.274837,311.827166
267,44.544494,82.660009,123.201446,164.210128,205.586312,246.894820,281.444773,310.423364


In [12]:
# Display the test-set predictions (`y_pred`) built in the preceding cell.
y_pred

Unnamed: 0,y_step_1,y_step_2,y_step_3,y_step_4,y_step_5,y_step_6,y_step_7,y_step_8
269,47.100337,84.883562,126.162075,167.311492,209.825057,250.207311,283.654452,312.403024
270,55.892505,95.054026,136.675357,177.886897,218.165656,256.665631,289.113997,316.611490
271,70.311121,109.863705,150.933058,190.394310,229.238803,266.220484,297.152888,323.449643
272,74.375308,112.615330,151.658743,189.518258,227.133712,264.614526,295.904945,322.089465
273,74.132494,110.131429,147.487402,184.076652,222.279166,260.683261,292.300392,318.926953
...,...,...,...,...,...,...,...,...
354,516.796163,391.120264,292.082648,244.817163,202.197263,185.476604,177.057839,163.603480
355,527.736513,433.614891,376.587446,343.746771,333.696267,304.773898,277.978041,263.719032
356,506.147122,453.838827,421.989429,413.542560,383.362315,343.319737,318.983528,298.153698
357,364.267675,315.644446,275.826780,260.988097,246.008010,249.662749,256.005252,256.236528


In [13]:
from sklearn.multioutput import MultiOutputRegressor
from xgboost import XGBRegressor

# XGBRegressor with default settings, wrapped in MultiOutputRegressor and
# fitted on the same training split as before.
# NOTE: this rebinds `model`, `y_fit` and `y_pred`, replacing whatever an
# earlier cell stored under those names.
model = MultiOutputRegressor(XGBRegressor()).fit(X_train, y_train)

# Label the prediction arrays with the target's columns and the row index of
# the corresponding feature frame.
y_fit = pd.DataFrame(
    data=model.predict(X_train), columns=y.columns, index=X_train.index
)
y_pred = pd.DataFrame(
    data=model.predict(X_test), columns=y.columns, index=X_test.index
)

y_fit

Unnamed: 0,y_step_1,y_step_2,y_step_3,y_step_4,y_step_5,y_step_6,y_step_7,y_step_8
0,179.963470,114.962440,131.977753,108.950233,119.846031,115.143097,122.936783,204.927170
1,115.092194,132.112961,108.954773,119.991966,114.930176,122.989395,204.864517,453.730347
2,132.063324,108.971245,120.192566,115.159683,123.096924,205.186890,453.950256,627.999512
3,108.996727,120.030716,115.080200,123.031761,204.975281,454.049469,628.495605,686.822021
4,119.548836,116.118828,123.379036,205.199997,453.826813,627.300720,685.336243,819.167419
...,...,...,...,...,...,...,...,...
264,13.941550,17.622795,17.594597,14.339076,18.125843,25.881010,37.113613,44.890949
265,17.514704,16.814867,13.497506,16.480196,24.378937,35.195248,41.972095,44.483524
266,18.050623,14.575546,18.252205,25.805824,37.491409,43.982067,46.007870,49.985329
267,14.685186,17.941523,26.155249,39.335205,44.973240,46.968201,52.049313,61.603680


In [14]:
# Display the test-set predictions (`y_pred`) built in the preceding cell.
y_pred

Unnamed: 0,y_step_1,y_step_2,y_step_3,y_step_4,y_step_5,y_step_6,y_step_7,y_step_8
269,18.245794,17.323479,19.098400,20.492519,26.408344,32.855202,37.908566,42.499847
270,24.192345,30.322746,34.457645,37.097775,43.903019,45.276638,68.691139,61.757587
271,33.322083,50.853527,54.987492,60.720234,76.287964,87.913643,89.772812,119.352234
272,56.067348,63.330967,66.415344,77.312279,89.237839,88.034302,120.132378,117.710823
273,53.529842,55.436802,56.173000,58.474163,72.470360,79.639618,71.565102,67.048073
...,...,...,...,...,...,...,...,...
354,512.572937,452.073608,384.960846,335.463989,334.840393,319.679962,304.224304,264.321503
355,454.169312,350.971466,264.244049,241.987152,236.598450,275.375244,131.806686,88.989365
356,501.683380,286.629669,796.849060,198.773727,198.785904,927.782349,653.576538,162.396317
357,388.092133,244.984421,248.868042,128.936707,99.660362,143.027161,120.265152,138.363922
