In [1]:
import numpy as np

from sklearn.linear_model import MultiTaskLasso, Lasso, ElasticNet
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from tqdm import tqdm

In [16]:
in_seq_len = 7
out_seq_len = 6
out_freq = 5
days = 164
daily_slots_5_min = 180
daily_slots_1_min = 900
seed = 42

input = np.load("../data/new_X.npy")  # (29520, 1796)
output_tmc = np.load("../data/new_Y_tmc.npy")[..., :3]  # (29520, 70, 3)
output_xd = np.load("../data/new_Y_xd.npy")[..., 0]  # (147600, 70)

In [17]:
# prepare X
X = []
for d in range(days):
    for i in range(daily_slots_5_min-in_seq_len+1):
        X.append(input[d*days+i : d*days+i+in_seq_len])
X = np.stack(X, axis=0) # (28536, 7, 1796)

### Part 1 - Fit TMC Speed Data 

In [18]:
# prepare Y_tmc
Y_tmc = []
for d in range(days):
    for i in range(1, daily_slots_5_min-out_seq_len+1):
        Y_tmc.append(output_tmc[d*days+i : d*days+i+out_seq_len])
Y_tmc = np.stack(Y_tmc, axis=0) # (28536, 6, 70, 3)

# prepare Y_xd
Y_xd = []
for d in range(days):
    for i in range(1, daily_slots_5_min-out_seq_len+1):
        Y_xd.append(output_xd[[d*days+j*out_freq for j in range(i, i+out_seq_len)],...])
Y_xd = np.stack(Y_xd, axis=0) # (28536, 6, 70)

In [19]:
X_train_tmc, X_test_tmc, Y_train_tmc, Y_test_tmc =  train_test_split(X, Y_tmc, test_size=0.3, random_state=seed)
X_train_xd, X_test_xd, Y_train_xd, Y_test_xd =  train_test_split(X, Y_xd, test_size=0.3, random_state=seed)

In [20]:
alpha_xd = [1]#[0.0005] #[0.0005*i for i in range(1, 5)]

mse_mat_xd = np.zeros(out_seq_len)  # (6,)
mape_mat_xd = np.zeros(out_seq_len)  # (6,)
selected_alphas_xd = np.zeros([out_seq_len, 70])  # (6, 70), we will fit 6*70*3 Lasso regression model

for i in tqdm(range(out_seq_len)): 
    temp_x_train = X_train_xd.reshape(X_train_xd.shape[0], -1)  # (train instance count, 7*1796)
    temp_x_test = X_test_xd.reshape(X_test_xd.shape[0], -1)  # (test instance count, 7*1796)
    
    temp_y_train = Y_train_xd[:, i, :]  # (train instance count, 70)
    temp_y_test = Y_test_xd[:, i, :]  # (test instance count, 70)

    model = MultiTaskLasso(tol=1, selection="random")
    model.fit(temp_x_train, temp_y_train)

    pred_train = model.predict(temp_x_train)
    pred_test = model.predict(temp_x_test)

    # print(pred_train.shape, pred_test.shape)
    # print(np.sum((temp_y_train-pred_train)**2), np.sum((temp_y_test-pred_test)**2))
    # mse_train = np.mean((temp_y_train-pred_train)**2)
    # mse_test = np.mean((temp_y_test-pred_test)**2)
    # mape_train = np.mean(np.nan_to_num(np.abs((temp_y_train-pred_train))/np.maximum(np.abs(temp_y_train), np.abs(pred_train)), nan=1))
    # mape_test =  np.mean(np.nan_to_num(np.abs((temp_y_test-pred_test))/np.maximum(np.abs(temp_y_test), np.abs(pred_test)), nan=1))
    mse_train = mean_squared_error(y_true=temp_y_train, y_pred=pred_train)
    mse_test = mean_squared_error(y_true=temp_y_test, y_pred=pred_test)
    mape_train = mean_absolute_percentage_error(y_true=temp_y_train, y_pred=pred_train)
    mape_test = mean_absolute_percentage_error(y_true=temp_y_test, y_pred=pred_test)
    
    print(mse_train, mse_test, mape_train, mape_test)
        
    mse_mat_xd[i] += (mse_train*temp_y_train.shape[0] + mse_test*temp_y_test.shape[0])/Y_xd.shape[0]
    mape_mat_xd[i] += (mape_train*temp_y_train.shape[0] + mape_test*temp_y_test.shape[0])/Y_xd.shape[0]
    print((mse_train*temp_y_train.shape[0] + mse_test*temp_y_test.shape[0])/Y_xd.shape[0], (mape_train*temp_y_train.shape[0] + mape_test*temp_y_test.shape[0])/Y_xd.shape[0])

 17%|█▋        | 1/6 [03:15<16:18, 195.62s/it]

38.32980447648753 38.08129508704179 0.142299911628207 0.1424357453092541
38.255249917928346 0.14234066268453738


 33%|███▎      | 2/6 [05:58<11:46, 176.51s/it]

38.389749208155116 37.95248221776344 0.1425264145998639 0.14181101418697892
38.25856604636849 0.14231178946197814


 50%|█████     | 3/6 [08:06<07:42, 154.19s/it]

38.391318357842806 38.01106572710128 0.1425535764987227 0.14171494256644634
38.27723990354725 0.1423019804413139


 67%|██████▋   | 4/6 [10:12<04:46, 143.24s/it]

38.44416829034362 37.94174427785149 0.14259193409295703 0.14186023512640752
38.293437565261435 0.14237241927474037


 83%|████████▎ | 5/6 [12:24<02:19, 139.19s/it]

38.4468060476189 37.97376834462622 0.14264941229829275 0.14182866374589076
38.30489142134611 0.14240318198019936


100%|██████████| 6/6 [14:50<00:00, 148.41s/it]

38.39920134268665 38.1758649234747 0.14261197820111857 0.1420812720401062
38.33219885162716 0.1424527626332595





In [24]:
mse_mat_xd = mse_mat_xd**0.5

In [25]:
np.save("../data/lasso_mse_xd.npy", mse_mat_xd)
np.save("../data/lasso_mape_xd.npy", mape_mat_xd)

In [26]:
mse_mat_xd

array([6.18508285, 6.18535092, 6.18686026, 6.18816916, 6.18909456,
       6.19130026])

In [27]:
mape_mat_xd

array([0.14234066, 0.14231179, 0.14230198, 0.14237242, 0.14240318,
       0.14245276])

In [6]:
alpha_tmc = [0.0005] #[0.0005*i for i in range(1, 5)]

mse_mat_tmc = np.zeros([out_seq_len, 3])  # (6, 3)
mape_mat_tmc = np.zeros([out_seq_len, 3])  # (6, 3)
selected_alphas_tmc = np.zeros([out_seq_len, 70, 3])  # (6, 70, 3), we will fit 6*70*3 Lasso regression model

for i in range(out_seq_len): 
    for j in tqdm(range(3)):
        temp_x_train = X_train_tmc.reshape(X_train_tmc.shape[0], -1)  # (train instance count, 7*1796)
        temp_x_test = X_test_tmc.reshape(X_test_tmc.shape[0], -1)  # (test instance count, 7*1796)
        temp_y_train = Y_train_tmc[:, i, :, j]  # (train instance count, 1)
        temp_y_test = Y_test_tmc[:, i, :, j]  # (test instance count, 1)

        model = MultiTaskLasso(tol=1, selection="random")
        model.fit(temp_x_train, temp_y_train)

        pred_train = model.predict(temp_x_train)
        pred_test = model.predict(temp_x_test)

        mse_train = mean_squared_error(y_true=temp_y_train, y_pred=pred_train)
        mse_test = mean_squared_error(y_true=temp_y_test, y_pred=pred_test)
        mape_train = mean_absolute_percentage_error(y_true=temp_y_train, y_pred=pred_train)
        mape_test = mean_absolute_percentage_error(y_true=temp_y_test, y_pred=pred_test)
        print(mse_train, mse_test, mape_train, mape_test)
            
        mse_mat_tmc[i,j] += (mse_train*temp_y_train.shape[0] + mse_test*temp_y_test.shape[0])/Y_tmc.shape[0]
        mape_mat_tmc[i,j] += (mape_train*temp_y_train.shape[0] + mape_test*temp_y_test.shape[0])/Y_tmc.shape[0]
        print((mse_train*temp_y_train.shape[0] + mse_test*temp_y_test.shape[0])/Y_xd.shape[0], (mape_train*temp_y_train.shape[0] + mape_test*temp_y_test.shape[0])/Y_xd.shape[0])


 33%|███▎      | 1/3 [02:20<04:40, 140.13s/it]

56.9265256270181 56.8409886452064 0.1993439914140916 0.1990538080334254
56.900863932972335 0.19925693436608613


 67%|██████▋   | 2/3 [04:29<02:13, 133.92s/it]

54.01916475660943 53.89759165932779 0.20709373055917465 0.20625318720939007
53.982691975356694 0.20684156166313086


100%|██████████| 3/3 [06:03<00:00, 121.28s/it]


61.96307045728562 61.95791595391425 0.2068953869282377 0.20658008867363417
61.96152407014789 0.20680079524202863


 33%|███▎      | 1/3 [02:05<04:11, 125.53s/it]

59.9872032289701 59.933716853997666 0.20531451572164983 0.20508300093566453
59.9711569416089 0.2052450596632387


 67%|██████▋   | 2/3 [04:15<02:08, 128.01s/it]

56.08383411971811 56.20843822206795 0.21203683886592412 0.21194296395455392
56.121216223734685 0.21200867573457283


100%|██████████| 3/3 [06:25<00:00, 128.54s/it]


64.28707737614111 64.36410209412793 0.2119943069189318 0.21188572899938987
64.3101853313796 0.21196173278208016


 33%|███▎      | 1/3 [02:12<04:25, 132.85s/it]

60.179245482744136 59.59844404043136 0.20646700486930156 0.20456213025032122
60.005000979392584 0.20589552913293027


 67%|██████▋   | 2/3 [04:12<02:05, 125.00s/it]

56.78754369846664 56.364359210165034 0.21404986316173374 0.21180528375342664
56.660585386006936 0.2133764736076786


100%|██████████| 3/3 [06:33<00:00, 131.28s/it]


64.84669509815895 64.28673623752735 0.21369580237202482 0.21197120776818562
64.67870351539096 0.21317841190372275


 33%|███▎      | 1/3 [02:34<05:08, 154.17s/it]

60.10741255899131 59.6832395844599 0.20607934078021334 0.2052661412827995
59.98015769373467 0.20583537523152537


 67%|██████▋   | 2/3 [04:07<01:58, 118.62s/it]

56.55047296601441 56.3515936750607 0.2133341304855065 0.21256097463944226
56.49080778484484 0.21310217831287695


100%|██████████| 3/3 [06:01<00:00, 120.35s/it]


65.30226241393517 64.77520705822597 0.21413044214640825 0.21314031548083004
65.1441421132544 0.21383339720724318


 33%|███▎      | 1/3 [02:18<04:37, 138.70s/it]

60.05956191502374 59.782077507750664 0.20590590360575822 0.20546279718828053
59.97631464803941 0.20577296857491906


 67%|██████▋   | 2/3 [04:45<02:23, 143.41s/it]

56.64817554349555 56.42959321528344 0.2134738980554668 0.212727474912332
56.58259931305597 0.21324996588107736


100%|██████████| 3/3 [08:00<00:00, 160.23s/it]


64.82678610769062 64.55750751301214 0.21325111938008406 0.21277992898487877
64.74600064199667 0.21310975895909468


 33%|███▎      | 1/3 [02:22<04:45, 142.62s/it]

60.35233420008092 60.15549883039512 0.2061635287451188 0.20625307688001776
60.29328220961694 0.20619039381320367


 67%|██████▋   | 2/3 [04:33<02:15, 135.77s/it]

56.64020693419458 56.78791771791324 0.21336857191201436 0.21368109270365432
56.68452120456938 0.21346233033986795


100%|██████████| 3/3 [06:07<00:00, 122.41s/it]

64.77858999427139 64.69246112077035 0.21303366528505738 0.21321276363578703
64.75275072857043 0.21308739604552124





In [14]:
np.save("../data/lasso_mse_tmc.npy", mse_mat_tmc)
np.save("../data/lasso_mape_tmc.npy", mape_mat_tmc)

In [12]:
mape_mat_tmc

array([[0.19925693, 0.20684156, 0.2068008 ],
       [0.20524506, 0.21200868, 0.21196173],
       [0.20589553, 0.21337647, 0.21317841],
       [0.20583538, 0.21310218, 0.2138334 ],
       [0.20577297, 0.21324997, 0.21310976],
       [0.20619039, 0.21346233, 0.2130874 ]])

In [22]:
mape_mat_tmc.T

array([[0.19925693, 0.20524506, 0.20589553, 0.20583538, 0.20577297,
        0.20619039],
       [0.20684156, 0.21200868, 0.21337647, 0.21310218, 0.21324997,
        0.21346233],
       [0.2068008 , 0.21196173, 0.21317841, 0.2138334 , 0.21310976,
        0.2130874 ]])

In [15]:
mse_mat_tmc

array([[7.54326613, 7.34729147, 7.87156427],
       [7.74410466, 7.4914095 , 8.01936315],
       [7.7462895 , 7.52732259, 8.0423071 ],
       [7.74468577, 7.51603671, 8.07119211],
       [7.74443766, 7.52214061, 8.04648996],
       [7.7648749 , 7.52891235, 8.04690939]])

In [23]:
mse_mat_tmc.T

array([[7.54326613, 7.74410466, 7.7462895 , 7.74468577, 7.74443766,
        7.7648749 ],
       [7.34729147, 7.4914095 , 7.52732259, 7.51603671, 7.52214061,
        7.52891235],
       [7.87156427, 8.01936315, 8.0423071 , 8.07119211, 8.04648996,
        8.04690939]])