In [1]:
# imports
from pathlib import Path

import pandas as pd
#from statsmodels.tsa.statespace.sarimax import SARIMAX
from pmdarima.arima import auto_arima

In [2]:
# read data
# Explicit dtypes for the listed columns: the row identifier and both
# coordinates are read as text, year/week as integers, emission as float.
dtype = dict(
    ID_LAT_LON_YEAR_WEEK='string',
    latitude='string',
    longitude='string',
    year='int',
    week_no='int',
    emission='float',
)
df = pd.read_csv('files/train.csv', dtype=dtype)

In [3]:
# prepare data
# Take an explicit copy of the needed columns so the assignments below
# operate on an independent frame rather than on a slice of the loaded data.
df = df[['ID_LAT_LON_YEAR_WEEK', 'year', 'week_no', 'emission']].copy()

# Location key: the first 16 characters of the row identifier.
df['id'] = df['ID_LAT_LON_YEAR_WEEK'].str[:16]

# Date of an observation = 1 January of its year plus `week_no` whole weeks.
# This is the same date the '%Y-%W-%w' parse produced when it was fed the
# weekday of 1 January for each year, but it needs no per-year lookup table
# and therefore also works for years outside 2019-2023.
year_start = pd.to_datetime(df['year'].astype(str), format='%Y')
df['date'] = year_start + pd.to_timedelta(df['week_no'] * 7, unit='D')

df = df[['id', 'date', 'emission']]

In [7]:
# function to create model and forecast
def autoarima(data, endog_name='emission', steps=49):
    """Fit an automatically selected ARIMA model to one series and forecast.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the series to model in column ``endog_name``.
    endog_name : str, default 'emission'
        Name of the column of ``data`` used as the endogenous series.
    steps : int, default 49
        Number of future periods to forecast.

    Returns
    -------
    Whatever ``predict(n_periods=steps)`` returns for the fitted model
    (one forecast value per requested period).
    """
    # Read the series from the argument instead of a notebook-level variable,
    # so the function always models exactly the data it was handed.
    y = data.loc[:, endog_name].values

    model = auto_arima(y=y)
    # NOTE(review): auto_arima presumably already returns a fitted model, so
    # this refit looks redundant; it is kept so forecasts are produced exactly
    # as before. Confirm against the pmdarima docs before removing it.
    model = model.fit(y=y,
                      full_output=False,
                      disp=False)
    return model.predict(n_periods=steps)

In [8]:
%%time
# run arima function for every location

groups = df.groupby('id')
results = {}

for name, group in groups:
    #print(name)
    df_id = group.drop(columns='id').set_index('date', drop=True).resample('W').nearest()
    results[name] = autoarima(df_id)
df_results = pd.DataFrame(results).T.reset_index()
# df_results

ID_-0.510_29.290
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-0.528_29.472
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-0.547_29.653
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-0.569_30.031
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-0.598_29.102
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-0.604_29.896
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-0.615_30.885
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-0.627_29.773
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-0.637_30.763
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(5,0,1)(0,0,0)[0] intercept
ID_-0.653_30.447
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(5,0,1)(0,0,0)[0] intercept
ID_-0.659_30.341
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(5,0,1)(0,0,0)[0] intercept
ID_-0.667_30.433
<class 'pmdarim



<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,1)(0,0,0)[0] intercept
ID_-1.335_29.465
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-1.339_29.961
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,1,0)(0,0,0)[0]          
ID_-1.342_29.258
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-1.342_30.158
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,3)(0,0,0)[0] intercept
ID_-1.342_31.358
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-1.346_31.254
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-1.348_29.552
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-1.355_30.245
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,1)(0,0,0)[0] intercept
ID_-1.355_30.645
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,1,0)(0,0,0)[0]          
ID_-1.360_30.340
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,3)(0,0,0)[0] intercept
ID_-1.364_29.036
<class 'pmdarima.arima.arima.ARI



<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-1.510_30.190
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,0)(0,0,0)[0] intercept
ID_-1.514_29.686
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,3)(0,0,0)[0] intercept
ID_-1.516_28.984
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,1,0)(0,0,0)[0]          
ID_-1.523_30.677
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,1,2)(0,0,0)[0]          
ID_-1.526_29.874
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,1,1)(0,0,0)[0]          
ID_-1.528_29.972
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,3)(0,0,0)[0] intercept
ID_-1.528_31.372
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-1.529_30.971
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-1.530_30.870
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-1.532_28.568




<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,0)(0,0,0)[0]          
ID_-1.533_28.467
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,1,0)(0,0,0)[0]          
ID_-1.533_29.967
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,0)(0,0,0)[0] intercept
ID_-1.534_30.366
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,0)(0,0,0)[0] intercept
ID_-1.534_31.466
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-1.536_28.364
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,1,0)(0,0,0)[0]          
ID_-1.555_30.945
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-1.581_30.819
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-1.583_28.517
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,1,0)(0,0,0)[0]          
ID_-1.593_30.207
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,3)(0,0,0)[0] intercept
ID_-1.596_29.404
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-1.597_29.603
<class 'pmdarima.arima.arima.ARI



<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,0)(0,0,0)[0]          
ID_-1.683_29.617
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,0)(0,0,0)[0] intercept
ID_-1.683_31.217
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-1.690_28.510
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,1,0)(0,0,0)[0]          
ID_-1.695_31.405
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,0)(0,0,0)[0] intercept
ID_-1.696_28.304
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,0)(0,0,0)[0]          
ID_-1.696_30.704




<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,2)(0,0,0)[0] intercept
ID_-1.700_30.900
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-1.712_28.688
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,0)(0,0,0)[0]          




ID_-1.718_30.382
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,1,0)(0,0,0)[0]          
ID_-1.729_30.471
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,3)(0,0,0)[0] intercept
ID_-1.730_28.470
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,0)(0,0,0)[0]          
ID_-1.737_30.063




<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,1)(0,0,0)[0] intercept
ID_-1.743_29.457
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,3)(0,0,0)[0] intercept
ID_-1.753_28.547
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-1.764_31.436
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-1.770_29.130
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(3,0,0)(0,0,0)[0] intercept
ID_-1.770_31.030
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-1.783_31.117
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,1,0)(0,0,0)[0]          
ID_-1.792_30.408
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,1,0)(0,0,0)[0]          
ID_-1.797_30.303
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,1,0)(0,0,0)[0]          
ID_-1.799_28.601
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,1,0)(0,0,0)[0]          
ID_-1.802_29.798
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,3)(0,0,0)[0] intercept
ID_-1.802_31.198
<class 'pmdarima.arima.arima.ARI



<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(4,1,2)(0,0,0)[0]          
ID_-1.844_30.356
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,3)(0,0,0)[0] intercept
ID_-1.845_29.355
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,0)(0,0,0)[0] intercept
ID_-1.846_29.554
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,3)(0,0,0)[0] intercept
ID_-1.847_28.753
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,1,0)(0,0,0)[0]          
ID_-1.847_30.953
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-1.851_30.949
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-1.852_29.948
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,3)(0,0,0)[0] intercept
ID_-1.858_31.442
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-1.861_30.939
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,1,0)(0,0,0)[0]          
ID_-1.868_29.832
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,0)(0,0,0)[0] intercept
ID_-1.871_29.729
<class 'pmdarima.arima.arima.ARI



<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-1.981_31.219
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,0)(0,0,0)[0] intercept
ID_-1.990_30.410
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,3)(0,0,0)[0] intercept
ID_-2.015_30.985
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-2.016_31.184
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,1,1)(0,0,0)[0]          
ID_-2.018_28.882
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,1,0)(0,0,0)[0]          
ID_-2.032_29.168
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,0)(0,0,0)[0] intercept
ID_-2.041_30.559
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,3)(0,0,0)[0] intercept
ID_-2.049_29.151
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-2.049_29.551
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,0)(0,0,0)[0] intercept
ID_-2.049_30.751
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,1)(0,0,0)[0] intercept
ID_-2.051_29.949
<class 'pmdarima.arima.arima.ARI



<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,3)(0,0,0)[0] intercept
ID_-2.521_29.179
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,3)(0,0,0)[0] intercept
ID_-2.522_29.078
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,1)(0,0,0)[0] intercept
ID_-2.539_28.461
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,1,0)(0,0,0)[0]          
ID_-2.548_29.752
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,1,0)(0,0,0)[0]          
ID_-2.555_29.145
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,2)(0,0,0)[0] intercept
ID_-2.561_29.839
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,1,0)(0,0,0)[0]          
ID_-2.571_30.029
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,1)(0,0,0)[0] intercept
ID_-2.583_29.617
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,0)(0,0,0)[0] intercept
ID_-2.588_29.912
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,0)(0,0,0)[0] intercept
ID_-2.589_28.311
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,1)(0,0,0)[0] intercept
ID_-2.595_28.905
<class 'pmdarima.arima.arima.ARI



<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,0)(0,0,0)[0]          
ID_-2.817_29.783
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-2.818_29.582
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-2.819_29.981
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-2.820_28.380
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-2.821_29.479
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,0)(0,0,0)[0] intercept
ID_-2.828_29.872
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-2.834_30.466
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-2.836_30.064
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,1,0)(0,0,0)[0]          
ID_-2.836_30.364
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-2.839_28.861
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,1)(0,0,0)[0] intercept
ID_-2.841_28.659
<class 'pmdarima.arima.arima.ARI



ID_-2.842_28.958
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-2.846_28.454
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,1,0)(0,0,0)[0]          
ID_-2.851_30.249
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-2.853_28.547
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-2.855_29.145
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-2.858_28.642
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,1)(0,0,0)[0] intercept
ID_-2.859_29.041
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,0)(0,0,0)[0]          
ID_-2.859_29.441




<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,1,0)(0,0,0)[0]          
ID_-2.860_28.740
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,3)(0,0,0)[0] intercept
ID_-2.879_30.621
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-2.883_29.017
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,3)(0,0,0)[0] intercept
ID_-2.896_29.404
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-2.898_29.002
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,0,0)(0,0,0)[0]          
ID_-2.899_29.601




<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,1,0)(0,0,0)[0]          
ID_-2.900_29.300
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-2.902_30.398
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,1,1)(0,0,0)[0]          
ID_-2.922_29.178
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,1,1)(0,0,0)[0]          
ID_-2.924_29.976
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-2.929_29.771
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,0,1)(0,0,0)[0] intercept
ID_-2.931_29.569
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(1,1,0)(0,0,0)[0]          
ID_-2.935_29.565
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,1,0)(0,0,0)[0]          
ID_-2.939_30.461
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(0,1,0)(0,0,0)[0]          
ID_-2.944_29.356
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-2.946_30.454
<class 'pmdarima.arima.arima.ARIMA'>
 ARIMA(2,0,0)(0,0,0)[0] intercept
ID_-2.955_30.245
<class 'pmdarima.arima.arima.ARI

In [9]:
# format output
# Reshape the wide forecast table (one column per step 0..48) into one row
# per location and step, build the row identifier "<id>_2022_<step>", and
# rename the columns to the names used in the saved file.
df_results = (
    df_results
    .melt(id_vars=['index'], value_vars=range(49))
    .assign(id=lambda long: long['index'] + '_2022_'
            + long['variable'].astype('string').str.zfill(2))
    .loc[:, ['id', 'value']]
    .sort_values('id')
    .reset_index(drop=True)
    .rename(columns={'id': 'ID_LAT_LON_YEAR_WEEK', 'value': 'emission'})
)
df_results

Unnamed: 0,ID_LAT_LON_YEAR_WEEK,emission
0,ID_-0.510_29.290_2022_00,4.311202
1,ID_-0.510_29.290_2022_01,4.298146
2,ID_-0.510_29.290_2022_02,4.297820
3,ID_-0.510_29.290_2022_03,4.294875
4,ID_-0.510_29.290_2022_04,4.292774
...,...,...
24348,ID_-3.299_30.301_2022_44,27.484273
24349,ID_-3.299_30.301_2022_45,27.484273
24350,ID_-3.299_30.301_2022_46,27.484273
24351,ID_-3.299_30.301_2022_47,27.484273


In [10]:
# save output to csv
# to_csv does not create missing folders, so make sure the target directory
# exists before writing; otherwise the write fails after all forecasts have
# already been computed.
output_path = Path('output/autoarima.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df_results.to_csv(output_path, index=False)