In [1]:
from pathlib import Path

import pandas as pd
from statsmodels.tsa.statespace.sarimax import SARIMAX

In [2]:
# Explicit column dtypes handed to `pd.read_csv`. Latitude and longitude are
# read as strings so they can be concatenated verbatim into a location id.
dtype = dict(
    ID_LAT_LON_YEAR_WEEK='string',
    latitude='string',
    longitude='string',
    year='int',
    week_no='int',
    emission='float',
)

In [3]:
# Load the training data and reduce it to one (id, date, emission) row per
# location and week.
train_raw = pd.read_csv('files/train.csv', dtype=dtype)

# Each observation is dated 1 January of its year plus `week_no` whole weeks.
# This is the same date the previous '%Y-%W-%w' parsing produced through a
# hard-coded year -> weekday-of-1-January table, but it needs no per-year
# table (so unknown years no longer become NaT) and it does not collapse
# weeks 0 and 1 in years that start on a Monday.
year_start = pd.to_datetime(train_raw['year'].astype(str) + '-01-01',
                            format='%Y-%m-%d')
week_offset = pd.to_timedelta(train_raw['week_no'] * 7, unit='D')

# Build the final frame in one step instead of assigning columns onto a
# column-subset of the raw frame.
df = pd.DataFrame({
    # latitude/longitude are read as strings, so this is plain concatenation.
    'id': train_raw['latitude'] + '_' + train_raw['longitude'],
    'date': year_start + week_offset,
    'emission': train_raw['emission'],
})

# One group per location; iterated later to fit one model per series.
groups = df.groupby('id')
df

Unnamed: 0,id,date,emission
0,-0.51_29.29,2019-01-01,3.750994
1,-0.51_29.29,2019-01-08,4.025176
2,-0.51_29.29,2019-01-15,4.231381
3,-0.51_29.29,2019-01-22,4.305286
4,-0.51_29.29,2019-01-29,4.347317
...,...,...,...
79018,-3.299_30.301,2021-12-03,29.404171
79019,-3.299_30.301,2021-12-10,29.186497
79020,-3.299_30.301,2021-12-17,29.131205
79021,-3.299_30.301,2021-12-24,28.125792


In [4]:
def arima(data, endog_name='emission', steps=48):
    """Fit a SARIMAX model with order (1, 0, 0) to one series and forecast it.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the series to model. Its index supplies the observation
        dates and is passed to SARIMAX together with ``freq='W'``, so it is
        expected to already be on a weekly grid.
    endog_name : str, default 'emission'
        Name of the column in ``data`` to use as the endogenous variable.
    steps : int, default 48
        Number of periods to forecast past the end of ``data``.

    Returns
    -------
    The out-of-sample forecast returned by the fitted model's
    ``forecast(steps=steps)``.
    """
    # Use the `data` argument rather than a module-level frame, so the
    # function does not depend on whatever the calling cell named its
    # loop variable.
    model = SARIMAX(endog=data.loc[:, endog_name].values,
                    order=(1, 0, 0),
                    dates=data.index.values,
                    freq='W')
    fitted = model.fit()
    return fitted.forecast(steps=steps)

In [5]:
# Fit one model per location id and gather the forecasts, one column per id.
results = dict()
for location_id, location_rows in groups:
    # Index the location's rows by date, then snap them onto a regular
    # weekly ('W') grid using the nearest observation.
    dated = location_rows.drop(columns='id').set_index('date')
    df_id = dated.resample('W').nearest()
    results[location_id] = arima(df_id)
df_results = pd.DataFrame(results)
df_results



Unnamed: 0,-0.51_29.29,-0.528_29.472,-0.547_29.653,-0.569_30.031,-0.598_29.102,-0.604_29.896,-0.615_30.885,-0.627_29.773,-0.637_30.763,-0.653_30.447,...,-3.095_29.505,-3.099_29.601,-3.133_29.467,-3.136_30.364,-3.138_30.662,-3.153_30.347,-3.161_28.839,-3.174_29.926,-3.287_29.713,-3.299_30.301
0,4.266863,4.313234,0.575033,110.13067,0.087938,68.593552,84.029692,111.037288,43.676049,30.436717,...,4.23141,73.57305,111.924382,15.011766,23.703724,15.020466,0.122628,42.852304,43.18487,27.206988
1,4.264218,4.307727,0.574774,109.930803,0.087898,68.490156,83.898143,110.894181,43.539709,30.333549,...,4.228718,73.478406,111.784235,14.995733,23.684214,15.000862,0.122573,42.800663,43.109788,27.174712
2,4.261574,4.302228,0.574516,109.731298,0.087858,68.386915,83.766801,110.751259,43.403794,30.23073,...,4.226027,73.383883,111.644263,14.979717,23.664721,14.981284,0.122517,42.749084,43.034837,27.142474
3,4.258932,4.296735,0.574257,109.532156,0.087818,68.28383,83.635664,110.608521,43.268303,30.128261,...,4.223339,73.289482,111.504466,14.963718,23.645243,14.961731,0.122462,42.697567,42.960016,27.110275
4,4.256292,4.29125,0.573998,109.333375,0.087779,68.180901,83.504733,110.465966,43.133235,30.026138,...,4.220652,73.195203,111.364845,14.947736,23.625781,14.942204,0.122407,42.646113,42.885325,27.078113
5,4.253653,4.285771,0.57374,109.134955,0.087739,68.078126,83.374007,110.323596,42.998589,29.924362,...,4.217966,73.101045,111.225398,14.931771,23.606336,14.922702,0.122352,42.59472,42.810764,27.04599
6,4.251016,4.2803,0.573482,108.936895,0.087699,67.975507,83.243485,110.181408,42.864363,29.82293,...,4.215283,73.007007,111.086126,14.915824,23.586906,14.903226,0.122297,42.54339,42.736333,27.013905
7,4.248381,4.274836,0.573223,108.739195,0.087659,67.873042,83.113168,110.039405,42.730556,29.721842,...,4.212601,72.913091,110.947029,14.899893,23.567492,14.883775,0.122242,42.492121,42.662031,26.981858
8,4.245747,4.269378,0.572965,108.541853,0.08762,67.770732,82.983054,109.897584,42.597167,29.621097,...,4.209921,72.819296,110.808105,14.883979,23.548095,14.86435,0.122187,42.440914,42.587859,26.94985
9,4.243115,4.263928,0.572707,108.34487,0.08758,67.668576,82.853145,109.755946,42.464194,29.520694,...,4.207242,72.725621,110.669356,14.868083,23.528713,14.844949,0.122132,42.389769,42.513815,26.917879


In [6]:
# Write the per-location forecasts (one column per id, no index column).
# Create the target directory first so a fresh checkout without `output/`
# does not fail on export.
output_path = Path('output/train_arima.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df_results.to_csv(output_path, index=False)