### Run this first to setup django in this notebook

In [1]:
import os, django
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "backend.settings")
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
django.setup()

### Import django assets

In [2]:
from weather.models import *
from weather.tasks import *

### Import other libraries

In [3]:
from datetime import datetime
import datetime as dt
import pandas as pd
from django_pandas.io import read_frame

from statsmodels.tsa.statespace.sarimax import SARIMAX

### Global variables

In [4]:
dimensions = ['temp', 'dwpt', 'rhum', 'wdir', 'wspd', 'pres']
next_entry_count = 24

### Script starts here

In [None]:
update_entry()

 21%|█████████▏                                  | 13361/63563 [01:44<06:03, 138.21it/s]

### Check shape and nan values

In [109]:
df = read_frame(Entry.objects.all())
print(df.shape[0])
df.isna().sum()

28544


id      0
time    0
temp    0
dwpt    0
rhum    0
wdir    0
wspd    0
pres    0
coco    0
dtype: int64

In [100]:
train_set = (read_frame(Entry.objects.all())[['time','temp','dwpt','rhum','wdir','wspd','pres']]).copy(deep=True)

# Preparing predictions set
predictions = (read_frame(Entry.objects.all().order_by('-id')[:next_entry_count])[['time','temp','dwpt','rhum','wdir','wspd','pres']]).copy(deep=True)
predictions = predictions.sort_values(by='time')
predictions.reset_index(drop=True, inplace=True)

for index in range(0,len(predictions)):
    predictions.loc[index, 'time'] += dt.timedelta(hours=next_entry_count)
    
# Test
print("Train set")
print(train_set[len(train_set)-next_entry_count:])
print("Test set prep")
print(predictions)

Train set
                           time  temp  dwpt  rhum   wdir  wspd    pres
28520 2024-04-03 08:00:00+00:00  35.0  19.0  39.0  160.0   7.0  1007.0
28521 2024-04-03 09:00:00+00:00  34.2  20.6  45.0   62.0  16.7  1006.4
28522 2024-04-03 10:00:00+00:00  33.1  21.3  50.0   67.0  13.0  1006.9
28523 2024-04-03 11:00:00+00:00  31.9  22.3  57.0   72.0  11.1  1007.3
28524 2024-04-03 12:00:00+00:00  30.8  22.9  63.0   78.0   7.4  1007.8
28525 2024-04-03 13:00:00+00:00  29.8  23.3  68.0   81.0   7.4  1008.5
28526 2024-04-03 14:00:00+00:00  28.9  23.6  73.0   92.0   5.5  1009.2
28527 2024-04-03 15:00:00+00:00  27.9  23.9  79.0  187.0   3.7  1009.9
28528 2024-04-03 16:00:00+00:00  27.3  24.0  82.0  197.0   3.7  1009.7
28529 2024-04-03 17:00:00+00:00  26.8  24.1  85.0  203.0   3.7  1009.4
28530 2024-04-03 18:00:00+00:00  26.2  24.1  88.0  206.0   3.7  1009.1
28531 2024-04-03 19:00:00+00:00  25.8  24.0  90.0  208.0   3.7  1008.9
28532 2024-04-03 20:00:00+00:00  25.4  23.8  91.0  210.0   3.7  100

In [103]:
# Implement rolling SARIMAX

# Setting SARIMAX order
non_seasonal_component = (0,1,0)
seasonal_component = (1, 0, 1, 24)

# Init rolling SARIMA
for index in tqdm(range(0,next_entry_count)):
    for dimension in dimensions:
        if (index!=0):
            row_to_append = predictions.iloc[[index-1]]
            train_set = pd.concat([train_set, row_to_append], ignore_index=True)

        # Init SARIMAX model
        model = SARIMAX(train_set[dimension].values, order=non_seasonal_component, seasonal_order=seasonal_component)
        model_fit = model.fit()

        # Making predictions
        prediction = model_fit.forecast()
        predictions.loc[index, dimension] = prediction
        
        

  0%|                                                            | 0/24 [00:00<?, ?it/s]


TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

In [85]:
row_to_append = predictions.iloc[[0]]
print(row_to_append)
train_set = pd.concat([train_set, row_to_append], ignore_index=True)
print(train_set)

                       time  temp  dwpt  rhum   wdir wspd    pres
0 2024-04-02 22:00:00+00:00  25.7  24.7  94.0  214.0  3.7  1008.8
                           time  temp  dwpt  rhum   wdir  wspd    pres
0     2021-01-01 00:00:00+00:00  23.0  16.1  65.0   20.0  11.2  1012.0
1     2021-01-01 01:00:00+00:00  23.0  16.1  65.0   10.0   7.6  1013.0
2     2021-01-01 02:00:00+00:00  24.0  16.0  61.0   10.0   7.6  1013.0
3     2021-01-01 03:00:00+00:00  26.0  16.0  54.0   40.0  11.2  1013.0
4     2021-01-01 04:00:00+00:00  28.0  16.0  48.0   40.0  11.2  1013.0
...                         ...   ...   ...   ...    ...   ...     ...
28506 2024-04-02 18:00:00+00:00  26.2  24.4  90.0  243.0   1.8  1009.5
28507 2024-04-02 19:00:00+00:00  25.9  24.3  91.0  240.0   1.8  1009.3
28508 2024-04-02 20:00:00+00:00  25.5  24.3  93.0  235.0   1.8  1009.1
28509 2024-04-02 21:00:00+00:00  25.2  24.0  93.0  224.0   1.8  1008.9
28510 2024-04-02 22:00:00+00:00  25.7  24.7  94.0  214.0   3.7  1008.8

[28511 rows x 7