In [1]:
import pandas as pd

In [129]:
df = pd.read_csv('./dataset/daily-minimum-temperatures-in-me.csv', parse_dates=['Date'], index_col='Date')
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3650 entries, 1981-01-01 to 1990-12-31
Data columns (total 1 columns):
 #   Column                                                         Non-Null Count  Dtype 
---  ------                                                         --------------  ----- 
 0   Daily minimum temperatures in Melbourne, Australia, 1981-1990  3650 non-null   object
dtypes: object(1)
memory usage: 57.0+ KB


In [16]:
df.head()

Unnamed: 0_level_0,"Daily minimum temperatures in Melbourne, Australia, 1981-1990"
Date,Unnamed: 1_level_1
1981-01-01,20.7
1981-01-02,17.9
1981-01-03,18.8
1981-01-04,14.6
1981-01-05,15.8


## DATETIME FEATURES

In [31]:
new_df = pd.DataFrame()
new_df = new_df.assign(
    month = df.index.month,
    day = df.index.day,
    temperatures = df[df.columns[0]].values
)

In [32]:
new_df.head()

Unnamed: 0,month,day,temperatures
0,1,1,20.7
1,1,2,17.9
2,1,3,18.8
3,1,4,14.6
4,1,5,15.8


## LAG FEATURES

In [62]:
temps = pd.DataFrame(df.values)
new_temps = pd.concat([temps.shift(3), temps.shift(2), temps.shift(1), temps], keys=['t-2', 't-1', 't', 't+1'], axis=1)
new_temps.columns = new_temps.columns.droplevel(1)
new_temps.head()

Unnamed: 0,t-2,t-1,t,t+1
0,,,,20.7
1,,,20.7,17.9
2,,20.7,17.9,18.8
3,20.7,17.9,18.8,14.6
4,17.9,18.8,14.6,15.8


## ROLLING WINDOW STATISTICS

In [130]:
import re

for i in df.iloc[:,0]:
    if re.search(r'^\D', i, re.I):
        print(i)

?0.2
?0.8
?0.1


In [131]:
df.iloc[:, 0] = df.iloc[:, 0].str.replace(r'\?','', regex=True)

In [132]:
for i in df.iloc[:,0]:
    if re.search(r'^\D', i, re.I):
        print(i)

In [133]:
df.iloc[:, 0] = df.iloc[:, 0].astype(float)

In [135]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3650 entries, 1981-01-01 to 1990-12-31
Data columns (total 1 columns):
 #   Column                                                         Non-Null Count  Dtype  
---  ------                                                         --------------  -----  
 0   Daily minimum temperatures in Melbourne, Australia, 1981-1990  3650 non-null   float64
dtypes: float64(1)
memory usage: 57.0 KB


In [144]:
temps = pd.DataFrame(
    {
        'mean(t-1, t)': df.iloc[:,0].shift(1).rolling(window=2).mean(),
        't+1': df.iloc[:,0]
    }
)
temps

Unnamed: 0_level_0,"mean(t-1, t)",t+1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1981-01-01,,20.7
1981-01-02,,17.9
1981-01-03,19.30,18.8
1981-01-04,18.35,14.6
1981-01-05,16.70,15.8
...,...,...
1990-12-27,13.75,14.0
1990-12-28,14.30,13.6
1990-12-29,13.80,13.5
1990-12-30,13.55,15.7


In [148]:
width = 3
shifted = df.iloc[:, 0].shift(width - 1)
new_data = pd.DataFrame(
    {
        'min': shifted.rolling(window=width).min(),
        'mean': shifted.rolling(window=width).mean(),
        'max': shifted.rolling(window=width).max(),
        't + 1': df.iloc[:, 0]
    }
)
new_data

Unnamed: 0_level_0,min,mean,max,t + 1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1981-01-01,,,,20.7
1981-01-02,,,,17.9
1981-01-03,,,,18.8
1981-01-04,,,,14.6
1981-01-05,17.9,19.133333,20.7,15.8
...,...,...,...,...
1990-12-27,10.0,12.266667,13.9,14.0
1990-12-28,10.0,12.500000,14.6,13.6
1990-12-29,12.9,13.833333,14.6,13.5
1990-12-30,13.6,14.066667,14.6,15.7


## Expanding Window Statistics

In [158]:
window = df.iloc[:, 0].expanding()
dataframe = pd.DataFrame(
    {
        'min': window.min(),
        'max': window.max(),
        'mean': window.mean(),
        't+1': df.iloc[:, 0].shift(-1),
    }
)
dataframe

Unnamed: 0_level_0,min,max,mean,t+1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1981-01-01,20.7,20.7,20.700000,17.9
1981-01-02,17.9,20.7,19.300000,18.8
1981-01-03,17.9,20.7,19.133333,14.6
1981-01-04,14.6,20.7,18.000000,15.8
1981-01-05,14.6,20.7,17.560000,15.8
...,...,...,...,...
1990-12-27,0.0,26.3,11.174712,13.6
1990-12-28,0.0,26.3,11.175377,13.5
1990-12-29,0.0,26.3,11.176014,15.7
1990-12-30,0.0,26.3,11.177254,13.0
