# Time series forecasting on daily minimum temperatures

In [239]:
import numpy as np
import pandas as pd

## Define functions to create time-shifted series

In [271]:
def tweak_daily_min_temperature(df_):
    '''Return a new frame indexed by a timezone-aware ``Date`` column.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Frame with a ``Date`` column that pandas can parse as datetimes.
        The values must be timezone-naive, because they are localized here.

    Returns
    -------
    pandas.DataFrame
        New frame whose index is ``Date`` localized to 'Australia/Melbourne'.
        ``df_`` itself is left unchanged.
    '''
    # pd.to_datetime replaces astype('datetime64'): the unit-less dtype string
    # is rejected by pandas 2.x, whereas to_datetime works on old and new versions.
    return (df_.assign(Date = lambda df: pd.to_datetime(df.Date).dt.tz_localize('Australia/Melbourne')
                      )
            .set_index('Date')
           )

def time_series_to_supervised_learning(df_, n_shift_back=None, n_shift_forward=None, dropna=True):
    '''Add time-shifted copies of every column of a time series.

    The time information is expected to live in the index. Shifting is
    positional (row based), so "one step" means "one row".

    Parameters
    ----------
    df_ : pandas.Series or pandas.DataFrame
        The time series. A Series is converted to a one-column DataFrame.
    n_shift_back : int, optional
        Number of past steps. For every step i in 1..n_shift_back and every
        original column, a column named '<col>(t-i)' is added that holds the
        value from i rows earlier.
    n_shift_forward : int, optional
        Number of future steps. For every step i in 1..n_shift_forward and
        every original column, a column named '<col>(t+i)' is added that holds
        the value from i rows later.
    dropna : bool, default True
        If True, drop every row that contains a missing value. This removes
        the rows at the start/end that shifting leaves incomplete.

    Returns
    -------
    pandas.DataFrame
        Columns ordered from oldest to newest: '(t-n)', ..., '(t-1)', the
        original columns, '(t+1)', ..., '(t+m)'.
    '''
    # A Series has no .columns; promote it so both input kinds share one code path.
    if isinstance(df_, pd.Series):
        df_ = df_.to_frame()
    # Labels of the original columns, in their original order
    cols = list(df_.columns)
    # Count backward steps down from the largest lag so the oldest values come first
    back_steps = range(n_shift_back, 0, -1) if n_shift_back is not None else ()
    forward_steps = range(1, n_shift_forward + 1) if n_shift_forward is not None else ()
    # f-strings (instead of str concatenation) also work for non-string column labels
    past = {f'{col}(t-{i})': df_[col].shift(i) for i in back_steps for col in cols}
    future = {f'{col}(t+{i})': df_[col].shift(-i) for i in forward_steps for col in cols}
    # Attach all shifted columns at once, then arrange them chronologically
    df_ = df_.assign(**past, **future)[[*past, *cols, *future]]
    return df_.dropna() if dropna else df_

In [272]:
# Load the raw CSV and add a second column `new` holding twice the Temp value
df_raw = (
    pd.read_csv('daily-min-temperatures.csv')
    .assign(new=lambda frame: frame.Temp * 2)
)

In [273]:
# Build the Date-indexed frame from the raw data
df = df_raw.pipe(tweak_daily_min_temperature)

In [274]:
# Show the frame extended with the values of the two previous rows
df.pipe(time_series_to_supervised_learning, n_shift_back=2)

['Temp', 'new', 'Temp(t-1)', 'new(t-1)']
['Temp(t-1)', 'new(t-1)', 'Temp', 'new']
['Temp(t-1)', 'new(t-1)', 'Temp', 'new', 'Temp(t-2)', 'new(t-2)']
['Temp(t-2)', 'new(t-2)', 'Temp(t-1)', 'new(t-1)', 'Temp', 'new']


Unnamed: 0_level_0,Temp(t-2),new(t-2),Temp(t-1),new(t-1),Temp,new
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1981-01-03 00:00:00+11:00,20.7,41.4,17.9,35.8,18.8,37.6
1981-01-04 00:00:00+11:00,17.9,35.8,18.8,37.6,14.6,29.2
1981-01-05 00:00:00+11:00,18.8,37.6,14.6,29.2,15.8,31.6
1981-01-06 00:00:00+11:00,14.6,29.2,15.8,31.6,15.8,31.6
1981-01-07 00:00:00+11:00,15.8,31.6,15.8,31.6,15.8,31.6
...,...,...,...,...,...,...
1990-12-27 00:00:00+11:00,12.9,25.8,14.6,29.2,14.0,28.0
1990-12-28 00:00:00+11:00,14.6,29.2,14.0,28.0,13.6,27.2
1990-12-29 00:00:00+11:00,14.0,28.0,13.6,27.2,13.5,27.0
1990-12-30 00:00:00+11:00,13.6,27.2,13.5,27.0,15.7,31.4


In [265]:
# Display the Date-indexed frame `df` (columns Temp and new)
df

Unnamed: 0_level_0,Temp,new
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1981-01-01 00:00:00+11:00,20.7,41.4
1981-01-02 00:00:00+11:00,17.9,35.8
1981-01-03 00:00:00+11:00,18.8,37.6
1981-01-04 00:00:00+11:00,14.6,29.2
1981-01-05 00:00:00+11:00,15.8,31.6
...,...,...
1990-12-27 00:00:00+11:00,14.0,28.0
1990-12-28 00:00:00+11:00,13.6,27.2
1990-12-29 00:00:00+11:00,13.5,27.0
1990-12-30 00:00:00+11:00,15.7,31.4


0
