## Preprocessors

#### Contains:
* Converting strings or other kinds of time objects to pandas datetime timestamps
* Converting pandas datetimes to epoch timestamps
* Normalising and standardising
* Stationarising the time series
* Differencing the time series

In [1]:
import writefile_run

In [2]:
%%writefile_run preprocessors.py 


import numpy as np
import pandas as pd
import datetime as dt
from sklearn import preprocessing 

In [3]:
%%writefile_run preprocessors.py -a


def to_timestamp(dataframe,date_col_index,time_format='%Y-%m',isweek=False):
    '''
    Converts a column of string datetimes to pandas datetimes, in place.

    Arguments:
    dataframe      -- pandas DataFrame that holds the date column
    date_col_index -- key used to select the date column
                      (the column is read as dataframe[date_col_index])
    time_format    -- strptime format, used only when isweek is True.
                      '-0' is appended to every value before parsing, so the
                      format has to consume that suffix as well
                      (for example '%Y-%W-%w'); the default '%Y-%m' does not
    isweek         -- bool, True for weekly data, False otherwise

    Returns:
    the same dataframe object, with the date column replaced by the
    parsed values
    '''
    if isweek:
        def dateparse(dates):
            # The appended '-0' supplies the extra field that time_format
            # must parse in addition to the original string.
            return dt.datetime.strptime(dates + '-0', time_format)
    else:
        # Values are parsed one at a time, so every entry gets its own
        # format inference.
        dateparse = pd.to_datetime

    # Store the parsed values back: apply() returns a new Series and leaves
    # the dataframe untouched unless the result is assigned.
    dataframe[date_col_index] = dataframe[date_col_index].apply(dateparse)
    return dataframe

In [4]:
%%writefile_run preprocessors.py -a


def ts_to_unix(t):
    '''
    Converts a datetime to an epoch timestamp in milliseconds.

    Arguments:
    t -- single datetime object; it is subtracted from the naive
         datetime(1970, 1, 1), so it has to be timezone-naive as well

    Returns:
    int, whole milliseconds elapsed since 1970-01-01 00:00:00
    (fractions of a millisecond are truncated towards zero)
    '''
    elapsed = t - dt.datetime(1970, 1, 1)
    one_ms = dt.timedelta(milliseconds=1)
    # Integer timedelta division is exact. Going through
    # total_seconds() * 1000 rounds in floating point and can land just
    # below the true value (1.001 s -> 1000.9999999999999 -> 1000).
    whole_ms = int(abs(elapsed) // one_ms)
    # Dividing the magnitude and restoring the sign keeps the
    # truncate-towards-zero behaviour for dates before the epoch.
    if elapsed < dt.timedelta(0):
        return -whole_ms
    return whole_ms

In [5]:
%%writefile_run preprocessors.py -a


def normalise_standardise(data):
    '''
    Min-max scales every column of a DataFrame and then standardises the
    scaled columns (subtract the column mean, divide by the column std).

    Arguments:
    data -- pandas DataFrame; its values are scaled column by column

    Returns:
    DataFrame with the same columns and index as data
    '''
    scaler = preprocessing.MinMaxScaler()
    scaled_values = scaler.fit_transform(data.values)
    # Wrap the raw array again so the column labels and index are kept.
    scaled = pd.DataFrame(scaled_values,
                          columns=data.columns,
                          index=data.index)
    column_means = scaled.mean(axis=0)
    column_stds = scaled.std(axis=0)
    return (scaled - column_means) / column_stds

In [7]:
%%writefile_run preprocessors.py -a


def stationarize(data):
    '''
    Stationarises the data.

    Seasons are fitted with fit_seasons. When none are returned (None) the
    result is detrend(data). Otherwise the data is passed through
    adjust_seasons and the part removed by detrend (data - detrend(data))
    is subtracted from that.

    Arguments:
    data -- the series to stationarise, passed as-is to the helpers
            (NOTE(review): fit_seasons, adjust_seasons and detrend are not
            defined in this cell; confirm where they are imported from)
    '''
    # Only the first value returned by fit_seasons is needed here.
    seasons, _ = fit_seasons(data)

    if seasons is None:
        return detrend(data)

    season_adjusted = adjust_seasons(data, seasons=seasons)
    removed_by_detrend = data - detrend(data)
    return season_adjusted - removed_by_detrend

In [8]:
%%writefile_run preprocessors.py -a


def differencing(data,n=1,axis=-1):
    '''
    Returns the n-th order discrete difference of the data (numpy.diff).

    Arguments:
    data -- array-like holding the series
    n    -- order of differencing, 1 by default
    axis -- axis along which the difference is taken, -1 (last) by default
    '''
    differenced = np.diff(data, n=n, axis=axis)
    return differenced

In [9]:
# Run-length probability matrix: one column per time step (plus the initial
# state) and one row per candidate run length. All of the mass starts at
# run length 0.
# NOTE(review): `data`, `observation_likelihood`, `hazard_func` and `maxes`
# are not defined in this cell and must already exist - confirm where they
# are set up.
R = np.zeros((len(data) + 1, len(data) + 1))
R[0, 0] = 1

# NOTE: this recursion has to stay a loop. Column t+1 of R is built from
# column t, and observation_likelihood is updated after every datum before
# the next pdf call, so the columns cannot be filled by a single vectorised
# assignment.
for t, x in enumerate(data):
    # Evaluate the predictive distribution for the new datum under each of
    # the parameters.  This is the standard thing from Bayesian inference.
    predprobs = observation_likelihood.pdf(x)

    # Evaluate the hazard function for this interval
    H = hazard_func(np.array(range(t+1)))

    # Evaluate the growth probabilities - shift the probabilities down and to
    # the right, scaled by the hazard function and the predictive
    # probabilities.
    R[1:t+2, t+1] = R[0:t+1, t] * predprobs * (1-H)

    # Evaluate the probability that there *was* a changepoint and we're
    # accumulating the mass back down at r = 0.
    R[0, t+1] = np.sum( R[0:t+1, t] * predprobs * H)

    # Renormalize the run length probabilities for improved numerical
    # stability.
    R[:, t+1] = R[:, t+1] / np.sum(R[:, t+1])

    # Update the parameter sets for each possible run length.
    observation_likelihood.update_theta(x)

    # Record the most probable run length in column t.
    maxes[t] = R[:, t].argmax()

In [2]:
from math import sqrt
from joblib import Parallel, delayed

# Sequential baseline: square each of 0..9 and take the root again.
sqroots_1 = list(map(sqrt, (i ** 2 for i in range(10))))

# The same computation submitted through joblib with n_jobs=2.
sqroots_2 = Parallel(n_jobs=2)(map(delayed(sqrt), (i ** 2 for i in range(10))))

ModuleNotFoundError: No module named 'joblib'