## Functions

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, PowerTransformer

In [None]:
# Load the example dataset; the path is relative to the notebook's working directory.
data = pd.read_csv('../ex_data_2.csv')
# Bare expression so the notebook renders the loaded frame as the cell output.
data

In [None]:
# Standardize
def standardize(df):
    """Scale ``df`` with scikit-learn's ``StandardScaler`` and return a new frame.

    The scaler is fitted on ``df`` itself and applied in one step. The result
    is wrapped in a fresh ``DataFrame`` built from the scaler output only; no
    row or column labels from ``df`` are passed along explicitly.
    """
    scaled_values = StandardScaler().fit_transform(df)
    return pd.DataFrame(scaled_values)

standardize(data)

In [None]:
# Normalize
def normalize(df, min, max):
    """Rescale ``df`` into the range ``[min, max]`` with ``MinMaxScaler``.

    Parameters
    ----------
    df : data accepted by ``MinMaxScaler.fit_transform``.
    min, max : lower and upper bound handed to the scaler as ``feature_range``.
        (The names shadow the builtins inside this function; they are kept
        because they are part of the call signature.)

    Returns
    -------
    A new ``DataFrame`` built from the scaler output only.
    """
    target_range = (min, max)
    scaler = MinMaxScaler(feature_range=target_range)
    return pd.DataFrame(scaler.fit_transform(df))

normalize(data, 0, 1)

In [None]:
# Moving average filter
def moving_avg_filter(df, window_size):
    """Smooth ``df`` with a trailing rolling mean and drop the warm-up rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to smooth; the input frame is not modified.
    window_size : int
        Number of consecutive rows averaged per output row. Values larger
        than the number of rows are clamped to the number of rows.

    Returns
    -------
    pandas.DataFrame
        Rolling means with rows containing NaN removed (this includes the
        leading rows that do not yet have a full window) and the index reset
        to 0..n-1.
    """
    # A window longer than the data would produce only NaN rows, so clamp it.
    window = min(window_size, len(df.index))
    smoothed = df.rolling(window=window).mean()
    return smoothed.dropna().reset_index(drop=True)

moving_avg_filter(data, 2)

In [None]:
# Difference transformation
def difference_transformation(df):
    """Return the row-to-row differences of ``df``.

    Each output row is the current row minus the previous one. Rows containing
    NaN (which includes the first row, as it has no predecessor) are dropped
    and the index is renumbered from zero.
    """
    deltas = df.diff()
    return deltas.dropna().reset_index(drop=True)

difference_transformation(data)

In [None]:
# Box-Cox power transformation
def box_cox_power_trans(df):
    """Apply a Box-Cox power transform to ``df`` and return a new frame.

    The data is first rescaled into ``[1, 2]`` with ``MinMaxScaler`` so that
    every value handed to the Box-Cox step is positive, then transformed with
    ``PowerTransformer(method='box-cox')``. The result is wrapped in a fresh
    ``DataFrame`` built from the transformer output only.
    """
    # Box-Cox cannot handle zero or negative values; shift everything into [1, 2].
    positive = MinMaxScaler(feature_range=(1, 2)).fit_transform(df)
    transformed = PowerTransformer(method='box-cox').fit_transform(positive)
    return pd.DataFrame(transformed)

box_cox_power_trans(data)

In [None]:
# Yeo-Johnson transformation
def yeo_johns_power_trans(df):
    """Apply a Yeo-Johnson power transform to ``df`` and return a new frame.

    ``PowerTransformer(method='yeo-johnson')`` is fitted on ``df`` and applied
    in one step; the output is wrapped in a fresh ``DataFrame`` built from the
    transformer output only.
    """
    transformer = PowerTransformer(method='yeo-johnson')
    return pd.DataFrame(transformer.fit_transform(df))

yeo_johns_power_trans(data)

In [None]:
# Divide each column by its standard deviation
def div_stand_devs(df):
    """Return a copy of ``df`` with every column divided by its standard deviation.

    The input frame is left untouched: the previous implementation assigned
    the scaled columns back into ``df``, silently altering the caller's data
    for everything that ran afterwards.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric data, one variable per column.

    Returns
    -------
    pandas.DataFrame
        New frame with the same index and columns as ``df``. The divisor is
        ``df.std(axis=0)`` (pandas' default sample standard deviation).
    """
    col_std = df.std(axis=0)
    # Align the per-column statistic with the column labels and divide at once.
    return df.div(col_std, axis='columns')
    
div_stand_devs(data)

In [None]:
# Subtract means per column
def sub_means(df):
    """Return a copy of ``df`` with each column's mean subtracted (mean-centering).

    Fixes two problems in the previous implementation: it divided by the mean
    instead of subtracting it, and it wrote the result back into the caller's
    frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric data, one variable per column.

    Returns
    -------
    pandas.DataFrame
        New frame with the same index and columns as ``df``, where every
        column has had ``df.mean(axis=0)`` for that column subtracted.
    """
    col_means = df.mean(axis=0)
    # Align the per-column mean with the column labels and subtract at once.
    return df.sub(col_means, axis='columns')

sub_means(data)

## Visualizations

In [None]:
from matplotlib import pyplot

In [None]:
# Line plot: the raw data first, then the Yeo-Johnson-transformed data
for frame, title in (
    (data, 'Original Data'),
    (yeo_johns_power_trans(data), 'Preprocessed Data'),
):
    frame.plot(style='.')
    pyplot.title(title)
    pyplot.show()

In [None]:
# Histogram of the first column: raw data, then Yeo-Johnson-transformed data
for frame in (data, yeo_johns_power_trans(data)):
    first_column = frame.iloc[:, 0]
    first_column.hist()
    pyplot.show()

In [None]:
# Density (KDE) plot: the raw data first, then the Yeo-Johnson-transformed data
for frame, title in (
    (data, 'Original Data'),
    (yeo_johns_power_trans(data), 'Preprocessed Data'),
):
    frame.plot(kind='kde')
    pyplot.title(title)
    pyplot.show()

In [None]:
# Heatmap: render each frame as a matrix image, raw data first
for matrix, title in (
    (data, 'Original Data'),
    (yeo_johns_power_trans(data), 'Preprocessed Data'),
):
    pyplot.matshow(matrix)
    pyplot.title(title)
    pyplot.show()

In [None]:
# Scatter plot (lag plot): the raw data first, then the Yeo-Johnson-transformed data
from pandas.plotting import lag_plot
for frame, title in (
    (data, 'Original Data'),
    (yeo_johns_power_trans(data), 'Preprocessed Data'),
):
    # NOTE(review): lag_plot is documented for a Series; a whole frame is passed here - confirm intent.
    lag_plot(frame)
    pyplot.title(title)
    pyplot.show()