## Functions

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, PowerTransformer

In [None]:
# Load the example dataset from a CSV file located one directory above the
# notebook; the resulting DataFrame `data` is the input for every cell below.
data = pd.read_csv('../ex_data_2.csv')
# Preview the first rows to sanity-check what was loaded.
data.head()

In [None]:
# Standardize
def standardize(df):
    """Standardize the columns of ``df`` with scikit-learn's ``StandardScaler``.

    Args:
        df: Numeric tabular data accepted by ``StandardScaler.fit_transform``.

    Returns:
        A new ``pandas.DataFrame`` built from the transformed values. It is
        constructed without explicit labels, so with scikit-learn's default
        array output the row and column labels of ``df`` are not carried over.
    """
    scaled_values = StandardScaler().fit_transform(df)
    return pd.DataFrame(scaled_values)

# Preview the first rows of the standardized data.
standardize(data).head()

In [None]:
# Normalize
def normalize(df, min, max):
    """Rescale the columns of ``df`` with scikit-learn's ``MinMaxScaler``.

    Args:
        df: Numeric tabular data accepted by ``MinMaxScaler.fit_transform``.
        min: Lower bound passed as the first element of ``feature_range``.
        max: Upper bound passed as the second element of ``feature_range``.

    Returns:
        A new ``pandas.DataFrame`` built from the transformed values. It is
        constructed without explicit labels, so with scikit-learn's default
        array output the row and column labels of ``df`` are not carried over.
    """
    # NOTE: the parameter names shadow the builtins inside this function; they
    # are kept because they are part of the public signature.
    target_range = (min, max)
    rescaled = MinMaxScaler(feature_range=target_range).fit_transform(df)
    return pd.DataFrame(rescaled)

# Preview the first rows of the data rescaled with a feature range of (0, 1).
normalize(data, 0, 1).head()

In [None]:
# Moving average filter
def moving_avg_filter(df, window_size):
    """Smooth ``df`` with a trailing moving average.

    Args:
        df: ``pandas.DataFrame`` to smooth.
        window_size: Number of consecutive rows averaged per output row. A
            value greater than or equal to the number of rows is capped at
            the number of rows.

    Returns:
        A new ``pandas.DataFrame`` holding the rolling mean, with every row
        that contains a NaN removed (this includes the leading rows that do
        not yet have a full window) and the index renumbered from 0.
    """
    # Cap the window at the row count; a larger window would never be full
    # and every output row would be dropped as NaN.
    ws = min(window_size, len(df.index))
    smoothed = df.rolling(window=ws).mean()
    return smoothed.dropna().reset_index(drop=True)

# Preview the first rows of the data smoothed with a window of 2.
moving_avg_filter(data, 2).head()

In [None]:
# Difference transformation
def difference_transformation(df):
    """Return the row-to-row differences of ``df``.

    Args:
        df: ``pandas.DataFrame`` to difference.

    Returns:
        A new ``pandas.DataFrame`` in which each row is the current row minus
        the previous one. Rows containing NaN (including the first row, which
        has no predecessor) are removed and the index is renumbered from 0.
    """
    return df.diff().dropna().reset_index(drop=True)

# Preview the first rows of the differenced data.
difference_transformation(data).head()

In [None]:
# Box-Cox power transformation
def box_cox_power_trans(df):
    """Apply a Box-Cox power transformation to the columns of ``df``.

    Args:
        df: Numeric tabular data accepted by ``MinMaxScaler.fit_transform``.

    Returns:
        A new ``pandas.DataFrame`` built from the transformed values. It is
        constructed without explicit labels, so with scikit-learn's default
        array output the row and column labels of ``df`` are not carried over.
    """
    # Box-Cox is only defined for strictly positive values, so the data is
    # first rescaled with a feature range of (1, 2).
    positive = MinMaxScaler(feature_range=(1, 2)).fit_transform(df)
    transformed = PowerTransformer(method='box-cox').fit_transform(positive)
    return pd.DataFrame(transformed)

# Preview the first rows of the Box-Cox transformed data.
box_cox_power_trans(data).head()

In [None]:
# Yeo-Johnson transformation
def yeo_johns_power_trans(df):
    """Apply a Yeo-Johnson power transformation to the columns of ``df``.

    Args:
        df: Numeric tabular data accepted by
            ``PowerTransformer.fit_transform``.

    Returns:
        A new ``pandas.DataFrame`` built from the transformed values. It is
        constructed without explicit labels, so with scikit-learn's default
        array output the row and column labels of ``df`` are not carried over.
    """
    transformed = PowerTransformer(method='yeo-johnson').fit_transform(df)
    return pd.DataFrame(transformed)

# Preview the first rows of the Yeo-Johnson transformed data.
yeo_johns_power_trans(data).head()

In [None]:
# Divide standard deviations per column
def div_stand_devs(df):
    """Divide every column of ``df`` by that column's standard deviation.

    The input is left untouched. The previous implementation assigned the
    scaled columns back into ``df``, which silently rescaled the caller's
    data for every later use.

    Args:
        df: ``pandas.DataFrame`` with numeric columns.

    Returns:
        A new ``pandas.DataFrame`` with the same labels as ``df`` in which
        each column has been divided by its standard deviation as computed
        by ``DataFrame.std``.
    """
    sd = df.std(axis=0)
    # Align the per-column standard deviations with the column labels.
    return df.div(sd, axis='columns')
    
# Preview the first rows of the result of div_stand_devs on the data.
div_stand_devs(data).head()

In [None]:
# Subtract means per column
def sub_means(df):
    """Subtract from every column of ``df`` that column's mean.

    The input is left untouched. The previous implementation divided by the
    mean instead of subtracting it, and assigned the result back into ``df``.

    Args:
        df: ``pandas.DataFrame`` with numeric columns.

    Returns:
        A new ``pandas.DataFrame`` with the same labels as ``df`` in which
        each column has had its mean subtracted.
    """
    means = df.mean(axis=0)
    # Align the per-column means with the column labels.
    return df.sub(means, axis='columns')

# Preview the first rows of the result of sub_means on the data.
sub_means(data).head()

## Visualizations

In [None]:
from matplotlib import pyplot

In [None]:
# To save a plot as an image, call pyplot.savefig('graph.jpg') before pyplot.show()

In [None]:
# Line plot
# Plot the raw data, then the same data after a moving-average filter with a
# window of 3, as two separate figures for side-by-side comparison.
data.plot()
pyplot.title('Line Plot: Original Data')
# Optional export, disabled. NOTE(review): the file name says "scatter"
# although this cell draws a line plot, and the path is machine-specific.
#pyplot.savefig('C:/Users/Owner/Downloads/scatter-original.jpg')
pyplot.show()
moving_avg_filter(data, 3).plot()
pyplot.title('Line Plot: Preprocessed Data')
# Optional export, disabled (same naming and path caveat as above).
#pyplot.savefig('C:/Users/Owner/Downloads/scatter-preprocessed.jpg')
pyplot.show()

In [None]:
# Histogram
# Histogram of the first column (selected by position) of the raw data, then
# of the same column after a moving-average filter with a window of 3.
data.iloc[:, 0].hist()
pyplot.title('Histogram of 1st feature: Original Data')
# Optional export, disabled; the path is machine-specific.
#pyplot.savefig('C:/Users/Owner/Downloads/hist1-original.jpg')
pyplot.show()
moving_avg_filter(data, 3).iloc[:, 0].hist()
pyplot.title('Histogram of 1st feature: Preprocessed Data')
# Optional export, disabled; the path is machine-specific.
#pyplot.savefig('C:/Users/Owner/Downloads/hist1-preprocessed.jpg')
pyplot.show()

In [None]:
# Density
# Kernel density estimate plot ('kde') of the raw data, then of the data
# after a moving-average filter with a window of 3.
data.plot(kind = 'kde')
pyplot.title('Density: Original Data')
# Optional export, disabled; the path is machine-specific.
#pyplot.savefig('C:/Users/Owner/Downloads/density-original.jpg')
pyplot.show()
moving_avg_filter(data, 3).plot(kind = 'kde')
pyplot.title('Density: Preprocessed Data')
# Optional export, disabled; the path is machine-specific.
#pyplot.savefig('C:/Users/Owner/Downloads/density-preprocessed.jpg')
pyplot.show()

In [None]:
# Heatmap
# Render the raw data table as a matrix image with pyplot.matshow, then do
# the same for the data after a moving-average filter with a window of 3.
pyplot.matshow(data)
pyplot.title('Heatmap: Original Data')
# Optional export, disabled; the path is machine-specific.
#pyplot.savefig('C:/Users/Owner/Downloads/heatmap-original.jpg')
pyplot.show()
pyplot.matshow(moving_avg_filter(data, 3))
pyplot.title('Heatmap: Preprocessed Data')
# Optional export, disabled; the path is machine-specific.
#pyplot.savefig('C:/Users/Owner/Downloads/heatmap-preprocessed.jpg')
pyplot.show()