# Scale the Data

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Import-Modules:" data-toc-modified-id="Import-Modules:-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Import Modules:</a></span></li><li><span><a href="#Import-Data:" data-toc-modified-id="Import-Data:-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Import Data:</a></span></li><li><span><a href="#Setup-Train-and-Test-Data-Sets:" data-toc-modified-id="Setup-Train-and-Test-Data-Sets:-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Setup Train and Test Data Sets:</a></span></li><li><span><a href="#Apply-Standard-Scaler:" data-toc-modified-id="Apply-Standard-Scaler:-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Apply Standard Scaler:</a></span></li><li><span><a href="#Save-Scaled-Data:" data-toc-modified-id="Save-Scaled-Data:-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Save Scaled Data:</a></span></li></ul></div>

### Import Modules:

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

### Import Data:

In [2]:
# Input CSV locations (relative to this notebook): the processed train
# and test tables that the following cells read.
clean_train_file, clean_test_file = (
    '../Data/train_clean_processed.csv',
    '../Data/test_clean_processed.csv',
)

In [3]:
# Load the processed training table; the CSV column named 'Unnamed: 0'
# is used as the row index instead of being kept as a data column.
data_train = pd.read_csv(
    clean_train_file,
    index_col='Unnamed: 0',
)
# Show the first two rows as a quick sanity check of the columns.
data_train.head(n=2)

  mask |= (ar1 == a)


Unnamed: 0,time,signal,open_channels,ewm_signal,dsdt,d2sdt2
0,0.0001,-2.76,0,-2.76,-0.0957,0.272
1,0.0002,-2.8557,0,-2.8238,0.1763,-0.0233


In [4]:
# Load the processed test table the same way as the training table:
# the 'Unnamed: 0' CSV column becomes the row index.
data_test = pd.read_csv(
    clean_test_file,
    index_col='Unnamed: 0',
)
# Show the first two rows as a quick sanity check of the columns.
data_test.head(n=2)

  mask |= (ar1 == a)


Unnamed: 0,time,signal,ewm_signal,dsdt,d2sdt2
0,500.0001,-2.6498,-2.6498,-0.1996,0.0945
1,500.0002,-2.8495,-2.7829,-0.1051,0.2034


### Setup Train and Test Data Sets:

In [5]:
# Feature columns = every training column except the target
# ('open_channels') and the 'time' column.
# list.remove raises ValueError if either name is absent, which surfaces
# an unexpected input schema immediately.
Xcolumns = data_train.columns.tolist()
for non_feature in ('open_channels', 'time'):
    Xcolumns.remove(non_feature)
print(Xcolumns)

['signal', 'ewm_signal', 'dsdt', 'd2sdt2']


In [6]:
# Target vector: the 'open_channels' column of the training table.
y1 = data_train['open_channels']
# Feature matrix: the training table restricted to the feature columns.
X1 = data_train[Xcolumns]

In [7]:
# Hold out 30% of the labelled rows for evaluation. The split is
# stratified on y1 and uses a fixed random_state so it is repeatable.
(X1_train, X1_test,
 y1_train, y1_test) = train_test_split(
    X1,
    y1,
    test_size=0.3,
    random_state=1,
    stratify=y1,
)

In [8]:
# Select the same feature columns from the test table (all rows kept).
X2 = data_test.loc[:, Xcolumns]

### Apply Standard Scaler:

In [9]:
# Scaler with its default settings written out explicitly: centre each
# feature on its mean and divide by its standard deviation.
scaler1 = StandardScaler(copy=True, with_mean=True, with_std=True)

In [10]:
# Fit the scaler on the training split only, then apply the same fitted
# parameters to the held-out split.
#
# The scaler returns plain arrays, so each result is wrapped back into a
# DataFrame. The original row labels are passed through via `index=` so
# the scaled frames stay label-aligned with y1_train / y1_test (which keep
# the labels produced by the split). The CSVs written later use
# index=False, so the files on disk are unaffected by this.
#
# NOTE: this cell rebinds X1_train / X1_test. Re-running it without first
# re-running the split cell would refit scaler1 on already-scaled data.
X1_train = pd.DataFrame(
    scaler1.fit_transform(X1_train),
    columns=X1.columns,
    index=X1_train.index,
)
X1_test = pd.DataFrame(
    scaler1.transform(X1_test),
    columns=X1.columns,
    index=X1_test.index,
)

In [11]:
# Scale the test features with the already-fitted scaler1 (transform only,
# no refit) and restore the column names on the resulting array.
X2_test = pd.DataFrame(scaler1.transform(X2), columns=X2.columns)

### Save Scaled Data:

In [12]:
# Filepaths / names:
y1_train_file = '../Data/y1_train_clean_processed.csv'
y1_test_file = '../Data/y1_test_clean_processed.csv'
X1_train_file = '../Data/X1_train_clean_processed.csv'
X1_test_file = '../Data/X1_test_clean_processed.csv'
X2_test_file = '../Data/X2_test_clean_processed.csv'

In [13]:
# Write the training targets without the row index. float_format only
# affects floating-point values (four decimals).
y1_train.to_csv(
    y1_train_file,
    index=False,
    float_format='%.4f',
)

In [14]:
# Write the hold-out targets without the row index. float_format only
# affects floating-point values (four decimals).
y1_test.to_csv(
    y1_test_file,
    index=False,
    float_format='%.4f',
)

In [15]:
# Write the scaled training features, rounded to four decimals, without
# the row index.
X1_train.to_csv(
    X1_train_file,
    index=False,
    float_format='%.4f',
)

In [16]:
# Write the scaled hold-out features, rounded to four decimals, without
# the row index.
X1_test.to_csv(
    X1_test_file,
    index=False,
    float_format='%.4f',
)

In [17]:
# Write the scaled test-set features, rounded to four decimals, without
# the row index.
X2_test.to_csv(
    X2_test_file,
    index=False,
    float_format='%.4f',
)