-
Notifications
You must be signed in to change notification settings - Fork 0
/
norm.py
46 lines (34 loc) · 1.17 KB
/
norm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import os

import pandas as pd
from sklearn.preprocessing import StandardScaler
# Locations and naming scheme of the input and output CSV files.
input_dir = 'data/'
output_dir = 'data/norm/'
base_file_name = 'data_banknote_authentication'

# Name of the target column; it is excluded from scaling.
label = 'class'

# Load the train and test splits (comma-separated CSV files).
train_data = pd.read_csv(input_dir + 'train_' + base_file_name + '.csv', sep=',')
test_data = pd.read_csv(input_dir + 'test_' + base_file_name + '.csv', sep=',')

# Fit the scaler on the training features only; scale() later applies this
# same fitted scaler to both splits.
# `columns=` replaces the positional axis argument (`drop(label, 1)`), which
# pandas deprecated in 1.3 and removed in 2.0.
scaler = StandardScaler()
scaler.fit(train_data.drop(columns=label))
def print_info(df):
    """Print summary statistics of the feature columns of ``df``.

    The ``label`` column is dropped before ``describe()`` is called, and a
    blank line is printed after the statistics.

    Args:
        df: DataFrame holding the feature columns plus the ``label`` column.
    """
    # Keyword form: the positional axis argument (`drop(label, 1)`) was
    # deprecated in pandas 1.3 and removed in pandas 2.0.
    print(df.drop(columns=label).describe())
    print('')
def scale(df):
    """Return a new DataFrame with the feature columns of ``df`` scaled.

    Every column except ``label`` is transformed with the module-level
    ``scaler``; the ``label`` column is copied over unchanged and ends up as
    the last column of the result.

    Args:
        df: DataFrame holding the feature columns plus the ``label`` column.

    Returns:
        A new DataFrame with the transformed features and the original
        labels. ``df`` itself is not modified.
    """
    # Keyword form: the positional axis argument (`drop(label, 1)`) was
    # deprecated in pandas 1.3 and removed in pandas 2.0.
    df_features = df.drop(columns=label)
    scaled_data = scaler.transform(df_features)

    # Reuse the input's index: the label assignment below aligns on index, so
    # a fresh RangeIndex would yield NaN/misplaced labels whenever ``df`` does
    # not carry a default 0..n-1 index.
    scaled_df = pd.DataFrame(
        scaled_data,
        columns=df_features.columns,
        index=df_features.index,
    )
    scaled_df[label] = df[label]
    return scaled_df
# Report the feature statistics before scaling.
print('Train set')
print_info(train_data)
print('Test set')
print_info(test_data)

# Scale both splits with the scaler that was fitted on the training data.
train_data = scale(train_data)
test_data = scale(test_data)

print('')
print('Normalized')
print('')

# Report the feature statistics again after scaling.
print('Train set')
print_info(train_data)
print('Test set')
print_info(test_data)

# to_csv() does not create missing directories, so make sure the output
# directory exists before writing.
os.makedirs(output_dir, exist_ok=True)
train_data.to_csv(output_dir + 'train_' + base_file_name + '.csv', sep=',', index=False)
test_data.to_csv(output_dir + 'test_' + base_file_name + '.csv', sep=',', index=False)