In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict

%matplotlib inline

## Loading data

In [2]:
# Read the two UJIIndoorLoc CSV files. The "validation" file is what the rest
# of this notebook calls the test set.
train_csv_path = "../data/UjiIndoorLoc/TrainingData.csv"
test_csv_path = "../data/UjiIndoorLoc/ValidationData.csv"
df_train, df_test = pd.read_csv(train_csv_path), pd.read_csv(test_csv_path)

In [5]:
# (rows, columns) of the training frame; displayed as the cell's output.
df_train.shape

(19937, 529)

## Data exploration

In [3]:
def _count_distinct(columns):
    """Count distinct values of `columns` in the training set.

    A single column name (str) is counted with Series.nunique(); a list of
    column names is counted as the number of distinct rows of that sub-frame.
    """
    if isinstance(columns, str):
        return df_train[columns].nunique()
    return len(np.unique(df_train[columns].values, axis=0))


# (message template, column or columns whose distinct combinations are counted),
# listed in the order they are printed.
uniqueness_report = [
    ("- %d unique building-floor tuples", ['BUILDINGID', 'FLOOR']),
    ("- %d unique space ids", 'SPACEID'),
    ("- %d unique building-floor-space tuples", ['BUILDINGID', 'FLOOR', 'SPACEID']),
    ("- %d unique building-floor-position tuples", ['BUILDINGID', 'FLOOR', 'SPACEID', 'RELATIVEPOSITION']),
    ("- %d unique coordinates", ['LONGITUDE', 'LATITUDE']),
    ("- %d unique building-coordinates", ['BUILDINGID', 'LONGITUDE', 'LATITUDE']),
    ("- %d unique building-floor-coordinates", ['BUILDINGID', 'FLOOR', 'LONGITUDE', 'LATITUDE']),
    ("- %d unique building-floor-space-coordinates", ['BUILDINGID', 'FLOOR', 'SPACEID', 'LONGITUDE', 'LATITUDE']),
    ("- %d unique space-coordinates", ['SPACEID', 'LONGITUDE', 'LATITUDE']),
    ("- %d unique user-space tuples", ['USERID', 'SPACEID']),
]

print("The training set has:")
for template, columns in uniqueness_report:
    print(template % _count_distinct(columns))

The training set has:
- 13 unique building-floor tuples
- 123 unique rooms
- 735 unique building-floor-room tuples
- 905 unique building-floor-position tuples
- 692 unique coordinates
- 692 unique building-coordinates
- 933 unique building-floor-coordinates
- 933 unique building-floor-room-coordinates
- 852 unique room-coordinates
- 937 unique user-room tuples


## Data pre-processing

### Re-assigning RSSI values

In [4]:
# min(df_train.iloc[:, :520].min(axis=0))
# min signal strength is -104
def _rebase_rssi(frame):
    """Shift the RSSI readings of `frame` in place so "not detected" becomes 0.

    The first 520 columns are treated as the WAP readings. The raw marker for
    an undetected WAP (100) is mapped to -105, one below the weakest reading
    noted above (-104); adding 105 then puts undetected at 0 and raises every
    other reading by the same amount.
    """
    wap = frame.iloc[:, :520]
    frame.iloc[:, :520] = wap.mask(wap == 100, -105) + 105


# Same re-basing for both splits.
for rssi_frame in (df_train, df_test):
    _rebase_rssi(rssi_frame)

### Removing WAPs not in use and observations without detecting any signals

In [6]:
def _rows_without_variation(wap):
    """Return a boolean Series that is True where all readings in a row are equal.

    Every value in the row is compared with the row's first value, which gives
    the same answer as "number of distinct values == 1" when there are no
    missing values, without reshaping the frame into one long Series.
    """
    return wap.eq(wap.iloc[:, 0], axis=0).all(axis=1)


# Remove observations whose 520 WAP readings are all identical, i.e. rows in
# which no WAP was detected (undetected readings are 0 after the RSSI shift).
df_train = df_train.drop(index=df_train.index[_rows_without_variation(df_train.iloc[:, :520])])
df_test = df_test.drop(index=df_test.index[_rows_without_variation(df_test.iloc[:, :520])])

# Remove WAP columns that hold a single value across the remaining training
# rows (never detected in training, so they carry no information).
wap_not_detected_train = [i for i in df_train.columns[:520] if df_train[i].nunique() == 1]
df_train = df_train.drop(columns=wap_not_detected_train)

print("{} columns are dropped.".format(len(wap_not_detected_train)))

# The test set keeps exactly the training columns, so transformers fitted on
# the training data can be applied to it. The explicit copy makes df_test an
# independent frame, so later column assignments do not raise
# SettingWithCopyWarning.
df_test = df_test[df_train.columns].copy()

55 columns are dropped.


'\nwap_not_detected_test = [i for i in df_test.columns[:520] if df_test[i].nunique() == 1]\ndf_test.drop(wap_not_detected_test, axis=1, inplace=True)\n\nprint("{} columns are dropped.".format(len(wap_not_detected_test)))\n'

In [7]:
# (rows, columns) of the training frame; displayed as the cell's output.
df_train.shape

(19861, 474)

### Assigning labels to (floor, latitude, longitude) reference points

In [6]:
# Give every distinct (FLOOR, LATITUDE, LONGITUDE) combination in the training
# set a label: its position, as a string, in the array returned by np.unique.
# NOTE: FLOOR must still be numeric here; this has to run before the
# "Converting data types" cell turns it into strings.
unique_coordinates = np.unique(df_train[['FLOOR', 'LATITUDE', 'LONGITUDE']], axis=0)

# A plain dict is used on purpose: every training row has an entry by
# construction, and an unknown key should raise KeyError rather than be created.
gps_labels = {tuple(gps): str(idx) for idx, gps in enumerate(unique_coordinates)}

# Label of the reference point at which each training row was recorded.
ref_points = [
    gps_labels[key]
    for key in zip(df_train['FLOOR'], df_train['LATITUDE'], df_train['LONGITUDE'])
]

df_train['LOC'] = ref_points

### Converting data types

In [7]:
# BUILDINGID, FLOOR and USERID are converted to strings in both frames;
# later cells select on them with string comparisons such as == '0'.
for frame in (df_train, df_test):
    for id_column in ('BUILDINGID', 'FLOOR', 'USERID'):
        frame[id_column] = frame[id_column].astype(str)

### Subsetting data, normalising RSSI values, and PCA

In [8]:
from sklearn.decomposition import PCA

def _new_pca():
    """Return an unfitted PCA configured with n_components=.95.

    With a fractional n_components scikit-learn keeps as many components as
    are needed to explain that share of the variance (see the PCA docs).
    """
    return PCA(n_components=.95)


# One independent PCA per data subset, so each is fitted only on its own rows.
# whole training set
pca_train = _new_pca()

# one per building
pca_train_b0, pca_train_b1, pca_train_b2 = (_new_pca() for _ in range(3))

# building 0: floors 0-3
pca_train_b0_f0, pca_train_b0_f1, pca_train_b0_f2, pca_train_b0_f3 = (
    _new_pca() for _ in range(4)
)

# building 1: floors 0-3
pca_train_b1_f0, pca_train_b1_f1, pca_train_b1_f2, pca_train_b1_f3 = (
    _new_pca() for _ in range(4)
)

# building 2: floors 0-4
pca_train_b2_f0, pca_train_b2_f1, pca_train_b2_f2, pca_train_b2_f3, pca_train_b2_f4 = (
    _new_pca() for _ in range(5)
)

In [9]:
from sklearn.preprocessing import StandardScaler

# One independent StandardScaler per data subset (whole set, each building,
# each building-floor pair), mirroring the PCA objects defined above.
ss_train = StandardScaler()

# one per building
ss_train_b0, ss_train_b1, ss_train_b2 = (StandardScaler() for _ in range(3))

# building 0: floors 0-3
ss_train_b0_f0, ss_train_b0_f1, ss_train_b0_f2, ss_train_b0_f3 = (
    StandardScaler() for _ in range(4)
)

# building 1: floors 0-3
ss_train_b1_f0, ss_train_b1_f1, ss_train_b1_f2, ss_train_b1_f3 = (
    StandardScaler() for _ in range(4)
)

# building 2: floors 0-4
ss_train_b2_f0, ss_train_b2_f1, ss_train_b2_f2, ss_train_b2_f3, ss_train_b2_f4 = (
    StandardScaler() for _ in range(5)
)

In [10]:
# Whole training set: standardise the first 465 columns, then project the
# standardised values with PCA.
# NOTE(review): 465 is presumably the number of WAP columns left after the
# column drop above (520 - 55) -- confirm if that filtering changes.
train_wap = df_train.iloc[:, :465]
df_train_wap_ss = ss_train.fit_transform(train_wap)
df_train_wap = pca_train.fit(df_train_wap_ss).transform(df_train_wap_ss)

# Per-building subsets (BUILDINGID holds strings at this point).
train_building = df_train['BUILDINGID']
df_train_b0 = df_train.loc[train_building == '0']
df_train_b1 = df_train.loc[train_building == '1']
df_train_b2 = df_train.loc[train_building == '2']

# For each building: scaler and PCA are fitted on that building's rows only,
# using the columns up to and including 'WAP519'.
df_train_b0_wap_ss = ss_train_b0.fit_transform(df_train_b0.loc[:, :'WAP519'])
df_train_b0_wap = pca_train_b0.fit(df_train_b0_wap_ss).transform(df_train_b0_wap_ss)

df_train_b1_wap_ss = ss_train_b1.fit_transform(df_train_b1.loc[:, :'WAP519'])
df_train_b1_wap = pca_train_b1.fit(df_train_b1_wap_ss).transform(df_train_b1_wap_ss)

df_train_b2_wap_ss = ss_train_b2.fit_transform(df_train_b2.loc[:, :'WAP519'])
df_train_b2_wap = pca_train_b2.fit(df_train_b2_wap_ss).transform(df_train_b2_wap_ss)

In [11]:
# Building 0, split by floor (floors '0'-'3').
train_in_b0 = df_train['BUILDINGID'] == '0'
train_floor = df_train['FLOOR']

df_train_b0_f0 = df_train.loc[train_in_b0 & (train_floor == '0')]
df_train_b0_f1 = df_train.loc[train_in_b0 & (train_floor == '1')]
df_train_b0_f2 = df_train.loc[train_in_b0 & (train_floor == '2')]
df_train_b0_f3 = df_train.loc[train_in_b0 & (train_floor == '3')]

# Per floor: fit that floor's scaler on its columns up to and including
# 'WAP519', then fit that floor's PCA on the standardised values and project.
# `df` is a scratch name re-bound for every floor.
df = df_train_b0_f0.loc[:, :'WAP519']
df_train_b0_f0_wap_ss = ss_train_b0_f0.fit_transform(df)
df_train_b0_f0_wap = pca_train_b0_f0.fit(df_train_b0_f0_wap_ss).transform(df_train_b0_f0_wap_ss)

df = df_train_b0_f1.loc[:, :'WAP519']
df_train_b0_f1_wap_ss = ss_train_b0_f1.fit_transform(df)
df_train_b0_f1_wap = pca_train_b0_f1.fit(df_train_b0_f1_wap_ss).transform(df_train_b0_f1_wap_ss)

df = df_train_b0_f2.loc[:, :'WAP519']
df_train_b0_f2_wap_ss = ss_train_b0_f2.fit_transform(df)
df_train_b0_f2_wap = pca_train_b0_f2.fit(df_train_b0_f2_wap_ss).transform(df_train_b0_f2_wap_ss)

df = df_train_b0_f3.loc[:, :'WAP519']
df_train_b0_f3_wap_ss = ss_train_b0_f3.fit_transform(df)
df_train_b0_f3_wap = pca_train_b0_f3.fit(df_train_b0_f3_wap_ss).transform(df_train_b0_f3_wap_ss)

In [12]:
# Building 1, split by floor (floors '0'-'3').
train_in_b1 = df_train['BUILDINGID'] == '1'
train_floor = df_train['FLOOR']

df_train_b1_f0 = df_train.loc[train_in_b1 & (train_floor == '0')]
df_train_b1_f1 = df_train.loc[train_in_b1 & (train_floor == '1')]
df_train_b1_f2 = df_train.loc[train_in_b1 & (train_floor == '2')]
df_train_b1_f3 = df_train.loc[train_in_b1 & (train_floor == '3')]

# Per floor: fit that floor's scaler on its columns up to and including
# 'WAP519', then fit that floor's PCA on the standardised values and project.
# `df` is a scratch name re-bound for every floor.
df = df_train_b1_f0.loc[:, :'WAP519']
df_train_b1_f0_wap_ss = ss_train_b1_f0.fit_transform(df)
df_train_b1_f0_wap = pca_train_b1_f0.fit(df_train_b1_f0_wap_ss).transform(df_train_b1_f0_wap_ss)

df = df_train_b1_f1.loc[:, :'WAP519']
df_train_b1_f1_wap_ss = ss_train_b1_f1.fit_transform(df)
df_train_b1_f1_wap = pca_train_b1_f1.fit(df_train_b1_f1_wap_ss).transform(df_train_b1_f1_wap_ss)

df = df_train_b1_f2.loc[:, :'WAP519']
df_train_b1_f2_wap_ss = ss_train_b1_f2.fit_transform(df)
df_train_b1_f2_wap = pca_train_b1_f2.fit(df_train_b1_f2_wap_ss).transform(df_train_b1_f2_wap_ss)

df = df_train_b1_f3.loc[:, :'WAP519']
df_train_b1_f3_wap_ss = ss_train_b1_f3.fit_transform(df)
df_train_b1_f3_wap = pca_train_b1_f3.fit(df_train_b1_f3_wap_ss).transform(df_train_b1_f3_wap_ss)

In [13]:
# Building 2, split by floor (floors '0'-'4'; this block handles five floors).
train_in_b2 = df_train['BUILDINGID'] == '2'
train_floor = df_train['FLOOR']

df_train_b2_f0 = df_train.loc[train_in_b2 & (train_floor == '0')]
df_train_b2_f1 = df_train.loc[train_in_b2 & (train_floor == '1')]
df_train_b2_f2 = df_train.loc[train_in_b2 & (train_floor == '2')]
df_train_b2_f3 = df_train.loc[train_in_b2 & (train_floor == '3')]
df_train_b2_f4 = df_train.loc[train_in_b2 & (train_floor == '4')]

# Per floor: fit that floor's scaler on its columns up to and including
# 'WAP519', then fit that floor's PCA on the standardised values and project.
# `df` is a scratch name re-bound for every floor.
df = df_train_b2_f0.loc[:, :'WAP519']
df_train_b2_f0_wap_ss = ss_train_b2_f0.fit_transform(df)
df_train_b2_f0_wap = pca_train_b2_f0.fit(df_train_b2_f0_wap_ss).transform(df_train_b2_f0_wap_ss)

df = df_train_b2_f1.loc[:, :'WAP519']
df_train_b2_f1_wap_ss = ss_train_b2_f1.fit_transform(df)
df_train_b2_f1_wap = pca_train_b2_f1.fit(df_train_b2_f1_wap_ss).transform(df_train_b2_f1_wap_ss)

df = df_train_b2_f2.loc[:, :'WAP519']
df_train_b2_f2_wap_ss = ss_train_b2_f2.fit_transform(df)
df_train_b2_f2_wap = pca_train_b2_f2.fit(df_train_b2_f2_wap_ss).transform(df_train_b2_f2_wap_ss)

df = df_train_b2_f3.loc[:, :'WAP519']
df_train_b2_f3_wap_ss = ss_train_b2_f3.fit_transform(df)
df_train_b2_f3_wap = pca_train_b2_f3.fit(df_train_b2_f3_wap_ss).transform(df_train_b2_f3_wap_ss)

df = df_train_b2_f4.loc[:, :'WAP519']
df_train_b2_f4_wap_ss = ss_train_b2_f4.fit_transform(df)
df_train_b2_f4_wap = pca_train_b2_f4.fit(df_train_b2_f4_wap_ss).transform(df_train_b2_f4_wap_ss)

### Applying the fitted normalisation and PCA to the test data

In [14]:
# Partition the test set by building and by building-floor pair, using the
# same string comparisons on BUILDINGID and FLOOR as for the training set.
test_building = df_test['BUILDINGID']
test_floor = df_test['FLOOR']
test_in_b0 = test_building == '0'
test_in_b1 = test_building == '1'
test_in_b2 = test_building == '2'

# buildings
df_test_b0 = df_test.loc[test_in_b0]
df_test_b1 = df_test.loc[test_in_b1]
df_test_b2 = df_test.loc[test_in_b2]

# building 0: floors 0-3
df_test_b0_f0 = df_test.loc[test_in_b0 & (test_floor == '0')]
df_test_b0_f1 = df_test.loc[test_in_b0 & (test_floor == '1')]
df_test_b0_f2 = df_test.loc[test_in_b0 & (test_floor == '2')]
df_test_b0_f3 = df_test.loc[test_in_b0 & (test_floor == '3')]

# building 1: floors 0-3
df_test_b1_f0 = df_test.loc[test_in_b1 & (test_floor == '0')]
df_test_b1_f1 = df_test.loc[test_in_b1 & (test_floor == '1')]
df_test_b1_f2 = df_test.loc[test_in_b1 & (test_floor == '2')]
df_test_b1_f3 = df_test.loc[test_in_b1 & (test_floor == '3')]

# building 2: floors 0-4
df_test_b2_f0 = df_test.loc[test_in_b2 & (test_floor == '0')]
df_test_b2_f1 = df_test.loc[test_in_b2 & (test_floor == '1')]
df_test_b2_f2 = df_test.loc[test_in_b2 & (test_floor == '2')]
df_test_b2_f3 = df_test.loc[test_in_b2 & (test_floor == '3')]
df_test_b2_f4 = df_test.loc[test_in_b2 & (test_floor == '4')]

In [15]:
# Whole test set: apply the scaler and PCA that were fitted on the whole
# training set (transform only -- nothing is re-fitted on test data).
test_wap = df_test.iloc[:, :465]
df_test_wap_ss = ss_train.transform(test_wap)
df_test_wap = pca_train.transform(df_test_wap_ss)

# Per building: apply that building's training-fitted scaler and PCA to the
# building's test rows (columns up to and including 'WAP519').
b0_test_wap = df_test_b0.loc[:, :'WAP519']
df_test_b0_wap_ss = ss_train_b0.transform(b0_test_wap)
df_test_b0_wap = pca_train_b0.transform(df_test_b0_wap_ss)

b1_test_wap = df_test_b1.loc[:, :'WAP519']
df_test_b1_wap_ss = ss_train_b1.transform(b1_test_wap)
df_test_b1_wap = pca_train_b1.transform(df_test_b1_wap_ss)

b2_test_wap = df_test_b2.loc[:, :'WAP519']
df_test_b2_wap_ss = ss_train_b2.transform(b2_test_wap)
df_test_b2_wap = pca_train_b2.transform(df_test_b2_wap_ss)

In [16]:
# Building 0, per floor: apply the floor's training-fitted scaler and PCA to
# the matching test rows (columns up to and including 'WAP519').
# `df` is a scratch name re-bound for every floor.
df = df_test_b0_f0.loc[:, :'WAP519']
df_test_b0_f0_wap_ss = ss_train_b0_f0.transform(df)
df_test_b0_f0_wap = pca_train_b0_f0.transform(df_test_b0_f0_wap_ss)

df = df_test_b0_f1.loc[:, :'WAP519']
df_test_b0_f1_wap_ss = ss_train_b0_f1.transform(df)
df_test_b0_f1_wap = pca_train_b0_f1.transform(df_test_b0_f1_wap_ss)

df = df_test_b0_f2.loc[:, :'WAP519']
df_test_b0_f2_wap_ss = ss_train_b0_f2.transform(df)
df_test_b0_f2_wap = pca_train_b0_f2.transform(df_test_b0_f2_wap_ss)

df = df_test_b0_f3.loc[:, :'WAP519']
df_test_b0_f3_wap_ss = ss_train_b0_f3.transform(df)
df_test_b0_f3_wap = pca_train_b0_f3.transform(df_test_b0_f3_wap_ss)

In [17]:
# Building 1, per floor: apply the floor's training-fitted scaler and PCA to
# the matching test rows (columns up to and including 'WAP519').
# `df` is a scratch name re-bound for every floor.
df = df_test_b1_f0.loc[:, :'WAP519']
df_test_b1_f0_wap_ss = ss_train_b1_f0.transform(df)
df_test_b1_f0_wap = pca_train_b1_f0.transform(df_test_b1_f0_wap_ss)

df = df_test_b1_f1.loc[:, :'WAP519']
df_test_b1_f1_wap_ss = ss_train_b1_f1.transform(df)
df_test_b1_f1_wap = pca_train_b1_f1.transform(df_test_b1_f1_wap_ss)

df = df_test_b1_f2.loc[:, :'WAP519']
df_test_b1_f2_wap_ss = ss_train_b1_f2.transform(df)
df_test_b1_f2_wap = pca_train_b1_f2.transform(df_test_b1_f2_wap_ss)

df = df_test_b1_f3.loc[:, :'WAP519']
df_test_b1_f3_wap_ss = ss_train_b1_f3.transform(df)
df_test_b1_f3_wap = pca_train_b1_f3.transform(df_test_b1_f3_wap_ss)

In [18]:
# Building 2, per floor (five floors): apply the floor's training-fitted
# scaler and PCA to the matching test rows (columns up to and including
# 'WAP519'). `df` is a scratch name re-bound for every floor.
df = df_test_b2_f0.loc[:, :'WAP519']
df_test_b2_f0_wap_ss = ss_train_b2_f0.transform(df)
df_test_b2_f0_wap = pca_train_b2_f0.transform(df_test_b2_f0_wap_ss)

df = df_test_b2_f1.loc[:, :'WAP519']
df_test_b2_f1_wap_ss = ss_train_b2_f1.transform(df)
df_test_b2_f1_wap = pca_train_b2_f1.transform(df_test_b2_f1_wap_ss)

df = df_test_b2_f2.loc[:, :'WAP519']
df_test_b2_f2_wap_ss = ss_train_b2_f2.transform(df)
df_test_b2_f2_wap = pca_train_b2_f2.transform(df_test_b2_f2_wap_ss)

df = df_test_b2_f3.loc[:, :'WAP519']
df_test_b2_f3_wap_ss = ss_train_b2_f3.transform(df)
df_test_b2_f3_wap = pca_train_b2_f3.transform(df_test_b2_f3_wap_ss)

df = df_test_b2_f4.loc[:, :'WAP519']
df_test_b2_f4_wap_ss = ss_train_b2_f4.transform(df)
df_test_b2_f4_wap = pca_train_b2_f4.transform(df_test_b2_f4_wap_ss)

## Saving processed data

In [19]:
# train
# (destination CSV, frame) pairs: the full training frame, then the
# per-building and per-floor subsets. Written with to_csv defaults, so the
# row index is included in each file.
train_frame_exports = [
    ("../data/train/df_train.csv", df_train),
    ("../data/train/df_train_b0.csv", df_train_b0),
    ("../data/train/df_train_b1.csv", df_train_b1),
    ("../data/train/df_train_b2.csv", df_train_b2),
    ("../data/train/df_train_b0_f0.csv", df_train_b0_f0),
    ("../data/train/df_train_b0_f1.csv", df_train_b0_f1),
    ("../data/train/df_train_b0_f2.csv", df_train_b0_f2),
    ("../data/train/df_train_b0_f3.csv", df_train_b0_f3),
    ("../data/train/df_train_b1_f0.csv", df_train_b1_f0),
    ("../data/train/df_train_b1_f1.csv", df_train_b1_f1),
    ("../data/train/df_train_b1_f2.csv", df_train_b1_f2),
    ("../data/train/df_train_b1_f3.csv", df_train_b1_f3),
    ("../data/train/df_train_b2_f0.csv", df_train_b2_f0),
    ("../data/train/df_train_b2_f1.csv", df_train_b2_f1),
    ("../data/train/df_train_b2_f2.csv", df_train_b2_f2),
    ("../data/train/df_train_b2_f3.csv", df_train_b2_f3),
    ("../data/train/df_train_b2_f4.csv", df_train_b2_f4),
]
for csv_path, frame in train_frame_exports:
    frame.to_csv(csv_path)

In [20]:
# (destination .npy, array) pairs: the PCA-projected training features for
# the whole set, each building, and each building-floor pair.
train_array_exports = [
    ("../data/train/df_train_wap.npy", df_train_wap),
    ("../data/train/df_train_b0_wap.npy", df_train_b0_wap),
    ("../data/train/df_train_b1_wap.npy", df_train_b1_wap),
    ("../data/train/df_train_b2_wap.npy", df_train_b2_wap),
    ("../data/train/df_train_b0_f0_wap.npy", df_train_b0_f0_wap),
    ("../data/train/df_train_b0_f1_wap.npy", df_train_b0_f1_wap),
    ("../data/train/df_train_b0_f2_wap.npy", df_train_b0_f2_wap),
    ("../data/train/df_train_b0_f3_wap.npy", df_train_b0_f3_wap),
    ("../data/train/df_train_b1_f0_wap.npy", df_train_b1_f0_wap),
    ("../data/train/df_train_b1_f1_wap.npy", df_train_b1_f1_wap),
    ("../data/train/df_train_b1_f2_wap.npy", df_train_b1_f2_wap),
    ("../data/train/df_train_b1_f3_wap.npy", df_train_b1_f3_wap),
    ("../data/train/df_train_b2_f0_wap.npy", df_train_b2_f0_wap),
    ("../data/train/df_train_b2_f1_wap.npy", df_train_b2_f1_wap),
    ("../data/train/df_train_b2_f2_wap.npy", df_train_b2_f2_wap),
    ("../data/train/df_train_b2_f3_wap.npy", df_train_b2_f3_wap),
    ("../data/train/df_train_b2_f4_wap.npy", df_train_b2_f4_wap),
]
for npy_path, features in train_array_exports:
    np.save(npy_path, features)

In [21]:
# test
# (destination CSV, frame) pairs: the full test frame, then the per-building
# and per-floor subsets. Written with to_csv defaults, so the row index is
# included in each file.
test_frame_exports = [
    ("../data/test/df_test.csv", df_test),
    ("../data/test/df_test_b0.csv", df_test_b0),
    ("../data/test/df_test_b1.csv", df_test_b1),
    ("../data/test/df_test_b2.csv", df_test_b2),
    ("../data/test/df_test_b0_f0.csv", df_test_b0_f0),
    ("../data/test/df_test_b0_f1.csv", df_test_b0_f1),
    ("../data/test/df_test_b0_f2.csv", df_test_b0_f2),
    ("../data/test/df_test_b0_f3.csv", df_test_b0_f3),
    ("../data/test/df_test_b1_f0.csv", df_test_b1_f0),
    ("../data/test/df_test_b1_f1.csv", df_test_b1_f1),
    ("../data/test/df_test_b1_f2.csv", df_test_b1_f2),
    ("../data/test/df_test_b1_f3.csv", df_test_b1_f3),
    ("../data/test/df_test_b2_f0.csv", df_test_b2_f0),
    ("../data/test/df_test_b2_f1.csv", df_test_b2_f1),
    ("../data/test/df_test_b2_f2.csv", df_test_b2_f2),
    ("../data/test/df_test_b2_f3.csv", df_test_b2_f3),
    ("../data/test/df_test_b2_f4.csv", df_test_b2_f4),
]
for csv_path, frame in test_frame_exports:
    frame.to_csv(csv_path)

In [23]:
# (destination .npy, array) pairs: the PCA-projected test features for the
# whole set, each building, and each building-floor pair.
test_array_exports = [
    ("../data/test/df_test_wap.npy", df_test_wap),
    ("../data/test/df_test_b0_wap.npy", df_test_b0_wap),
    ("../data/test/df_test_b1_wap.npy", df_test_b1_wap),
    ("../data/test/df_test_b2_wap.npy", df_test_b2_wap),
    ("../data/test/df_test_b0_f0_wap.npy", df_test_b0_f0_wap),
    ("../data/test/df_test_b0_f1_wap.npy", df_test_b0_f1_wap),
    ("../data/test/df_test_b0_f2_wap.npy", df_test_b0_f2_wap),
    ("../data/test/df_test_b0_f3_wap.npy", df_test_b0_f3_wap),
    ("../data/test/df_test_b1_f0_wap.npy", df_test_b1_f0_wap),
    ("../data/test/df_test_b1_f1_wap.npy", df_test_b1_f1_wap),
    ("../data/test/df_test_b1_f2_wap.npy", df_test_b1_f2_wap),
    ("../data/test/df_test_b1_f3_wap.npy", df_test_b1_f3_wap),
    ("../data/test/df_test_b2_f0_wap.npy", df_test_b2_f0_wap),
    ("../data/test/df_test_b2_f1_wap.npy", df_test_b2_f1_wap),
    ("../data/test/df_test_b2_f2_wap.npy", df_test_b2_f2_wap),
    ("../data/test/df_test_b2_f3_wap.npy", df_test_b2_f3_wap),
    ("../data/test/df_test_b2_f4_wap.npy", df_test_b2_f4_wap),
]
for npy_path, features in test_array_exports:
    np.save(npy_path, features)