In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import normalize, MinMaxScaler
from sklearn.svm import SVR

In [2]:
def combine_data(person_signal, person_numeric):
    """Load one subject's signal and numeric CSVs and join them per second.

    Both files are read with their first column as the index. Every row of
    the signal file is assigned to the whole second its ``'Time [s]'`` value
    falls in (floor), and the numeric file's ``'Time [s]'`` column is used
    as that same integer ``'sec'`` key. The two frames are outer-merged on
    ``'sec'``, so seconds present in only one file are kept.

    Parameters
    ----------
    person_signal : str or path-like
        CSV with a ``'Time [s]'`` column (signal rows).
    person_numeric : str or path-like
        CSV with ``'Time [s]'`` and ``' RESP'`` columns (the latter, with its
        leading space, is dropped before the merge -- presumably so it does
        not clash with the signal file's RESP column; confirm against data).

    Returns
    -------
    pandas.DataFrame
        Merged frame with whitespace stripped from column names and every
        NaN in a numeric column replaced by that column's mean.

    Raises
    ------
    KeyError
        If an expected column (``'Time [s]'``, ``' RESP'``) is missing.
    """
    signals = pd.read_csv(person_signal, index_col=0)
    numerics = pd.read_csv(person_numeric, index_col=0)

    # Vectorized floor -> integer second bucket (replaces a per-row lambda).
    signals['sec'] = np.floor(signals['Time [s]']).astype('int64')

    # Build the numeric side without mutating in place.
    numerics = (
        numerics
        .rename(columns={'Time [s]': 'sec'})
        .drop(' RESP', axis=1)
    )
    numerics['sec'] = numerics['sec'].astype('int64')

    person = signals.merge(numerics, on='sec', how='outer')

    # Remove whitespace from column names
    person = person.rename(columns=lambda name: name.strip())

    # Fill column NaNs with column averages. numeric_only=True keeps this
    # working when a non-numeric column is present: recent pandas raises a
    # TypeError from a plain .mean() in that case.
    person = person.fillna(person.mean(numeric_only=True))

    return person

def normalize_data(person):
    """Min-max scale a pandas object and hand back the fitted scaler.

    Parameters
    ----------
    person : pandas.DataFrame or pandas.Series
        Data to scale. Anything whose ``shape`` has a single dimension is
        treated as one column.

    Returns
    -------
    (pandas.DataFrame, MinMaxScaler)
        The scaled values as a new DataFrame with a default integer index
        (original column names are kept for 2-D input; 1-D input yields a
        single column labelled ``0``), and the scaler that was fitted on
        ``person`` so the transformation can be inverted later.
    """
    scaler = MinMaxScaler()

    # 1-D input: the scaler needs a column vector, so reshape to (n, 1).
    if len(person.shape) <= 1:
        as_column = person.values.reshape(-1, 1)
        return pd.DataFrame(scaler.fit_transform(as_column)), scaler

    # 2-D input: scale the raw values, then re-attach the column labels.
    names = list(person.columns)
    scaled_values = scaler.fit_transform(person.values)
    return pd.DataFrame(scaled_values, columns=names), scaler

In [3]:
# Subject 01 supplies the training data.
person1 = combine_data('csv/bidmc_01_Signals.csv', 'csv/bidmc_01_Numerics.csv')

# Features: every merged column except the RESP target and the raw timestamp,
# min-max scaled with a scaler fitted on this subject.
x_train, x_tr_scaler = normalize_data(
    person1.drop(['RESP', 'Time [s]'], axis='columns')
)

# Target: RESP scaled the same way, then flattened from an (n, 1) frame
# to the 1-D array used for fitting.
y_train, y_tr_scaler = normalize_data(person1['RESP'])
y_train = np.array(y_train).reshape(-1)

In [4]:
# Subject 02 is held out as the test data.
person2 = combine_data('csv/bidmc_02_Signals.csv', 'csv/bidmc_02_Numerics.csv')

# NOTE(review): features and target below are scaled with scalers fitted on
# the test subject itself, not the ones fitted on the training subject.
# Presumably this is deliberate per-subject normalization -- confirm.
x_test, x_test_scaler = normalize_data(
    person2.drop(['RESP', 'Time [s]'], axis='columns')
)

# Target: scaled RESP flattened from an (n, 1) frame to a 1-D array.
y_test, y_test_scaler = normalize_data(person2['RESP'])
y_test = np.array(y_test).reshape(-1)

In [5]:
# Support-vector regressor; only `verbose` is overridden, every other
# hyperparameter is left at the library default.
svm = SVR(verbose=True)
# Fit on the scaled training features/target. As the cell's last expression,
# the fitted estimator returned by `fit` is what the notebook displays.
# NOTE(review): the recorded output shows verbose=False, so it appears to
# predate the verbose=True setting above -- re-run to refresh.
svm.fit(x_train, y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [6]:
# Predict the scaled RESP target for the training subject and the held-out
# subject with the fitted regressor.
train_preds, test_preds = svm.predict(x_train), svm.predict(x_test)

In [7]:
# Mean absolute error on the min-max-scaled target:
# first the training subject, then the test subject.
print(np.mean(np.abs(y_train - train_preds)))
print(np.mean(np.abs(y_test - test_preds)))

0.2582619165799254
0.31182039004154266


In [15]:
# Map scaled values back to original RESP units. The scaler expects a column
# vector, so the 1-D arrays get a trailing axis first.
# NOTE(review): the predictions come from a model trained on a target scaled
# by y_tr_scaler, yet they are inverted here with the test subject's scaler --
# confirm this is the intended (per-subject) convention.
descaled_test_true = y_test_scaler.inverse_transform(y_test[:, np.newaxis])
descaled_test_pred = y_test_scaler.inverse_transform(test_preds[:, np.newaxis])

In [16]:
# Display the test target after inverse-transforming it with y_test_scaler.
descaled_test_true

array([[0.04059],
       [0.04059],
       [0.04059],
       ...,
       [0.59164],
       [0.60025],
       [0.61009]])

In [17]:
# Display the test predictions after inverse-transforming them with y_test_scaler.
descaled_test_pred

array([[0.3830896 ],
       [0.38351735],
       [0.39534905],
       ...,
       [0.58208011],
       [0.58608776],
       [0.53158639]])