In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import normalize, MinMaxScaler
from sklearn.svm import SVR

In [2]:
def combine_data(person_signal, person_numeric):
    """Load one subject's signal and numeric CSVs and join them per second.

    Both files are read with their first column as the index and must
    contain a ``'Time [s]'`` column (surrounding whitespace in headers is
    ignored). Each signal row is tagged with the whole second it falls in
    (``floor`` of its timestamp) and the numerics are joined onto that
    ``sec`` key with an outer merge.

    Parameters
    ----------
    person_signal : str or path-like
        Path to the signals CSV.
    person_numeric : str or path-like
        Path to the numerics CSV. A ``RESP`` column in this file, if
        present, is discarded so it cannot clash with the signal ``RESP``.

    Returns
    -------
    pandas.DataFrame
        Merged frame with whitespace-stripped column names. Remaining NaNs
        in numeric columns (e.g. seconds present in only one file) are
        replaced by that column's mean.
    """
    signals = pd.read_csv(person_signal, index_col=0)
    numerics = pd.read_csv(person_numeric, index_col=0)

    # Normalize headers first so the lookups below do not depend on the
    # exact amount of padding in the raw CSV header line.
    signals = signals.rename(columns=str.strip)
    numerics = numerics.rename(columns=str.strip)

    # Vectorized floor + cast instead of a per-row Python lambda.
    signals = signals.assign(
        sec=np.floor(signals['Time [s]']).astype('int64')
    )

    numerics = (
        numerics
        .rename(columns={'Time [s]': 'sec'})
        # errors='ignore': a numerics file without RESP is still valid input.
        .drop(columns='RESP', errors='ignore')
    )
    numerics = numerics.assign(sec=numerics['sec'].astype('int64'))

    person = signals.merge(numerics, on='sec', how='outer')

    # numeric_only=True keeps this working on pandas >= 2.0 if a
    # non-numeric column is ever present; such columns are left unfilled.
    person = person.fillna(person.mean(numeric_only=True))

    return person

def normalize_person(person, scaler=None):
    """Rescale every column of ``person`` and return a new DataFrame.

    Parameters
    ----------
    person : pandas.DataFrame
        Frame whose values are passed to the scaler as a 2-D array.
    scaler : object with a ``transform`` method, optional
        An already-fitted transformer (for example a ``MinMaxScaler`` fitted
        on the training subject). When given, only ``scaler.transform`` is
        applied, so several frames can share the same scaling. When omitted,
        a fresh ``MinMaxScaler`` is fitted on ``person`` itself, which is the
        original behaviour.

    Returns
    -------
    pandas.DataFrame
        Scaled values under the original column names. The result carries a
        default integer index; the index of ``person`` is not kept.
    """
    column_names = list(person.columns)
    raw_values = person.values  # plain numpy array for the scaler

    if scaler is None:
        scaled_values = MinMaxScaler().fit_transform(raw_values)
    else:
        # Reuse the caller's fitted parameters instead of refitting here.
        scaled_values = scaler.transform(raw_values)

    return pd.DataFrame(scaled_values, columns=column_names)

In [3]:
# Subject 01: merge the signal and numeric recordings, then min-max scale
# the merged frame in a single chained expression.
person1 = combine_data(
    'csv/bidmc_01_Signals.csv',
    'csv/bidmc_01_Numerics.csv',
).pipe(normalize_person)
# Preview the first rows of the scaled frame.
person1.head()

Unnamed: 0,Time [s],RESP,PLETH,V,AVR,II,sec,HR,PULSE,SpO2
0,0.0,0.35386,0.447416,0.512224,0.519021,0.611932,0.0,0.857143,0.8,0.5
1,1.7e-05,0.35679,0.439169,0.509293,0.539323,0.584562,0.0,0.857143,0.8,0.5
2,3.3e-05,0.35875,0.430922,0.507338,0.564706,0.554256,0.0,0.857143,0.8,0.5
3,5e-05,0.36168,0.422675,0.502448,0.593894,0.524932,0.0,0.857143,0.8,0.5
4,6.7e-05,0.36364,0.416494,0.504402,0.612928,0.499516,0.0,0.857143,0.8,0.5


In [4]:
# Training features: every column except the target and the raw timestamp.
x_train = person1.drop(columns=['RESP', 'Time [s]'])
# Training target: the RESP column.
y_train = person1.loc[:, 'RESP']

In [5]:
# Support-vector regressor constructed with its default hyperparameters.
svm = SVR()
# Kept as the cell's final expression so the fitted estimator is displayed.
svm.fit(X=x_train, y=y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [6]:
# Subject 02: same load-then-scale pipeline as used for subject 01.
# normalize_person is applied to this frame on its own.
person2 = combine_data(
    'csv/bidmc_02_Signals.csv',
    'csv/bidmc_02_Numerics.csv',
).pipe(normalize_person)

In [12]:
# Held-out features: every column except the target and the raw timestamp.
x_test = person2.drop(columns=['RESP', 'Time [s]'])
# Held-out target: the RESP column.
y_test = person2.loc[:, 'RESP']

In [18]:
# Predict RESP for the training subject first, then the held-out subject.
train_preds, test_preds = (
    svm.predict(features) for features in (x_train, x_test)
)

In [19]:
# Mean absolute error on the scaled RESP values: training subject first,
# held-out subject second.
print((y_train - train_preds).abs().mean())
print((y_test - test_preds).abs().mean())

0.28746111381682915
0.31182039004154055
