## Converting Time Series Data into Single Value Data 

- Firuz Juraev 

In [130]:
import pandas as pd 
from os import path
import numpy as np 
import math 

In [73]:
def side_by_side(*objs, **kwds):
    from pandas.io.formats.printing import adjoin
    space = kwds.get('space', 4)
    reprs = [repr(obj).split('\n') for obj in objs]
    print (adjoin(space, *reprs))
    print()
    return

In [2]:
data = pd.read_csv('NeonatesTimeSeries_NaN.csv')

In [3]:
data.head()

Unnamed: 0,subject_id,HADM_ID,CHARTTIME,bpCuffDiastolic,bpCuffMean,bpCuffSystolic,glucometer,heartRate,hrAlarmHigh,hrAlarmLow,respRate,sao2,sao2AlarmHigh,sao2AlarmLow,skinTemperature,temperature,LOS
0,14509,100029,0,43.0,49.0,59.0,61.0,154.0,200.0,80.0,56.0,95.0,100.0,90.0,36.5,36.599998,15.006944
1,14509,100029,1,,,,,140.0,200.0,80.0,44.0,99.0,100.0,90.0,36.599998,36.599998,15.006944
2,14509,100029,2,,,,,148.0,200.0,80.0,40.0,98.0,100.0,90.0,36.799999,,15.006944
3,14509,100029,3,39.0,45.0,55.0,,136.0,200.0,80.0,40.0,100.0,100.0,90.0,,36.599998,15.006944
4,14509,100029,4,,,,,132.0,200.0,80.0,56.0,100.0,100.0,90.0,36.700001,,15.006944


## Converting Function

In [131]:
def compress(column_name, subject_id, column_values):
    if not math.isnan(column_values.std()): 
        std_value = column_values.std()
    else: 
        std_value = 0
    mean_value = column_values.mean()
    median_value = column_values.median()
    if len(column_values.mode().values) > 0: 
        mode_value = column_values.mode().values[0]
    else: 
        mode_value = np.nan 
     
    
    # create columns     
    slice_columns = ["subject_id", 
               (column_name + "_mean"),
               (column_name + "_median"),
               (column_name + "_mode"), 
               (column_name + "_std")]
    
    # Creating a row 
    df = pd.DataFrame(columns = slice_columns)
    df.loc[0] = [subject_id, mean_value, median_value, mode_value, std_value]
    
    return df 

In [132]:
def converter(df, filename): 
    df_columns = data.columns.to_list()[3:-1]
    
    parts_list = [] 
    
    for col in df_columns: 
        part = compress(col, df['subject_id'].iloc[0], df[col])
        parts_list.append(part) 
    
    join_dfs = parts_list[0] 
    
    for i in range(1, len(parts_list)): 
        join_dfs = pd.merge(join_dfs, parts_list[i], on='subject_id', how='inner') 
    
    if path.exists(filename): 
        join_dfs.to_csv(filename, mode='a', header=False, index=False)
    else: 
        join_dfs.to_csv(filename, index=False, header=True)
              

In [112]:
df_columns.to_list()[3:-1] 

['bpCuffDiastolic',
 'bpCuffMean',
 'bpCuffSystolic',
 'glucometer',
 'heartRate',
 'hrAlarmHigh',
 'hrAlarmLow',
 'respRate',
 'sao2',
 'sao2AlarmHigh',
 'sao2AlarmLow',
 'skinTemperature',
 'temperature']

In [133]:
# finding unique ids 
unique_subjects_ids = data['subject_id'].unique() 

# Converting 
for i in range(0, len(unique_subjects_ids)): 
    converter(data.loc[data['subject_id'] == unique_subjects_ids[i]], 'test3.csv')         

In [134]:
test = pd.read_csv('test3.csv')
test.head(10)

Unnamed: 0,subject_id,bpCuffDiastolic_mean,bpCuffDiastolic_median,bpCuffDiastolic_mode,bpCuffDiastolic_std,bpCuffMean_mean,bpCuffMean_median,bpCuffMean_mode,bpCuffMean_std,bpCuffSystolic_mean,...,sao2AlarmLow_mode,sao2AlarmLow_std,skinTemperature_mean,skinTemperature_median,skinTemperature_mode,skinTemperature_std,temperature_mean,temperature_median,temperature_mode,temperature_std
0,14509.0,36.25,37.5,27.0,6.800735,44.25,45.5,37.0,5.123475,56.5,...,90.0,0.0,36.48,36.400002,36.299999,0.25967,36.292857,36.299999,36.299999,0.289466
1,11221.0,36.583333,35.0,35.0,7.98867,41.708333,39.5,35.0,7.363123,52.625,...,90.0,1.062367,35.983334,36.0,36.0,0.261545,35.616666,35.599998,35.599998,0.618319
2,11190.0,36.4,36.0,35.0,4.247875,48.0,48.0,48.0,4.690416,74.1,...,87.0,0.0,35.826087,35.799999,36.0,0.1573,35.599998,35.599998,35.599998,0.0
3,11002.0,39.5,39.0,38.0,3.781534,54.666667,55.0,49.0,3.88158,72.333333,...,90.0,0.0,36.124999,36.199999,36.299999,0.236291,36.0,36.0,35.599998,0.36515
4,2507.0,27.428571,30.0,30.0,4.894117,34.714286,35.0,36.0,1.496026,49.142857,...,87.0,0.0,36.413044,36.400002,36.200001,0.226242,36.311111,36.299999,36.299999,0.152966
5,28624.0,38.714286,39.0,24.0,10.889051,51.285714,48.0,48.0,7.158079,74.285714,...,87.0,0.0,35.9125,35.85,35.700001,0.313899,36.025,35.950001,36.0,0.313812
6,28585.0,28.833333,25.0,24.0,7.413951,42.166667,38.0,36.0,8.542053,65.833333,...,90.0,1.659404,36.4125,36.5,36.5,0.174643,36.357143,36.5,36.5,0.234404
7,21221.0,39.333333,41.0,35.0,3.785939,51.333333,53.0,47.0,3.785939,73.666667,...,90.0,0.0,36.2,36.200001,36.200001,0.08165,36.0,36.0,36.0,0.0
8,8176.0,28.333333,27.5,23.0,4.412105,43.5,43.0,43.0,5.43139,65.333333,...,87.0,0.0,35.295833,35.25,35.0,0.439841,35.579167,35.5,35.5,0.480923
9,13028.0,29.8,28.0,25.0,5.308274,41.4,38.5,37.0,6.703233,56.1,...,87.0,0.0,36.525,36.65,36.700001,0.287229,36.190909,36.200001,36.200001,0.104447


In [135]:
side_by_side(test.isnull().sum(), test.count())

subject_id                0    subject_id                3135
bpCuffDiastolic_mean      0    bpCuffDiastolic_mean      3135
bpCuffDiastolic_median    0    bpCuffDiastolic_median    3135
bpCuffDiastolic_mode      0    bpCuffDiastolic_mode      3135
bpCuffDiastolic_std       0    bpCuffDiastolic_std       3135
bpCuffMean_mean           0    bpCuffMean_mean           3135
bpCuffMean_median         0    bpCuffMean_median         3135
bpCuffMean_mode           0    bpCuffMean_mode           3135
bpCuffMean_std            0    bpCuffMean_std            3135
bpCuffSystolic_mean       0    bpCuffSystolic_mean       3135
bpCuffSystolic_median     0    bpCuffSystolic_median     3135
bpCuffSystolic_mode       0    bpCuffSystolic_mode       3135
bpCuffSystolic_std        0    bpCuffSystolic_std        3135
glucometer_mean           1    glucometer_mean           3134
glucometer_median         1    glucometer_median         3134
glucometer_mode           1    glucometer_mode           3134
glucomet