# Handling Unbalanced Neonates Data 
* by Firuz Juraev (PhD student)

In [76]:
import pandas as pd 
from datetime import datetime
import seaborn as sns
import glob
from os import listdir 
import os.path
from os import path
import numpy as np 
import warnings
warnings.filterwarnings('ignore')

In [77]:
def side_by_side(*objs, **kwds):
    """Print the reprs of several objects next to each other as columns.

    Parameters
    ----------
    *objs
        Objects to show. Each object's ``repr`` is split on newlines and
        becomes one column of the printed text.
    **kwds
        Only ``space`` is read; it is forwarded to pandas' ``adjoin`` as the
        spacing between columns (default 4).

    Returns
    -------
    None
        The joined text is printed, followed by one empty line.
    """
    from pandas.io.formats.printing import adjoin

    gap = kwds.get('space', 4)

    # One list of text lines per object, in the order the objects were given.
    columns = []
    for obj in objs:
        columns.append(repr(obj).split('\n'))

    print(adjoin(gap, *columns))
    print()

In [78]:
def list_files(directory):
    """Return the names of the CSV files found directly inside ``directory``.

    Only entries whose name ends with ``.csv`` (case-sensitive) are kept.
    The number of matches is printed as ``CSV Files: <n>``.

    Parameters
    ----------
    directory : str
        Path of the directory to scan (not recursive).

    Returns
    -------
    list of str
        Bare file names (no directory prefix), sorted alphabetically.
    """
    # listdir() gives names in arbitrary order; sorting makes the processing
    # and printing order reproducible from run to run.
    csv_names = sorted(name for name in listdir(directory) if name.endswith('.csv'))
    print("CSV Files: " + str(len(csv_names)))
    return csv_names

In [79]:
# Locations of the three variants of the cleaned data set. Each path ends with
# a slash because file names are appended to it by plain string concatenation.
# NOTE(review): FB / Mean / NaN presumably name the imputation strategy used
# for missing values -- confirm against the notebook that produced these folders.
directory_FB, directory_Mean, directory_NaN = (
    "Data/Balanced/Clean_Data_FB/",
    "Data/Balanced/Clean_Data_Mean/",
    "Data/Balanced/Clean_Data_NaN/",
)

We have a problem with the following files:
* bpCuffDiastolic.csv
* bpCuffMean.csv 
* bpCuffSystolic.csv 
* glucometer.csv 
* skinTemperature.csv 
* hrAlarmLow.csv 
* respRate.csv 

In [80]:
# HADM_ID values whose rows are removed from every cleaned CSV file.
bad_neonates = [
    100044, 100062, 100081, 159338, 155826,
    187822, 145130, 146110, 120546, 157469,
]

### Functions 

In [83]:
def drop_defected_neonates_rows(filename, neonates=None):
    """Remove every row of the listed neonates from a CSV file, in place.

    The file is read, rows whose ``HADM_ID`` matches one of the ids are
    dropped, and the result is written back to the same path without the
    index column. A short report is then printed: the file name, the number
    of distinct ``HADM_ID`` values left, and the per-column null and non-null
    counts side by side.

    Parameters
    ----------
    filename : str
        Path of the CSV file to clean. The file is overwritten.
    neonates : iterable of int, optional
        ``HADM_ID`` values to remove. Defaults to the module-level
        ``bad_neonates`` list.

    Raises
    ------
    KeyError
        If the file has no ``HADM_ID`` column.
    """
    if neonates is None:
        neonates = bad_neonates

    df = pd.read_csv(filename)

    # A single vectorised membership test replaces the per-id lookup/drop
    # loop; the rows that survive are the same.
    df = df[~df['HADM_ID'].isin(list(neonates))]

    df.to_csv(filename, index=False)

    print('----------------------------------')
    print("File: " + filename)

    print("Unique Neonates: " + str(df['HADM_ID'].nunique()))

    side_by_side(df.isnull().sum(), df.count())

### Load Data 

In [84]:
# List the CSV files of each data variant (this also prints one count per folder).
fb_files   = list_files(directory_FB)
mean_files = list_files(directory_Mean)
nan_files  = list_files(directory_NaN)

# Pair every folder with its own listing so a file is only opened where it
# actually exists, instead of assuming all three folders mirror the FB folder.
files_by_directory = [
    (directory_FB, fb_files),
    (directory_Mean, mean_files),
    (directory_NaN, nan_files),
]

# Walk the union of names so files present only in the Mean/NaN folders are
# cleaned too; the three variants of one file are still reported together.
for file in sorted(set(fb_files) | set(mean_files) | set(nan_files)):
    for directory, available in files_by_directory:
        if file in available:
            drop_defected_neonates_rows(directory + file)
        else:
            print("Skipping missing file: " + directory + file)

CSV Files: 13
CSV Files: 13
CSV Files: 13
----------------------------------
File: Data/Balanced/Clean_Data_FB/bpCuffDiastolic.csv
Unique Neonates: 3135
subject_id    0    subject_id    75240
HADM_ID       0    HADM_ID       75240
CHARTTIME     0    CHARTTIME     75240
VALUENUM      0    VALUENUM      75240
dtype: int64       dtype: int64       

----------------------------------
File: Data/Balanced/Clean_Data_Mean/bpCuffDiastolic.csv
Unique Neonates: 3135
subject_id    0    subject_id    75240
HADM_ID       0    HADM_ID       75240
CHARTTIME     0    CHARTTIME     75240
VALUENUM      0    VALUENUM      75240
dtype: int64       dtype: int64       

----------------------------------
File: Data/Balanced/Clean_Data_NaN/bpCuffDiastolic.csv
Unique Neonates: 3135
subject_id        0    subject_id    75240
HADM_ID           0    HADM_ID       75240
CHARTTIME         0    CHARTTIME     75240
VALUENUM      51386    VALUENUM      23854
dtype: int64           dtype: int64       

--------------

----------------------------------
File: Data/Balanced/Clean_Data_FB/sao2AlarmHigh.csv
Unique Neonates: 3135
subject_id    0    subject_id    75240
HADM_ID       0    HADM_ID       75240
CHARTTIME     0    CHARTTIME     75240
VALUENUM      0    VALUENUM      75240
dtype: int64       dtype: int64       

----------------------------------
File: Data/Balanced/Clean_Data_Mean/sao2AlarmHigh.csv
Unique Neonates: 3135
subject_id    0    subject_id    75240
HADM_ID       0    HADM_ID       75240
CHARTTIME     0    CHARTTIME     75240
VALUENUM      0    VALUENUM      75240
dtype: int64       dtype: int64       

----------------------------------
File: Data/Balanced/Clean_Data_NaN/sao2AlarmHigh.csv
Unique Neonates: 3135
subject_id       0    subject_id    75240
HADM_ID          0    HADM_ID       75240
CHARTTIME        0    CHARTTIME     75240
VALUENUM      5980    VALUENUM      69260
dtype: int64          dtype: int64       

----------------------------------
File: Data/Balanced/Clean_Data_F