In [781]:
import scipy.io
from scipy import stats
from collections import Counter
import numpy as np
from scipy.signal import find_peaks
from interval import interval
import os
import pandas as pd

# Preprocessing steps

![](./files/preprocessing1.png)
![](./files/preprocessing2.png)

In [782]:
# Load a single example recording; the cells below run each preprocessing step on it.
# Row 0 of mat['val'] is read as the FHR trace and row 1 as the UC trace by the functions below.
mat = scipy.io.loadmat('./physionet_dataset_processed/1001m.mat')

a) FHR spike removal

In [783]:
def fhr_spike_removal(mat):
    """Return the FHR trace with zero padding trimmed and spikes bridged.

    Steps:
      1. Take row 0 of ``mat['val']`` and drop the leading and trailing runs
         of zero samples.
      2. Mark a *spike* at every index ``x`` where the jump to ``x + 1``
         exceeds ``25*100`` raw units.
      3. Mark a *stable* index ``x`` when the four following sample-to-sample
         jumps are all below ``10*100`` raw units and ``fhr[x]`` is non-zero.
      4. For each spike, linearly interpolate every sample strictly between
         the spike index and the next stable index.

    Parameters
    ----------
    mat : mapping
        Loaded recording; ``mat['val'][0]`` is the FHR signal.

    Returns
    -------
    numpy.ndarray
        Cleaned signal with the same dtype as the input row. The input array
        is never modified. An all-zero (or empty) input yields an empty array
        instead of raising ``IndexError``.
    """
    # Get FHR signal; work on a copy so the loaded recording is left untouched
    fhr = np.array(mat['val'][0])

    # Trim the leading and trailing zero runs with one slice rather than
    # deleting one sample at a time
    nonzero = np.flatnonzero(fhr != 0)
    if nonzero.size == 0:
        return fhr[:0]
    fhr = fhr[nonzero[0]:nonzero[-1] + 1]

    # Absolute jump between consecutive samples
    step = np.abs(np.diff(fhr))

    # Spike: jump larger than 25*100 raw units
    spike = np.flatnonzero(step > 25*100)

    # Stable: four consecutive small jumps starting at a non-zero sample
    n_candidates = max(len(fhr) - 4, 0)
    if n_candidates:
        calm = step < 10*100
        stable_mask = (calm[:-3] & calm[1:-2] & calm[2:-1] & calm[3:]
                       & (fhr[:n_candidates] != 0))
        stable = np.flatnonzero(stable_mask)
    else:
        stable = np.array([], dtype=int)

    last_anchor = -1
    for s in spike:
        s = int(s)
        if s <= last_anchor:
            # This spike lies inside a stretch that was already interpolated
            continue
        # First stable index strictly after the spike (stable is sorted)
        pos = int(np.searchsorted(stable, s, side='right'))
        if pos == len(stable):
            # No stable stretch follows; leave the tail as recorded
            break
        n = int(stable[pos])
        last_anchor = n
        slope = (fhr[n] - fhr[s]) / (n - s)
        # Compute every bridged sample from the anchor so that rounding on
        # integer arrays does not accumulate from sample to sample
        fhr[s + 1:n] = fhr[s] + slope * np.arange(1, n - s)

    return fhr

In [784]:
# FHR trace of the example recording after zero-padding removal and spike interpolation.
fhr = fhr_spike_removal(mat)

b) Filtering of uterine contraction signals

In [785]:
def filter_uterine_contractions(mat, window_size=17):
    """Smooth the uterine-contraction (UC) trace with a moving average.

    Parameters
    ----------
    mat : mapping
        Loaded recording; ``mat['val'][1]`` is the UC signal.
    window_size : int, optional
        Number of consecutive samples averaged per output value
        (default 17, the value originally hard-coded).

    Returns
    -------
    list of float
        One mean per full window, i.e. ``len(uc) - window_size + 1`` values.
        Empty when the signal is shorter than the window.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    # Get UC signal; float64 so window sums are never computed in a narrow
    # integer dtype
    uc = np.asarray(mat['val'][1], dtype=float)

    # np.convolve(mode='valid') would swap its operands when the signal is
    # shorter than the kernel, so handle that case explicitly
    if len(uc) < window_size:
        return []

    window_sums = np.convolve(uc, np.ones(window_size), mode='valid')
    return (window_sums / window_size).tolist()

In [786]:
# Moving-average-smoothed UC trace of the example recording.
uc = filter_uterine_contractions(mat)

c) Detection of uterine contractions

In [787]:
def uterine_contraction_detection(uc):
    """Detect contractions as sustained rises of the UC trace above its mode.

    The trace is rounded to the nearest multiple of 50. The resting level is
    the most frequent rounded value above 900 (the smallest one on ties). A
    contraction is a run of at least ``20*4`` consecutive samples strictly
    above that level whose peak exceeds it by more than ``3*100``.

    Parameters
    ----------
    uc : sequence of numbers
        Smoothed UC samples.

    Returns
    -------
    (list of [start, end], int)
        Half-open sample-index intervals of the detected contractions and the
        sum of their lengths in samples. Returns ``([], 0)`` when no rounded
        sample exceeds 900, because no resting level can be estimated then.
    """
    min_length = 20*4   # minimum run length, in samples
    min_peak = 3*100    # minimum peak height above the resting level

    #Detection of contractions
    nuc = [50 * round(x/50) for x in uc]

    # Resting level: mode of the rounded values above 900. np.unique returns
    # sorted values, so argmax picks the smallest of the most frequent ones.
    # This avoids indexing the result of stats.mode, whose shape differs
    # between SciPy versions.
    candidates = [x for x in nuc if x > 900]
    if not candidates:
        return [], 0
    levels, counts = np.unique(candidates, return_counts=True)
    mode = levels[np.argmax(counts)].item()

    contraction_intervals = []
    contraction_total_time = 0
    i = 0
    while i + min_length < len(nuc):
        if all(x > mode for x in nuc[i:i + min_length]):
            # Extend the run for as long as the trace stays above the level
            j = i + min_length
            while j < len(nuc) and nuc[j] > mode:
                j = j + 1
            max_shift = max(nuc[i:j]) - mode
            if max_shift > min_peak:
                contraction_intervals.append([i, j])
                contraction_total_time = contraction_total_time + j - i
            i = j
        else:
            i = i + 1
    return contraction_intervals, contraction_total_time

In [788]:
# Detect contractions on the smoothed UC trace of the example recording.
contraction_intervals, contraction_total_time = uterine_contraction_detection(uc)
# Contractions per second; the factor 4 presumably is the sampling rate in Hz — TODO confirm.
contraction_number_per_second = 4*len(contraction_intervals)/len(uc)

d) Evaluation of abnormal and mean short-term variability (STV)

In [789]:
def stv_calculation(fhr):
    """Compute mean short-term variability (STV) and the share of abnormal STV.

    STV at sample ``x`` is ``abs(fhr[x-1] - fhr[x+1])``, evaluated for every
    interior sample. A value below 100 raw units counts as abnormal.

    Parameters
    ----------
    fhr : sequence of numbers
        FHR samples in raw units.

    Returns
    -------
    (float, float)
        ``stv_mean`` — mean STV divided by 100, and ``stv_abn_per`` — the
        percentage (0-100) of STV values below 100 raw units. Both are 0.0
        when fewer than three samples are given, because no STV value exists.
    """
    # float64 so differences are never computed in a narrow integer dtype
    samples = np.asarray(fhr, dtype=float)
    if samples.size < 3:
        return 0.0, 0.0

    #stv calculation: difference between the two neighbours of each interior sample
    stv = np.abs(samples[:-2] - samples[2:])

    #mean
    stv_mean = stv.sum() / stv.size
    stv_mean = stv_mean/100
    #abnormal percentage
    stv_abn_per = 100*np.count_nonzero(stv < 100)/stv.size

    return stv_mean, stv_abn_per

In [790]:
# STV statistics of the spike-free (not yet smoothed) FHR trace.
# stv_abn_per is also read as a notebook-level variable inside fhr_baseline_estimation.
stv_mean, stv_abn_per = stv_calculation(fhr)

e) Filtering of fhr signals

In [791]:
def filter_fhr_signals(fhr, window_size=5):
    """Smooth the FHR trace with a moving average.

    Parameters
    ----------
    fhr : sequence of numbers
        FHR samples.
    window_size : int, optional
        Number of consecutive samples averaged per output value
        (default 5, the value originally hard-coded).

    Returns
    -------
    list of float
        One mean per full window, i.e. ``len(fhr) - window_size + 1`` values.
        A plain list is returned because later steps call ``list.index`` on
        the result. Empty when the signal is shorter than the window.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    # float64 so window sums are never computed in a narrow integer dtype
    samples = np.asarray(fhr, dtype=float)

    # np.convolve(mode='valid') would swap its operands when the signal is
    # shorter than the kernel, so handle that case explicitly
    if len(samples) < window_size:
        return []

    window_sums = np.convolve(samples, np.ones(window_size), mode='valid')
    return (window_sums / window_size).tolist()

In [792]:
# Replace the FHR trace with its smoothed version (a Python list from here on).
# NOTE: this rebinds `fhr`, so re-running this cell alone smooths the trace a second time.
fhr = filter_fhr_signals(fhr)

f) Quantification of fetal movements

In [793]:
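#Skip: fetal movements are not quantified in this notebook (the fetal_movement column is filled with None when the dataset is built)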

g) Estimation of the FHR baseline

In [794]:
def fhr_baseline_estimation(fhr, stv_abn_per=None):
    """Estimate the FHR baseline from the histogram of the smoothed trace.

    The samples are rounded to the nearest 50 raw units and divided by 100.
    The resulting values are ranked by frequency, and values covering less
    than 0.8 % of the samples are discarded. Starting from the most frequent
    value, ``find_baseline`` may move the baseline to a lower (or, for a
    starting value below 110, a higher) value whose frequency is large enough
    relative to the most frequent one; the required ratio depends on
    ``stv_abn_per``.

    Parameters
    ----------
    fhr : sequence of numbers
        Smoothed FHR samples in raw units.
    stv_abn_per : float, optional
        Percentage (0-100) of abnormal short-term variability, as returned by
        ``stv_calculation``. When omitted, the notebook-level variable of the
        same name is used, which is how this function originally obtained it.

    Returns
    -------
    float
        Baseline in rescaled units (raw value / 100).

    Raises
    ------
    NameError
        If ``stv_abn_per`` is neither passed nor defined at notebook level.
    ValueError
        If ``fhr`` is empty.
    """
    if stv_abn_per is None:
        # Backward compatibility: the original implementation silently read
        # the notebook-level `stv_abn_per`
        try:
            stv_abn_per = globals()['stv_abn_per']
        except KeyError:
            raise NameError("name 'stv_abn_per' is not defined; pass stv_abn_per explicitly") from None

    if len(fhr) == 0:
        raise ValueError("fhr is empty; cannot estimate a baseline")

    def find_baseline(f,h,BL,imax):
        # f: candidate values ranked by frequency, h: their frequencies in %,
        # BL: starting baseline (the most frequent value), imax: number of candidates
        stv_abn_fr = stv_abn_per/100
        if BL >= 110:
            # Factor applied to the frequency ratio a lower candidate must exceed
            if BL > 152:
                F = 1.6
            elif stv_abn_per < 20:
                F = 4
            elif stv_abn_per < 30:
                F = 2
            elif stv_abn_per < 40:
                F = 1
            elif stv_abn_per < 60:
                F = 0.5
            else:
                F = 1
            # Walk down to the lowest sufficiently frequent candidate >= 110
            for i in range(1,imax):
                if f[i] >= 110 and f[i] < BL and h[i] > F*stv_abn_fr*h[0]:
                    BL = f[i]
            return BL

        # Starting value below 110: prefer the first frequent candidate above 110
        for i in range(1,imax):
            if f[i] > 110 and h[i] > (1-stv_abn_fr) * (h[0] / 3):
                return f[i]
        # Otherwise walk down to the lowest sufficiently frequent candidate
        for i in range(1,imax):
            if f[i] < BL and h[i] > stv_abn_fr * h[0]:
                BL = f[i]
        return BL

    #Rounding to nearest 50, then re-scaling
    nfhr = [50 * round(x/50) / 100 for x in fhr]
    #Counting and sorting by descending count (ties keep first-seen order)
    ranked = Counter(nfhr).most_common()
    #Getting frequencies (in %) and filtering out rare values
    total = len(fhr)
    hist = [(value, 100*count/total) for value, count in ranked if 100*count/total >= 0.8]
    if not hist:
        # Every value is rare (very flat histogram): fall back to the most
        # frequent value instead of failing with an IndexError
        hist = [(ranked[0][0], 100*ranked[0][1]/total)]
    #Driver code
    f = [value for value, _ in hist]
    h = [freq for _, freq in hist]
    imax = len(f)
    BL = f[0]

    return find_baseline(f,h,BL,imax)

In [795]:
# Baseline of the smoothed FHR trace, in rescaled units (raw value / 100).
# The function also reads the notebook-level `stv_abn_per` computed in step d).
baseline = fhr_baseline_estimation(fhr)

h) Detection of accelerations and baseline shifts

In [796]:
def acceleration_detection(fhr, baseline):
    """Find stretches where the FHR stays above the baseline.

    A stretch qualifies when at least ``15*4`` consecutive samples are
    strictly above ``baseline*100`` and its highest sample exceeds that level
    by more than ``15*100`` raw units.

    Parameters
    ----------
    fhr : sequence of numbers
        Smoothed FHR samples in raw units.
    baseline : number
        Baseline in rescaled units (multiplied by 100 before comparison).

    Returns
    -------
    list of [start, end]
        Half-open sample-index intervals of the detected accelerations.
    """
    level = baseline*100
    window = 15*4
    total = len(fhr)

    found = []
    start = 0
    while start + window < total:
        offsets = [sample - level for sample in fhr[start:start + window]]
        if not all(offset > 0 for offset in offsets):
            # Not a full window above the baseline: slide by one sample
            start += 1
            continue

        # Grow the stretch while the trace stays above the baseline,
        # tracking the highest excursion seen so far
        peak = max(offsets)
        stop = start + window
        while stop < total and fhr[stop] - level > 0:
            peak = max(peak, fhr[stop] - level)
            stop += 1

        if peak > 15*100:
            found.append([start, stop])
        start = stop
    return found

In [797]:
# Accelerations of the example recording, relative to the estimated baseline.
acceleration_intervals = acceleration_detection(fhr, baseline)
# Accelerations per second; the factor 4 presumably is the sampling rate in Hz — TODO confirm.
aceleration_per_second = 4*len(acceleration_intervals)/len(fhr)

i) Detection and classification of decelerations

In [798]:
def deceleration_detection(fhr, baseline):
    """Find stretches where the FHR stays below the baseline.

    A stretch qualifies when at least ``15*4`` consecutive samples are
    strictly below ``baseline*100`` and its lowest sample lies more than
    ``15*100`` raw units under that level.

    Parameters
    ----------
    fhr : sequence of numbers
        Smoothed FHR samples in raw units.
    baseline : number
        Baseline in rescaled units (multiplied by 100 before comparison).

    Returns
    -------
    (list of [start, end], int)
        Half-open sample-index intervals of the detected decelerations and
        the sum of their lengths in samples.
    """
    level = baseline*100
    window = 15*4
    total = len(fhr)

    found = []
    covered = 0
    start = 0
    while start + window < total:
        offsets = [sample - level for sample in fhr[start:start + window]]
        if not all(offset < 0 for offset in offsets):
            # Not a full window below the baseline: slide by one sample
            start += 1
            continue

        # Grow the stretch while the trace stays below the baseline,
        # tracking the deepest excursion seen so far
        trough = min(offsets)
        stop = start + window
        while stop < total and fhr[stop] - level < 0:
            trough = min(trough, fhr[stop] - level)
            stop += 1

        if abs(trough) > 15*100:
            found.append([start, stop])
            covered = covered + stop - start
        start = stop
    return found, covered

In [799]:
def deceleration_classification(deceleration_intervals, sampling_rate=4):
    """Count decelerations by duration class.

    The intervals are expressed in samples, so each length is converted to
    seconds before it is compared with the 120 s and 300 s limits. The
    previous version compared the raw sample count with 120 and 300, while
    every other duration threshold in this notebook is multiplied by 4.

    Parameters
    ----------
    deceleration_intervals : iterable of [start, end]
        Sample-index intervals, as returned by ``deceleration_detection``.
    sampling_rate : number, optional
        Samples per second used to convert lengths to seconds (default 4,
        the factor used throughout the notebook).

    Returns
    -------
    (int, int, int)
        Number of mild (<= 120 s), prolonged (> 120 s and <= 300 s) and
        severe (> 300 s) decelerations, in that order.
    """
    #Classification of decelerations
    mild_decelerations_number = 0
    prolongued_decelerations_number = 0
    severe_decelerations_number = 0

    for d in deceleration_intervals:
        duration_s = (d[1] - d[0]) / sampling_rate
        if duration_s <= 120:
            mild_decelerations_number = mild_decelerations_number + 1
        elif duration_s <= 300:
            prolongued_decelerations_number = prolongued_decelerations_number + 1
        else:
            severe_decelerations_number = severe_decelerations_number + 1

    return mild_decelerations_number, prolongued_decelerations_number, severe_decelerations_number

In [800]:
def late_deceleration_detection(fhr, baseline):
    """Find decelerations whose nadir lies well inside the stretch.

    A deceleration is a run of at least ``15*4`` consecutive samples strictly
    below ``baseline*100`` whose lowest sample is more than ``15*100`` raw
    units under that level. It is reported here only when its nadir (the
    first lowest sample of the run) is more than ``30*4`` samples away from
    both the start and the end of the run.

    The nadir is located inside the run itself. The previous version used
    ``fhr.index(nadir)``, which returns the first occurrence of that value
    anywhere in the trace and therefore mis-placed the nadir whenever the
    same value had occurred earlier. That call also required ``fhr`` to be a
    list; any sequence is accepted now.

    Parameters
    ----------
    fhr : sequence of numbers
        Smoothed FHR samples in raw units.
    baseline : number
        Baseline in rescaled units (multiplied by 100 before comparison).

    Returns
    -------
    (list of [start, end], int)
        Half-open sample-index intervals of the selected decelerations and
        the sum of their lengths in samples.
    """
    level = baseline*100
    min_length = 15*4   # minimum run length, in samples
    margin = 30*4       # required distance of the nadir from either end

    #Late decelerations
    late_deceleration_intervals = []
    late_deceleration_total_time = 0
    i = 0
    while i + min_length < len(fhr):
        if all(x - level < 0 for x in fhr[i:i + min_length]):
            # Extend the run for as long as the trace stays below the baseline
            j = i + min_length
            while j < len(fhr) and fhr[j] - level < 0:
                j = j + 1

            segment = fhr[i:j]
            # argmin returns the first minimum, searched within the run only
            nadir_ix = i + int(np.argmin(segment))
            depth = level - segment[nadir_ix - i]
            if depth > 15*100:
                if nadir_ix-i > margin and j-nadir_ix > margin:
                    late_deceleration_intervals.append([i,j])
                    late_deceleration_total_time = late_deceleration_total_time + j - i
            i = j
        else:
            i = i + 1

    return late_deceleration_intervals, late_deceleration_total_time

In [801]:
def prolongued_deceleration_detection_2(deceleration_intervals):
    """Select the decelerations longer than ``3*60*4`` samples.

    Parameters
    ----------
    deceleration_intervals : iterable of [start, end]
        Sample-index intervals of detected decelerations.

    Returns
    -------
    (list of [start, end], number, number)
        The selected intervals (same objects, original order), the sum of
        their lengths, and the length of the longest one (0 when none
        qualifies).
    """
    threshold = 3*60*4

    selected = [d for d in deceleration_intervals if d[1] - d[0] > threshold]
    lengths = [d[1] - d[0] for d in selected]

    return selected, sum(lengths), max(lengths, default=0)

In [802]:
# All decelerations of the example recording, relative to the estimated baseline.
deceleration_intervals, deceleration_total_time = deceleration_detection(fhr, baseline)

# Counts per duration class, then the same counts per second
# (the factor 4 presumably is the sampling rate in Hz — TODO confirm).
mild_decelerations_number, prolongued_decelerations_number, severe_decelerations_number = deceleration_classification(deceleration_intervals)
mild_decelerations_number_per_second = 4*mild_decelerations_number/len(fhr)
prolongued_decelerations_number_per_second = 4*prolongued_decelerations_number/len(fhr)
severe_decelerations_number_per_second = 4*severe_decelerations_number/len(fhr)

# Subsets of decelerations used later by the fetal-health rules.
late_deceleration_intervals, late_deceleration_total_time = late_deceleration_detection(fhr, baseline)
prolongued_deceleration_intervals, prolongued_deceleration_total_time, prolongued_deceleration_longest = prolongued_deceleration_detection_2(deceleration_intervals)

h) Detection of abnormal and mean long-term variability (LTV)

In [803]:
def ltv_calculation(acceleration_intervals, deceleration_intervals, fhr):
    """Compute long-term variability (LTV) outside accelerations/decelerations.

    The accelerations and decelerations are merged, and the remaining parts
    of the trace (the complement) are scanned with a ``60*4``-sample window
    moved one sample at a time. The LTV of a window is ``max - min`` of its
    samples.

    Fixes relative to the previous version:
      * the complement is now correct for every layout of events (it used to
        be right only when exactly one of "an event starts at index 0" /
        "an event ends at len(fhr)" was true, and came out empty or shifted
        otherwise);
      * each window is read from inside its own complement segment instead of
        always from the start of the trace.
    Merging is done with plain lists, so this function no longer needs the
    ``interval`` package.

    Parameters
    ----------
    acceleration_intervals, deceleration_intervals : iterable of [start, end]
        Sample-index intervals of the detected events.
    fhr : sequence of numbers
        Smoothed FHR samples in raw units.

    Returns
    -------
    (list of [start, end], float, float)
        The complement segments (integer sample indices), the mean LTV
        divided by 100, and the percentage (0-100) of windows whose LTV is at
        most ``5*100`` raw units. Both statistics are 0.0 when no segment is
        long enough to hold a window.
    """
    window = 60*4
    n_samples = len(fhr)

    def join_intervals(acceleration_intervals, deceleration_intervals):
        #Join accelerations and decelerations into sorted, non-overlapping intervals
        events = sorted([int(x[0]), int(x[1])]
                        for x in list(acceleration_intervals) + list(deceleration_intervals))
        merged_interval = []
        for start, end in events:
            if merged_interval and start <= merged_interval[-1][1]:
                # Overlapping or touching the previous interval: extend it
                merged_interval[-1][1] = max(merged_interval[-1][1], end)
            else:
                merged_interval.append([start, end])
        return merged_interval

    def complement_interval(merged):
        #Get complement of joined interval within [0, len(fhr)]
        complement_intervals = []
        cursor = 0
        for start, end in merged:
            if start > cursor:
                complement_intervals.append([cursor, start])
            cursor = max(cursor, end)
        if cursor < n_samples:
            complement_intervals.append([cursor, n_samples])
        return complement_intervals

    #Get ltv
    complement_intervals = complement_interval(join_intervals(acceleration_intervals,deceleration_intervals))
    ltv = []
    for lo, hi in complement_intervals:
        # Every full window that fits inside this segment (none if too short)
        for offset in range(lo, hi - window + 1):
            ls = fhr[offset:offset + window]
            ltv.append(max(ls) - min(ls))

    if len(ltv) > 0:
        #Get ltv mean
        ltv_mean = np.mean(ltv)/100
        #Get percentage of abnormal ltv
        ltv_abnormal_per = 100*len([x for x in ltv if x <= 5*100])/len(ltv)
    else:
        ltv_mean = 0.0
        ltv_abnormal_per = 0.0

    return complement_intervals, ltv_mean, ltv_abnormal_per

In [804]:
def reduced_variability_check(complement_intervals, deceleration_intervals, fhr=None):
    """Flag reduced variability (LTV below ``5*100`` raw units).

    Each interval is scanned with a ``60*4``-sample window moved one sample
    at a time; a window counts when ``max - min`` of its samples is below
    ``5*100``. The flag is raised when more than ``50*60*4`` such windows are
    found in the complement intervals, or more than ``3*60*4`` in the
    deceleration intervals.

    Fixes relative to the previous version:
      * the loop bound was overwritten by the window's LTV inside the loop,
        so the scan stopped (or ran on) arbitrarily;
      * windows were always read from the start of the trace instead of from
        inside the interval being scanned;
      * ``fhr`` was read from the notebook namespace only; it can now be
        passed explicitly.

    Parameters
    ----------
    complement_intervals : iterable of [start, end]
        Segments outside accelerations/decelerations.
    deceleration_intervals : iterable of [start, end]
        Detected decelerations.
    fhr : sequence of numbers, optional
        Smoothed FHR samples. When omitted, the notebook-level ``fhr`` is
        used, as before.

    Returns
    -------
    bool
        True when either count exceeds its threshold.

    Raises
    ------
    NameError
        If ``fhr`` is neither passed nor defined at notebook level.
    """
    if fhr is None:
        # Backward compatibility: the original implementation read the
        # notebook-level `fhr`
        try:
            fhr = globals()['fhr']
        except KeyError:
            raise NameError("name 'fhr' is not defined; pass fhr explicitly") from None

    window = 60*4
    n_samples = len(fhr)

    def count_reduced_windows(intervals):
        # Number of windows inside the intervals whose LTV is below 5*100
        count = 0
        for c in intervals:
            # Bounds may arrive as floats; clamp them to the trace
            lo = max(int(c[0]), 0)
            hi = min(int(c[1]), n_samples)
            for offset in range(lo, hi - window + 1):
                ls = fhr[offset:offset + window]
                if max(ls) - min(ls) < 5*100:
                    count = count + 1
        return count

    #Get ltv < 5 (reduced variability)
    ltv_5_flg = count_reduced_windows(complement_intervals) > 50*60*4

    #For decelerations
    ltv_5_flg_c = count_reduced_windows(deceleration_intervals) > 3*60*4

    return ltv_5_flg or ltv_5_flg_c

In [805]:
def increased_variability_check(complement_intervals, fhr=None):
    """Flag increased variability (LTV above ``25*100`` raw units).

    Each complement interval is scanned with a ``60*4``-sample window moved
    one sample at a time; a window counts when ``max - min`` of its samples
    exceeds ``25*100``. The flag is raised when more than ``30*60*4`` such
    windows are found.

    Fixes relative to the previous version:
      * the threshold was compared with the flag itself instead of the window
        count, so the function always returned False;
      * the loop bound was overwritten by the window's LTV inside the loop;
      * windows were always read from the start of the trace instead of from
        inside the interval being scanned;
      * ``fhr`` was read from the notebook namespace only; it can now be
        passed explicitly.

    Parameters
    ----------
    complement_intervals : iterable of [start, end]
        Segments outside accelerations/decelerations.
    fhr : sequence of numbers, optional
        Smoothed FHR samples. When omitted, the notebook-level ``fhr`` is
        used, as before.

    Returns
    -------
    bool
        True when the count exceeds the threshold.

    Raises
    ------
    NameError
        If ``fhr`` is neither passed nor defined at notebook level.
    """
    if fhr is None:
        # Backward compatibility: the original implementation read the
        # notebook-level `fhr`
        try:
            fhr = globals()['fhr']
        except KeyError:
            raise NameError("name 'fhr' is not defined; pass fhr explicitly") from None

    window = 60*4
    n_samples = len(fhr)

    #Get ltv > 25 (increased variability)
    ltv_25_time = 0
    for c in complement_intervals:
        # Bounds may arrive as floats; clamp them to the trace
        lo = max(int(c[0]), 0)
        hi = min(int(c[1]), n_samples)
        for offset in range(lo, hi - window + 1):
            ls = fhr[offset:offset + window]
            if max(ls) - min(ls) > 25*100:
                ltv_25_time = ltv_25_time + 1

    ltv_25_flg = ltv_25_time > 30*60*4

    return ltv_25_flg

In [806]:
# LTV statistics on the parts of the trace outside accelerations and decelerations.
complement_intervals, ltv_mean, ltv_abnormal_per = ltv_calculation(acceleration_intervals, deceleration_intervals, fhr)
# Variability flags; both checks read the notebook-level `fhr` internally.
ltv_5_flg = reduced_variability_check(complement_intervals, deceleration_intervals)
ltv_25_flg = increased_variability_check(complement_intervals)

i) Histogram properties

In [807]:
def histogram_properties_calculation(fhr):
    """Compute descriptive properties of the FHR histogram.

    The samples are rounded to the nearest 50 raw units, divided by 100 and
    binned into 20 equal-width bins.

    Fixes relative to the previous version:
      * the mode no longer depends on indexing the result of ``stats.mode``,
        whose shape differs between SciPy versions (the smallest of the most
        frequent values is returned, as before);
      * ``h_variance`` is the variance (it was computed with ``np.std``);
      * ``h_tendency`` is always defined (0 when the skewness is NaN, e.g.
        for a constant trace).

    Parameters
    ----------
    fhr : sequence of numbers
        Smoothed FHR samples in raw units.

    Returns
    -------
    tuple
        ``(h_width, h_min, h_max, h_peaks, h_zeros, h_mean, h_mode,
        h_median, h_variance, h_tendency)`` where ``h_peaks`` is the number
        of local maxima of the bin counts, ``h_zeros`` the number of runs of
        empty bins, and ``h_tendency`` is 1 / -1 / 0 for skewness >= 0.5 /
        <= -0.5 / otherwise.

    Raises
    ------
    ValueError
        If ``fhr`` is empty.
    """
    if len(fhr) == 0:
        raise ValueError("fhr is empty; cannot compute histogram properties")

    #Rounding to nearest 50, then re-scaling
    nfhr = [50 * round(x/50) / 100 for x in fhr]

    #Basic Properties
    f, v = np.histogram(nfhr,bins=20)
    # NOTE(review): h_min is the second bin edge because the first edge is
    # dropped; kept as in the original — confirm whether the first edge
    # (the smallest value) was intended.
    h_min = min(np.delete(v,[0]))
    h_max = max(v)
    h_width = h_max - h_min

    #Peaks
    h_peaks = len(find_peaks(f)[0])

    #Zeros: count runs of consecutive empty bins (a run starts at an empty
    #bin that is first or follows a non-empty bin)
    empty = (f == 0)
    h_zeros = int(empty[0]) + int(np.count_nonzero(empty[1:] & ~empty[:-1]))

    #Stats
    h_mean = np.mean(nfhr)
    # np.unique returns sorted values, so argmax picks the smallest of the
    # most frequent ones
    values, counts = np.unique(nfhr, return_counts=True)
    h_mode = values[np.argmax(counts)]
    h_median = np.median(nfhr)
    h_variance = np.var(nfhr)

    #Skew
    skew = stats.skew(nfhr)
    if skew >= 0.5:
        h_tendency = 1
    elif skew <= -0.5:
        h_tendency = -1
    else:
        # Roughly symmetric, or skewness undefined (NaN)
        h_tendency = 0

    return h_width, h_min, h_max, h_peaks, h_zeros, h_mean, h_mode, h_median, h_variance, h_tendency

In [808]:
# Histogram descriptors of the smoothed FHR trace of the example recording.
h_width, h_min, h_max, h_peaks, h_zeros, h_mean, h_mode, h_median, h_variance, h_tendency = histogram_properties_calculation(fhr)

j) Fetal Health

![](./files/fetal_health.png)

In [809]:
#Contractions association
def interval_common(contraction_intervals,contraction_total_time,itvl):
    """Return the share of contraction time that overlaps the given intervals.

    Both interval lists are turned into ``interval`` objects and intersected;
    the summed length of the intersection is divided by
    ``contraction_total_time``.

    Returns 0.0 when ``contraction_total_time`` is not greater than zero.
    """
    overlap = interval(*contraction_intervals) & interval(*itvl)
    overlap_total_time = sum(piece[1] - piece[0] for piece in overlap)

    if not contraction_total_time > 0:
        return 0.0
    return overlap_total_time/contraction_total_time

In [810]:
def fetal_health_fn(baseline, ltv_5_flg, ltv_25_flg, prolongued_deceleration_longest, contraction_intervals, contraction_total_time, late_deceleration_intervals, late_deceleration_total_time, prolongued_deceleration_intervals, prolongued_deceleration_total_time, ltv_mean, deceleration_intervals):
    """Classify a recording as normal (1), suspicious (2) or pathological (3).

    Pathological when any of these holds:
      * the baseline is below 100;
      * ``ltv_5_flg`` or ``ltv_25_flg`` is set;
      * the longest prolonged deceleration exceeds ``5*60*4`` samples, or
        late / prolonged decelerations overlap more than half of the
        contraction time and last more than 20 minutes in total (when
        ``ltv_5_flg`` is set) or 30 minutes otherwise, expressed as
        ``minutes*60*4`` samples.

    Normal when the baseline is within [110, 160], ``ltv_mean`` is within
    [5, 25] and decelerations overlap at most half of the contraction time.

    Everything else is suspicious.
    """
    def contraction_overlap(events):
        # Share of contraction time covered by the given events
        return interval_common(contraction_intervals, contraction_total_time, events)

    #pathological
    if prolongued_deceleration_longest > 5*60*4:
        repetitive = True
    else:
        minutes = 20 if ltv_5_flg == True else 30
        min_total_time = minutes*60*4
        repetitive = (
            (contraction_overlap(late_deceleration_intervals) > 0.5
             and late_deceleration_total_time > min_total_time)
            or (contraction_overlap(prolongued_deceleration_intervals) > 0.5
                and prolongued_deceleration_total_time > min_total_time)
        )
    if baseline < 100 or ltv_5_flg or ltv_25_flg or repetitive:
        return 3

    #normal
    normal_criteria = [
        baseline >= 110 and baseline <= 160,
        ltv_mean >= 5 and ltv_mean <= 25,
        contraction_overlap(deceleration_intervals) <= 0.5,
    ]
    if all(normal_criteria):
        return 1

    #suspicious
    return 2

In [811]:
# Fetal-health class of the example recording: 1 = normal, 2 = suspicious, 3 = pathological.
fetal_health = fetal_health_fn(baseline, ltv_5_flg, ltv_25_flg, prolongued_deceleration_longest, contraction_intervals, contraction_total_time, late_deceleration_intervals, late_deceleration_total_time, prolongued_deceleration_intervals, prolongued_deceleration_total_time, ltv_mean, deceleration_intervals)

k) Creating Dataset

In [812]:
# Collect the signal (.mat) and header (.hea) files of every recording.
# Files are matched on their extension; the previous substring test
# (".mat" in name) would also have picked up names such as "x.mat.bak".
mat_files = []
hea_files = []
for root, directories, files in os.walk("./physionet_dataset_processed"):
    for name in files:
        path = os.path.join(root, name)
        if name.endswith(".mat"):
            mat_files.append(path)
        elif name.endswith(".hea"):
            hea_files.append(path)
# Sort so both lists follow the same record order
mat_files = sorted(mat_files)
hea_files = sorted(hea_files)

In [813]:
# Run the full feature-extraction pipeline on every recording and collect one row per file.
# The variable names below must stay as they are: fhr_baseline_estimation reads the
# notebook-level `stv_abn_per`, and the variability checks read the notebook-level `fhr`.
result = []
for file in mat_files:
    mat = scipy.io.loadmat(file)

    # a) spike-free FHR trace
    fhr = fhr_spike_removal(mat)

    # b) smoothed UC trace
    uc = filter_uterine_contractions(mat)

    # c) contractions and their rate per second
    contraction_intervals, contraction_total_time = uterine_contraction_detection(uc)
    contraction_number_per_second = 4*len(contraction_intervals)/len(uc)

    # d) STV on the unsmoothed trace (must run before the baseline estimation)
    stv_mean, stv_abn_per = stv_calculation(fhr)

    # e) smoothed FHR trace, used by every step below
    fhr = filter_fhr_signals(fhr)

    # g) baseline
    baseline = fhr_baseline_estimation(fhr)

    # h) accelerations
    acceleration_intervals = acceleration_detection(fhr, baseline)
    aceleration_per_second = 4*len(acceleration_intervals)/len(fhr)

    # i) decelerations and their classification
    deceleration_intervals, deceleration_total_time = deceleration_detection(fhr, baseline)
    mild_decelerations_number, prolongued_decelerations_number, severe_decelerations_number = deceleration_classification(deceleration_intervals)
    mild_decelerations_number_per_second = 4*mild_decelerations_number/len(fhr)
    prolongued_decelerations_number_per_second = 4*prolongued_decelerations_number/len(fhr)
    severe_decelerations_number_per_second = 4*severe_decelerations_number/len(fhr)
    late_deceleration_intervals, late_deceleration_total_time = late_deceleration_detection(fhr, baseline)
    prolongued_deceleration_intervals, prolongued_deceleration_total_time, prolongued_deceleration_longest = prolongued_deceleration_detection_2(deceleration_intervals)

    # LTV statistics and variability flags
    complement_intervals, ltv_mean, ltv_abnormal_per = ltv_calculation(acceleration_intervals, deceleration_intervals, fhr)
    ltv_5_flg = reduced_variability_check(complement_intervals, deceleration_intervals)
    ltv_25_flg = increased_variability_check(complement_intervals)

    # histogram descriptors
    h_width, h_min, h_max, h_peaks, h_zeros, h_mean, h_mode, h_median, h_variance, h_tendency = histogram_properties_calculation(fhr)

    # fetal-health class (1 normal, 2 suspicious, 3 pathological)
    fetal_health = fetal_health_fn(baseline, ltv_5_flg, ltv_25_flg, prolongued_deceleration_longest, contraction_intervals, contraction_total_time, late_deceleration_intervals, late_deceleration_total_time, prolongued_deceleration_intervals, prolongued_deceleration_total_time, ltv_mean, deceleration_intervals)    
    
    # Column order matches the DataFrame built in the next cell; the third entry
    # (fetal_movement) is None because step f) is skipped.
    row = [baseline, aceleration_per_second, None, contraction_number_per_second, mild_decelerations_number_per_second, severe_decelerations_number_per_second, prolongued_decelerations_number_per_second, stv_abn_per, stv_mean, ltv_abnormal_per, ltv_mean, h_width, h_min, h_max, h_peaks, h_zeros, h_mode, h_mean, h_median, h_variance, h_tendency, fetal_health]

    result.append(row)

In [None]:
# One row per recording; the column order must match the `row` list built in the loop above.
dataset = pd.DataFrame(data=result,columns=["baseline value","accelerations","fetal_movement","uterine_contractions","light_decelerations","severe_decelerations","prolongued_decelerations","abnormal_short_term_variability","mean_value_of_short_term_variability","percentage_of_time_with_abnormal_long_term_variability","mean_value_of_long_term_variability","histogram_width","histogram_min","histogram_max","histogram_number_of_peaks","histogram_number_of_zeroes","histogram_mode","histogram_mean","histogram_median","histogram_variance","histogram_tendency","fetal_health"])

### MAT files with no UC signal (deleted)
1104
1119
1149
1155
1186
1188
1258
1327

l) Other data

In [None]:
#Other info
# Fields read from each .hea header, in the order of the output columns:
# (label searched for in the line, converter, tolerate an unparsable value).
# Labels are tested in this order and the first one found in a line wins.
HEA_FIELDS = [
    ("#pH", float, False),
    ("#BDecf", float, False),
    ("#pCO2", float, False),
    ("#BE", float, False),
    ("#Apgar1", int, False),
    ("#Apgar5", int, False),
    ("#Gest. weeks", int, False),
    ("#Weight(g)", int, True),
    ("#Sex", int, False),
    ("#Age", int, False),
    ("#Gravidity", int, True),
    ("#Parity", int, False),
    ("#Diabetes", int, False),
    ("#Hypertension", int, False),
    ("#Preeclampsia", int, False),
    ("#Liq. praecox", int, False),
    ("#Pyrexia", int, False),
    ("#Meconium", int, False),
    ("#Presentation", int, True),
    ("#Induced", int, False),
    ("#I.stage", int, False),
    ("#NoProgress", int, False),
    ("#CK/KP", int, False),
    ("#II.stage", int, False),
    ("#Deliv. type", int, False),
]


def _parse_hea_record(path):
    """Return the HEA_FIELDS values of one header file, in HEA_FIELDS order.

    A field that does not appear in the file is None (previously the value of
    the preceding file leaked into the row). A value that cannot be converted
    is None for the tolerant fields and raises ValueError for the others, as
    before.
    """
    values = [None] * len(HEA_FIELDS)
    # `with` closes the file even if a conversion fails
    with open(path, 'r') as f:
        for line in f:
            for position, (label, convert, tolerant) in enumerate(HEA_FIELDS):
                if label not in line:
                    continue
                # Take the text after the label; str.strip(label) would strip
                # a set of characters, not the label itself
                text = line.partition(label)[2].strip()
                try:
                    values[position] = convert(text)
                except ValueError:
                    if not tolerant:
                        raise
                    values[position] = None
                break
    return values


#Other info
result = []
for file in hea_files:
    result.append(_parse_hea_record(file))

In [None]:
# Clinical-outcome table, one row per .hea file, in the order the fields were parsed.
apgar = pd.DataFrame(data=result, columns=["pH","BDecf","pCO2","BE","Apgar1", "Apgar5", "Gest. weeks", "Weight(g)", "Sex", "Age", "Gravidity", "Parity", "Diabetes", "Hypertension", "Preeclampsia", "Liq. praecox", "Pyrexia", "Meconium", "Presentation", "Induced", "I.stage", "NoProgress", "CK/KP", "II.stage", "Deliv. type"])
# Join on the positional row index.
# NOTE(review): this assumes the i-th .mat file and the i-th .hea file belong to the same
# recording — confirm that no .hea file remains for a deleted .mat file.
dataset = pd.merge(dataset, apgar, left_index=True, right_index=True)
dataset.to_csv("./second_dataset.csv", index=False)


![](./files/apgar.png)