In [1]:
!pip install pyedflib
!pip install lightgbm
!pip install tqdm
!pip install scikit-learn
!pip install matplotlib
!pip install pandas




In [2]:
import numpy as np
import pyedflib
import os 
import matplotlib.pyplot as plt
from sklearn.preprocessing import RobustScaler
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import LocalOutlierFactor
from scipy.interpolate import interp1d
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from sklearn.metrics import mean_squared_error
from tqdm import tqdm
from sklearn.metrics import confusion_matrix,accuracy_score
import lightgbm

%matplotlib inline


# Locations of the MIT-BIH files. `edf_path` (signal files) and `annot_path`
# (annotation files) are sub-directories relative to `base_path`.
edf_path = './edf/'
annot_path = './annotation/'
base_path = './data/MIT-BIH/'

# Data Preparation

In [3]:
def std_4sigma(data) : 
    """Clip outliers to within three standard deviations of the mean.

    NOTE: despite the function name, the threshold is 3 sigma, not 4; the
    name is kept because other cells call it.

    Parameters
    ----------
    data : array-like
        Numeric samples.

    Returns
    -------
    numpy.ndarray
        A new array in which values above ``mean + 3*std`` or below
        ``mean - 3*std`` are replaced by the respective bound. The input
        is left unmodified.
    """
    data = np.asarray(data)
    center = np.mean(data)
    spread = np.std(data)
    # np.clip returns a fresh array, so the caller's buffer is never altered.
    return np.clip(data, center - 3 * spread, center + 3 * spread)

def normalized_data(data, fs=360.0) : 
    """Band-limit, de-spike and rescale a 1-D signal.

    Steps, in order:
      1. first-order Butterworth low-pass at 15 Hz, applied with ``filtfilt``;
      2. first-order Butterworth high-pass at 5 Hz, applied with ``filtfilt``;
      3. outlier clipping via ``std_4sigma``;
      4. subtract the mean and divide by the value range, times 2.

    Parameters
    ----------
    data : array-like
        Raw samples.
    fs : float, optional
        Sampling frequency in Hz used to normalise the filter cut-offs.
        Defaults to 360.0, the value that was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        The normalised signal. If the filtered signal is perfectly flat
        (zero range) the mean-centred signal is returned instead of
        dividing by zero.
    """
    # Local import: the notebook's import cell does not load scipy.signal.
    import scipy.signal

    nyquist = fs / 2.0
    lowpass = scipy.signal.butter(1, 15.0 / nyquist, 'low')
    highpass = scipy.signal.butter(1, 5.0 / nyquist, 'high')
    # TODO: Could use an actual bandpass filter
    ecg_low = scipy.signal.filtfilt(*lowpass, x=data)
    data = scipy.signal.filtfilt(*highpass, x=ecg_low)

    data = std_4sigma(data)

    # Vectorised reductions instead of Python-level max()/min() over every sample.
    max_v = np.max(data)
    min_v = np.min(data)
    span = max_v - min_v

    # Diagnostic output for unusually wide signals (kept from the original).
    if span > 3:
        print(max_v, min_v)

    ave = np.average(data)

    if span == 0:
        # Flat signal: avoid 0/0 and return the centred (all-zero) signal.
        return data - ave

    return (data - ave) / span * 2


                                                       

In [4]:
# Per-record containers, keyed by the record number taken from the file name.
sigbufs_data={}   # record -> samples of the MLII / ML2 channel
peaks_data={}     # record -> second whitespace-separated field of every annotation line
labels_data={}    # record -> third whitespace-separated field of every annotation line

for (path, _subdirs, files) in os.walk(os.path.join(base_path, edf_path)):
    for filename in files:
        ext = os.path.splitext(filename)[-1]
        if ext != '.edf':
            continue

        file_num = filename.split('.')[0]
        edf_file = os.path.join(path, filename)

        # Read the MLII (a.k.a. ML2) channel; `None` marks "not found".
        # The context manager closes the EDF file even if reading fails.
        sigbufs = None
        with pyedflib.EdfReader(edf_file) as reader:
            for i in range(reader.signals_in_file):
                if reader.getLabel(i) in ['MLII', 'ML2']:
                    sigbufs = reader.readSignal(i)
        if sigbufs is None:
            print("cannot find MLII in " + str(edf_file))
            continue
        sigbufs_data[file_num] = sigbufs

        # Parse the matching annotation file; the handle is closed on exit.
        seq_num=[]
        labels=[]
        ann_file = os.path.join(base_path, annot_path, file_num + '.ans')
        with open(ann_file) as ann:
            for line in ann:
                sep = line.split()
                if not sep:
                    # Skip blank lines (e.g. a trailing newline at end of file).
                    continue
                seq_num.append(sep[1])
                labels.append(sep[2])
        peaks_data[file_num] = seq_num
        labels_data[file_num] = labels

print(sigbufs_data['100'][0:20])
print(peaks_data['100'][0:20])
print(labels_data['100'][0:20])
            



cannot find MLII in ./data/MIT-BIH/./edf/102.edf
cannot find MLII in ./data/MIT-BIH/./edf/104.edf
[-0.145 -0.145 -0.145 -0.145 -0.145 -0.145 -0.145 -0.145 -0.12  -0.135
 -0.145 -0.15  -0.16  -0.155 -0.16  -0.175 -0.18  -0.185 -0.17  -0.155]
['18', '77', '370', '662', '946', '1231', '1515', '1809', '2044', '2402', '2706', '2998', '3282', '3560', '3862', '4170', '4466', '4764', '5060', '5346']
['+', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'A', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N']


In [5]:
# Show which records were loaded, then how many of them there are.
print(sigbufs_data.keys(), len(sigbufs_data), sep='\n')

dict_keys(['100', '101', '103', '105', '106', '107', '108', '109', '111', '112', '113', '114', '115', '116', '117', '118', '119', '121', '122', '123', '124', '200', '201', '202', '203', '205', '207', '208', '209', '210', '212', '213', '214', '215', '217', '219', '220', '221', '222', '223', '228', '230', '231', '232', '233', '234'])
46


### Split into train and test data

In [6]:
# test_person=['105','109','118','119','200','202','210','214','221','223']
# train_person = []
# for item in list(sigbufs_data.keys()):
#     if item not in test_person : 
#         train_person.append(item)
        
# print(train_person)
# Record split used below: `ds1` feeds the training table, `ds2` the test table.
# Source: Chap. 7 in "Classification Techniques for Medical Image.." by Nilanjan Dey
ds1=['101','106','108','109','112','114','115','116','118','119','122','124','201','203','205','207','208','209','215','220','223','230']
ds2=['100','103','105','111','113','117','121','123','200','202','210','212','213','214','219','221','222','228','231','233','234']


In [7]:
# for key in sigbufs_data.keys() :
#     for peak in peaks_data[key] : 
#          print(str(peak), sigbufs_data[key][int(peak) - 12 :int(peak) + 12 ])

### Map labels to numeric classes (0: N, 1: S, 2: V, 3: F, 4: Q)

In [8]:
# label_change = {'N' : 0, 'L' : 0, 'R' : 0, 
#                 'A' : 1 , 'a' : 1, 'J' : 1, 'S' : 1 , 'e' : 1 , 'j' : 1 , 'n' : 1 ,
#                 'V' : 2, 'r' : 2, 'E' : 2, '!' : 2, 
#                 'F' : 3, 
#                 '/' : 4, 'f':4, 'Q': 4,
#                 '+' : -1, '~' : -1 , '|' : -1, 'x' : -1 , '[' : -1, ']' : -1 , '"': -1 }

In [9]:
# Annotation symbol -> numeric class id, written as one string of symbols per
# class. Ids 0-4 are the classes N, S, V, F, Q; id 5 marks beats that the
# feature-building cells skip, and -1 marks symbols that are filtered out
# before peaks and labels are paired.
label_change = {
    symbol: class_id
    for class_id, symbols in (
        (0, 'NLR'),
        (1, 'AaJSej'),
        (2, 'VE'),
        (3, 'F'),
        (4, '/fQuP'),
        (5, 'nr!'),
        (-1, '+~|x[]"'),
    )
    for symbol in symbols
}

In [10]:
# Translate every annotation symbol into its numeric class id.
change_label = {}
for key in labels_data.keys():
    # Look up each annotation as a whole string. Iterating over its characters
    # would emit several ids for a multi-character token and put the labels
    # out of step with the peak list; an unknown symbol now raises KeyError
    # naming that symbol.
    change_label[key] = [label_change[label] for label in labels_data[key]]

# Preview: first 20 raw symbols and their numeric ids for every record.
for key in labels_data.keys():
    print(labels_data[key][0:20])
    print(change_label[key][0:20])

['+', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'A', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N']
[-1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
['+', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N']
[-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
['+', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N']
[-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
['+', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N']
[-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
['~', '+', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N']
[-1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
['+', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/']
[-1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4]
['+', 'N', 'N', 'N', 

# Build the pandas feature tables

In [11]:
# Drop every annotation whose class id is -1, keeping peaks and labels aligned.
new_peaks={}
new_label={}
for key in peaks_data.keys():
    n_peaks = len(peaks_data[key])
    n_labels = len(change_label[key])
    if n_peaks != n_labels:
        # Fail loudly: silently stopping here would leave the remaining
        # records missing and surface later as an unrelated KeyError.
        raise ValueError(
            "record %s: %d peaks but %d labels" % (key, n_peaks, n_labels)
        )

    kept = [
        (peak, label)
        for peak, label in zip(peaks_data[key], change_label[key])
        if label != -1
    ]
    new_peaks[key] = [peak for peak, _ in kept]
    new_label[key] = [label for _, label in kept]

In [12]:
# Per record: number of annotations kept, number originally present, and
# "O" when nothing was removed / "X" when at least one was.
for key in peaks_data.keys():
    kept_count = len(new_peaks[key])
    total_count = len(peaks_data[key])
    marker = "O" if kept_count == total_count else "X"
    print(kept_count, total_count, marker)
    

2273 2274 X
1865 1874 X
2084 2091 X
2572 2691 X
2027 2098 X
2137 2140 X
1763 1824 X
2532 2535 X
2124 2133 X
2539 2550 X
1795 1796 X
1879 1890 X
1953 1962 X
2412 2421 X
1535 1539 X
2278 2301 X
1987 2094 X
1863 1876 X
2476 2479 X
1518 1519 X
1619 1634 X
2601 2792 X
1963 2039 X
2136 2146 X
2980 3108 X
2656 2672 X
2332 2385 X
2955 3040 X
3005 3052 X
2650 2685 X
2748 2763 X
3251 3294 X
2262 2297 X
3363 3400 X
2208 2280 X
2154 2312 X
2048 2069 X
2427 2462 X
2483 2634 X
2605 2643 X
2053 2141 X
2256 2466 X
1571 2011 X
1780 1816 X
3079 3152 X
2753 2764 X


In [13]:
# Snap every annotated position to the largest sample in the half-open window
# [peak - 12, peak + 12) of the raw signal.
modified_peaks = {}
for key in new_peaks.keys():
    signal = sigbufs_data[key]
    refined = []
    for peak in new_peaks[key] :
        peak = int(peak)
        # Clamp the window to the signal: a negative start would wrap around
        # to the end of the array and an end past the last sample would raise.
        lo = max(peak - 12, 0)
        hi = min(peak + 12, len(signal))
        best = lo + int(np.argmax(signal[lo:hi]))
        # Move only to a strictly larger sample, so ties keep the annotated
        # position.
        refined.append(best if signal[best] > signal[peak] else peak)
    modified_peaks[key] = refined

print(new_peaks['100'][0:20])
print(modified_peaks['100'][0:20])
print(len(modified_peaks['100']),len(new_label['100']))

print("==========================")

['77', '370', '662', '946', '1231', '1515', '1809', '2044', '2402', '2706', '2998', '3282', '3560', '3862', '4170', '4466', '4764', '5060', '5346', '5633']
[77, 370, 663, 947, 1231, 1515, 1809, 2045, 2403, 2706, 2998, 3283, 3560, 3863, 4171, 4466, 4765, 5061, 5347, 5634]
2273 2273


In [14]:
# Column order of the per-beat feature tables: morphology around the R peak,
# RR-interval features, R-amplitude ratios, then the two label columns.
columns = (
    ['q', 'q_idx', 'r', 's', 's_idx']
    + ['rr_interval', 'rr_over_L5', 'rr_interval_all']
    + ['peak_over_L5', 'peak_over_all']
    + ['annot', 'annot2']
)
print(columns)

['q', 'q_idx', 'r', 's', 's_idx', 'rr_interval', 'rr_over_L5', 'rr_interval_all', 'peak_over_L5', 'peak_over_all', 'annot', 'annot2']


In [15]:
train_pandas_path = './train_person_1209_std_4.csv'

# Flip to True to reuse a previously written CSV instead of recomputing.
use_cached_train_features = False

train_pandas_data = None
pandas_idx = 0

if use_cached_train_features and os.path.exists(train_pandas_path) : 
    train_pandas_data = pd.read_csv(train_pandas_path,index_col=0)
else : 
    # Rows are collected in a plain list and turned into a DataFrame once;
    # growing a DataFrame row by row with .loc is far slower.
    train_rows = []

    for edf_filename in ds1 : 
        peaks = modified_peaks[edf_filename]
        annots = new_label[edf_filename]
        sigbuf = normalized_data(sigbufs_data[edf_filename])

        # Record-wide references (first and last beat excluded), computed once
        # per record instead of once per beat.
        inner_beats = range(1, len(peaks) - 1)
        rr_intervals = [peaks[idx] - peaks[idx-1] for idx in inner_beats]
        peaks_all = [sigbuf[peaks[idx]] for idx in inner_beats]
        mean_rr = np.average(rr_intervals) if rr_intervals else float('nan')
        mean_peak = np.average(peaks_all) if peaks_all else float('nan')

        for idx in tqdm(range(len(peaks)-1)) : 
            if idx < 5 :
                # The "last 5 beats" features need five predecessors.
                continue
            if annots[idx] == 5 :
                # Class id 5 is excluded from the table.
                continue

            r_pos = peaks[idx]
            before_r = sigbuf[r_pos - 48 : r_pos]   # 48 samples preceding the R peak
            after_r = sigbuf[r_pos : r_pos + 48]    # 48 samples starting at the R peak

            r = sigbuf[r_pos]
            q = np.min(before_r)
            q_idx = (48 - np.argmin(before_r)) / 48.0   # distance of the minimum from R, as a fraction of the window
            s = np.min(after_r)
            s_idx = np.argmin(after_r) / 48.0

            rr_interval = r_pos - peaks[idx-1]
            rr_over_L5 = rr_interval / ((r_pos - peaks[idx-5]) / 5)
            rr_over_all = rr_interval / mean_rr
            # Mean R amplitude of the five preceding beats (oldest first).
            last5_mean = sum(sigbuf[peaks[idx-k]] for k in (5, 4, 3, 2, 1)) / 5
            peak_over_L5 = r / last5_mean
            peak_over_all = r / mean_peak

            # annot2 is the binary target: 1 for class id 0, otherwise 0.
            train_rows.append([q, q_idx, r, s, s_idx,
                               rr_interval, rr_over_L5, rr_over_all,
                               peak_over_L5, peak_over_all,
                               annots[idx], 1 if annots[idx] == 0.0 else 0])
            pandas_idx += 1

    train_pandas_data = pd.DataFrame(train_rows, columns=columns, dtype=float)

    print(train_pandas_data)
    # Round-trip through CSV so the in-memory table matches what a cached
    # load would produce.
    train_pandas_data.to_csv(train_pandas_path, mode='w')
    train_pandas_data = pd.read_csv(train_pandas_path,index_col=0)

  b = a[a_slice]
100%|█████████████████████████████████████████████████████████████████████████████| 1864/1864 [00:01<00:00, 956.49it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 2026/2026 [00:02<00:00, 964.57it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 1762/1762 [00:01<00:00, 990.30it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 2531/2531 [00:02<00:00, 931.98it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 2538/2538 [00:02<00:00, 947.78it/s]
100%|████████████████████████████████████████████████████████████████████████████| 1878/1878 [00:01<00:00, 1000.00it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 1952/1952 [00:01<00:00, 999.60it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 2411/2411 [00:02<00:00, 951.37it/s]
100%|██████████████████

              q     q_idx         r         s     s_idx  rr_interval  \
0     -0.146885  0.291667  0.637103 -0.131476  0.354167        344.0   
1     -0.163234  0.291667  0.704428 -0.151494  0.333333        324.0   
2     -0.186917  0.291667  0.767824 -0.147500  0.375000        313.0   
3     -0.175878  0.270833  0.716078 -0.139979  0.541667        312.0   
4     -0.170045  0.291667  0.719258 -0.141607  0.416667        311.0   
...         ...       ...       ...       ...       ...          ...   
50884 -0.149599  0.354167  0.890019 -0.710607  0.208333        243.0   
50885 -0.175149  0.375000  0.923847 -0.658806  0.187500        244.0   
50886 -0.160607  0.395833  0.883755 -0.655392  0.208333        234.0   
50887 -0.207656  0.375000  0.976432 -0.642695  0.208333        232.0   
50888 -0.192327  0.395833  0.976436 -0.686411  0.187500        234.0   

       rr_over_L5  rr_interval_all  peak_over_L5  peak_over_all  annot  annot2  
0        1.055215         0.987037      0.970119      

In [16]:
test_pandas_path = './test_person_1209_std_4.csv'

# Flip to True to reuse a previously written CSV instead of recomputing.
use_cached_test_features = False

test_pandas_data = None
pandas_idx = 0

if use_cached_test_features and os.path.exists(test_pandas_path) : 
    test_pandas_data = pd.read_csv(test_pandas_path,index_col=0)
else : 
    # Running counters (shared across all records) used to down-sample
    # class-0 beats: cnt_0 counts beats whose class id is not 0, cnt_1 counts
    # the class-0 beats that were kept.
    cnt_0 = 0 
    cnt_1 = 0 

    # Rows are collected in a plain list and turned into a DataFrame once;
    # growing a DataFrame row by row with .loc is far slower.
    test_rows = []

    for edf_filename in ds2 : 
        peaks = modified_peaks[edf_filename]
        annots = new_label[edf_filename]
        sigbuf = normalized_data(sigbufs_data[edf_filename])

        # Record-wide references (first and last beat excluded), computed once
        # per record instead of once per beat.
        inner_beats = range(1, len(peaks) - 1)
        rr_intervals = [peaks[idx] - peaks[idx-1] for idx in inner_beats]
        peaks_all = [sigbuf[peaks[idx]] for idx in inner_beats]
        mean_rr = np.average(rr_intervals) if rr_intervals else float('nan')
        mean_peak = np.average(peaks_all) if peaks_all else float('nan')

        for idx in tqdm(range(len(peaks)-1)) : 
            if idx < 5 :
                # The "last 5 beats" features need five predecessors.
                continue
            if annots[idx] == 5 :
                # Class id 5 is excluded from the table.
                continue

            if annots[idx] != 0.0 :
                cnt_0 += 1 
            else : 
                # Keep a class-0 beat only while fewer of them have been kept
                # than non-class-0 beats seen so far.
                if cnt_1 >= cnt_0 :
                    continue
                cnt_1 += 1 

            r_pos = peaks[idx]
            before_r = sigbuf[r_pos - 48 : r_pos]   # 48 samples preceding the R peak
            after_r = sigbuf[r_pos : r_pos + 48]    # 48 samples starting at the R peak

            r = sigbuf[r_pos]
            q = np.min(before_r)
            q_idx = (48 - np.argmin(before_r)) / 48.0   # distance of the minimum from R, as a fraction of the window
            s = np.min(after_r)
            s_idx = np.argmin(after_r) / 48.0

            rr_interval = r_pos - peaks[idx-1]
            rr_over_L5 = rr_interval / ((r_pos - peaks[idx-5]) / 5)
            rr_over_all = rr_interval / mean_rr
            # Mean R amplitude of the five preceding beats (oldest first).
            last5_mean = sum(sigbuf[peaks[idx-k]] for k in (5, 4, 3, 2, 1)) / 5
            peak_over_L5 = r / last5_mean
            peak_over_all = r / mean_peak

            # annot2 is the binary target: 1 for class id 0, otherwise 0.
            test_rows.append([q, q_idx, r, s, s_idx,
                              rr_interval, rr_over_L5, rr_over_all,
                              peak_over_L5, peak_over_all,
                              annots[idx], 1 if annots[idx] == 0.0 else 0])
            pandas_idx += 1

    test_pandas_data = pd.DataFrame(test_rows, columns=columns, dtype=float)

    print(test_pandas_data)
    # Round-trip through CSV so the in-memory table matches what a cached
    # load would produce.
    test_pandas_data.to_csv(test_pandas_path, mode='w')
    test_pandas_data = pd.read_csv(test_pandas_path,index_col=0)

  b = a[a_slice]
100%|███████████████████████████████████████████████████████████████████████████| 2272/2272 [00:00<00:00, 31640.20it/s]
100%|██████████████████████████████████████████████████████████████████████████| 2083/2083 [00:00<00:00, 348104.84it/s]
100%|███████████████████████████████████████████████████████████████████████████| 2571/2571 [00:00<00:00, 26039.25it/s]
100%|██████████████████████████████████████████████████████████████████████████| 2123/2123 [00:00<00:00, 709635.59it/s]
100%|██████████████████████████████████████████████████████████████████████████| 1794/1794 [00:00<00:00, 128486.95it/s]
100%|██████████████████████████████████████████████████████████████████████████| 1534/1534 [00:00<00:00, 512674.29it/s]
100%|██████████████████████████████████████████████████████████████████████████| 1862/1862 [00:00<00:00, 373388.51it/s]
100%|██████████████████████████████████████████████████████████████████████████| 1517/1517 [00:00<00:00, 217270.25it/s]
100%|██████████████████

             q     q_idx         r         s     s_idx  rr_interval  \
0    -0.183023  0.250000  0.576815 -0.107303  0.250000        236.0   
1    -0.181602  0.270833  0.630844 -0.146964  0.229167        358.0   
2    -0.138882  0.270833  0.630277 -0.128874  0.250000        188.0   
3    -0.172223  0.270833  0.565816 -0.105813  0.250000        339.0   
4    -0.185494  0.250000  0.624326 -0.109591  0.250000        219.0   
...        ...       ...       ...       ...       ...          ...   
8547 -0.212395  0.333333  0.811261 -0.175968  0.354167        307.0   
8548 -0.089478  0.645833  0.325256 -0.119764  0.666667        164.0   
8549 -0.218017  0.333333  0.832456 -0.195984  0.270833        322.0   
8550 -0.127537  0.604167  0.419947 -0.169328  0.666667        166.0   
8551 -0.223353  0.333333  0.865373 -0.199774  0.270833        315.0   

      rr_over_L5  rr_interval_all  peak_over_L5  peak_over_all  annot  annot2  
0       0.853835         0.824983      0.989037       0.905030    1

### Make individual data

In [17]:
# indi_pandas = './y256_data_indi_std_4sigma'

# # pandas_data = pd.DataFrame(columns=columns ) 


# indi_pandas_data = None
# pandas_idx = 0

# for edf_filename in modified_peaks.keys() : 
#     peaks = modified_peaks[edf_filename]
#     annots = new_label[edf_filename]
#     sigbuf= sigbufs_data[edf_filename]
#     sigbuf= normalized_data(sigbuf)
# #     sigbuf= std_4sigma(sigbuf)
#     temp = pd.DataFrame(columns=columns )
#     for idx in tqdm(range(len(peaks)-1)) : 
#         if(idx < 5) : 
#             continue
#         if(annots[idx] == 5) : 
#             continue
#         data = sigbuf[peaks[idx-1] : peaks[idx+1]]
#         x = np.linspace(0, peaks[idx+1]-peaks[idx-1], num=peaks[idx+1]-peaks[idx-1])
#         y = data
#         f = interp1d(x, y)
#         xnew = np.linspace(0, len(x), num=256,endpoint=True)
#         data_int256 = f(xnew)
#         data_int256 = data_int256.tolist()

#         r_idx = int((peaks[idx] - peaks[idx-1])/(peaks[idx+1]-peaks[idx-1]) * 256)
# #         print(r_idx)
# #         r = sigbuf[peaks[idx]]
# #         q = min(sigbuf[peaks[idx]-12 : peaks[idx]])
# #         s = min(sigbuf[peaks[idx] : peaks[idx] + 12])
#         try : 
#             r_idx += np.argmax(data_int256[r_idx -5 :r_idx +5 ]) + 5
# #             if(r_idx > len(data_int256)) : 
# #                 continue
#             r = data_int256[r_idx]
#         except : 
#             print (peaks[idx-1], peaks[idx], peaks[idx+1])
#             print (annots[idx-1], annots[idx], annots[idx+1])
#             print(r_idx)
#             print(data_int256[r_idx -5 :r_idx +5 ])
# #         print(len(data_int60),data_int60)
# #         print(r_idx-12, r_idx)
# #         print(data_int60[r_idx -12 : r_idx])
#         q = min(data_int256[r_idx -12 : r_idx])
#         s = min(data_int256[r_idx : r_idx+12])
#         rr_interval = peaks[idx] - peaks[idx-1]
#         rr_over_L5 = (peaks[idx] - peaks[idx-1]) / ((peaks[idx] - peaks[idx-5]) /5 )
#         peak_over_L5 = sigbuf[peaks[idx]] /\
#                        ((sigbuf[peaks[idx-5]]+sigbuf[peaks[idx-4]]+sigbuf[peaks[idx-3]]+sigbuf[peaks[idx-2]]+sigbuf[peaks[idx-1]])/5)
#         data_int256.extend([q,r,s,rr_interval,rr_over_L5,peak_over_L5,annots[idx],0 if annots[idx]==0.0 else 1 ])
#         temp.loc[pandas_idx] = data_int256
#         pandas_idx += 1
# #         print(rr_interval, peaks[idx], peaks[idx-1])
# #         print(q,r,s,rr_interval,peak_over_L5,annots[idx])
# #         print(pandas_data)
#     temp.to_csv(indi_pandas+'_'+edf_filename+'.csv', mode='w')


In [18]:
# Show the training feature table.
print(train_pandas_data)

              q     q_idx         r         s     s_idx  rr_interval  \
0     -0.146885  0.291667  0.637103 -0.131476  0.354167        344.0   
1     -0.163234  0.291667  0.704428 -0.151494  0.333333        324.0   
2     -0.186917  0.291667  0.767824 -0.147500  0.375000        313.0   
3     -0.175878  0.270833  0.716078 -0.139979  0.541667        312.0   
4     -0.170045  0.291667  0.719258 -0.141607  0.416667        311.0   
...         ...       ...       ...       ...       ...          ...   
50884 -0.149599  0.354167  0.890019 -0.710607  0.208333        243.0   
50885 -0.175149  0.375000  0.923847 -0.658806  0.187500        244.0   
50886 -0.160607  0.395833  0.883755 -0.655392  0.208333        234.0   
50887 -0.207656  0.375000  0.976432 -0.642695  0.208333        232.0   
50888 -0.192327  0.395833  0.976436 -0.686411  0.187500        234.0   

       rr_over_L5  rr_interval_all  peak_over_L5  peak_over_all  annot  annot2  
0        1.055215         0.987037      0.970119      

In [19]:
# Show the test feature table.
print(test_pandas_data)

             q     q_idx         r         s     s_idx  rr_interval  \
0    -0.183023  0.250000  0.576815 -0.107303  0.250000        236.0   
1    -0.181602  0.270833  0.630844 -0.146964  0.229167        358.0   
2    -0.138882  0.270833  0.630277 -0.128874  0.250000        188.0   
3    -0.172223  0.270833  0.565816 -0.105813  0.250000        339.0   
4    -0.185494  0.250000  0.624326 -0.109591  0.250000        219.0   
...        ...       ...       ...       ...       ...          ...   
8547 -0.212395  0.333333  0.811261 -0.175968  0.354167        307.0   
8548 -0.089478  0.645833  0.325256 -0.119764  0.666667        164.0   
8549 -0.218017  0.333333  0.832456 -0.195984  0.270833        322.0   
8550 -0.127537  0.604167  0.419947 -0.169328  0.666667        166.0   
8551 -0.223353  0.333333  0.865373 -0.199774  0.270833        315.0   

      rr_over_L5  rr_interval_all  peak_over_L5  peak_over_all  annot  annot2  
0       0.853835         0.824983      0.989037       0.905030    1

In [20]:
# The label columns come back from CSV as floats; make them integers.
train_pandas_data["annot"] = train_pandas_data["annot"].astype(int)
train_pandas_data["annot2"] = train_pandas_data["annot2"].astype(int)
test_pandas_data["annot"] = test_pandas_data["annot"].astype(int)
test_pandas_data["annot2"] = test_pandas_data["annot2"].astype(int)

# `annot` is the multi-class id, `annot2` the binary flag (1 when annot == 0).
y_train = train_pandas_data["annot"].values
y_test = test_pandas_data["annot"].values
y_train2 = train_pandas_data["annot2"].values
y_test2 = test_pandas_data["annot2"].values

# Use the `columns=` keyword: passing the axis positionally (`drop(cols, 1)`)
# is rejected by pandas 2.x.
X_train = train_pandas_data.drop(columns=["annot","annot2"])
X_test = test_pandas_data.drop(columns=["annot","annot2"])

# The LightGBM datasets are built on the binary target.
lgb_train = lgb.Dataset(X_train, y_train2)
lgb_eval = lgb.Dataset(X_test, y_test2)


In [21]:
# Binary random-forest-mode LightGBM model, run 1
# (bagging_seed 1, pos_bagging_fraction 0.1).
params = {
#           "objective" : "multiclass",
#           "num_class" : 2,
#           "metric" : "multi_logloss",
          "objective" : "binary",
          "metric" : "cross_entropy",
          "num_leaves" : 256,
          "max_depth": -1,
          "learning_rate" : 0.01,
          "bagging_fraction" : 0.3, 
          "feature_fraction" : 0.3,  
          "bagging_freq" : 5,        
          "bagging_seed" : 1,
          "pos_bagging_fraction" : 0.1,
          "neg_bagging_fraction" : 1.0,
          "boosting" : "rf", 
          "verbosity" : -1
           }

# Early stopping is configured through a callback: the `early_stopping_rounds`
# keyword of `lgb.train` was removed in LightGBM 4.
train_callbacks = [lgb.early_stopping(stopping_rounds=500)]
if hasattr(lgb, "log_evaluation"):
    # Newer releases only print the per-iteration metric when asked to.
    train_callbacks.append(lgb.log_evaluation(period=1))

print('Starting training...')
# train
# NOTE(review): lgb_eval is built from the test table, so the stopping point
# is selected on test data -- confirm this is intended.
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=100000,
                valid_sets=[lgb_eval],
                callbacks=train_callbacks)
#                 init_model='./model_DS1_DS2_190928.txt')

print('Saving model...')
# save model to file
gbm.save_model('./model_train_1209_1.txt')

print('Starting predicting...')
# predict


Starting training...
[1]	valid_0's cross_entropy: 0.679988
Training until validation scores don't improve for 500 rounds
[2]	valid_0's cross_entropy: 0.717203
[3]	valid_0's cross_entropy: 0.475656
[4]	valid_0's cross_entropy: 0.451777
[5]	valid_0's cross_entropy: 0.40767
[6]	valid_0's cross_entropy: 0.399822
[7]	valid_0's cross_entropy: 0.388281
[8]	valid_0's cross_entropy: 0.437706
[9]	valid_0's cross_entropy: 0.411407
[10]	valid_0's cross_entropy: 0.411518
[11]	valid_0's cross_entropy: 0.413698
[12]	valid_0's cross_entropy: 0.428168
[13]	valid_0's cross_entropy: 0.411347
[14]	valid_0's cross_entropy: 0.414018
[15]	valid_0's cross_entropy: 0.404241
[16]	valid_0's cross_entropy: 0.40884
[17]	valid_0's cross_entropy: 0.406194
[18]	valid_0's cross_entropy: 0.400028
[19]	valid_0's cross_entropy: 0.409249
[20]	valid_0's cross_entropy: 0.403591
[21]	valid_0's cross_entropy: 0.405994
[22]	valid_0's cross_entropy: 0.415688
[23]	valid_0's cross_entropy: 0.407255
[24]	valid_0's cross_entropy: 0

[207]	valid_0's cross_entropy: 0.397883
[208]	valid_0's cross_entropy: 0.398634
[209]	valid_0's cross_entropy: 0.398083
[210]	valid_0's cross_entropy: 0.398762
[211]	valid_0's cross_entropy: 0.398933
[212]	valid_0's cross_entropy: 0.39836
[213]	valid_0's cross_entropy: 0.398603
[214]	valid_0's cross_entropy: 0.398523
[215]	valid_0's cross_entropy: 0.398369
[216]	valid_0's cross_entropy: 0.397555
[217]	valid_0's cross_entropy: 0.397511
[218]	valid_0's cross_entropy: 0.398216
[219]	valid_0's cross_entropy: 0.398253
[220]	valid_0's cross_entropy: 0.398077
[221]	valid_0's cross_entropy: 0.397778
[222]	valid_0's cross_entropy: 0.398516
[223]	valid_0's cross_entropy: 0.399455
[224]	valid_0's cross_entropy: 0.399072
[225]	valid_0's cross_entropy: 0.399504
[226]	valid_0's cross_entropy: 0.398898
[227]	valid_0's cross_entropy: 0.398648
[228]	valid_0's cross_entropy: 0.397587
[229]	valid_0's cross_entropy: 0.398405
[230]	valid_0's cross_entropy: 0.397835
[231]	valid_0's cross_entropy: 0.399112
[

[412]	valid_0's cross_entropy: 0.39577
[413]	valid_0's cross_entropy: 0.395532
[414]	valid_0's cross_entropy: 0.396196
[415]	valid_0's cross_entropy: 0.395707
[416]	valid_0's cross_entropy: 0.396007
[417]	valid_0's cross_entropy: 0.396634
[418]	valid_0's cross_entropy: 0.396332
[419]	valid_0's cross_entropy: 0.396788
[420]	valid_0's cross_entropy: 0.396661
[421]	valid_0's cross_entropy: 0.396564
[422]	valid_0's cross_entropy: 0.397051
[423]	valid_0's cross_entropy: 0.396486
[424]	valid_0's cross_entropy: 0.396397
[425]	valid_0's cross_entropy: 0.396225
[426]	valid_0's cross_entropy: 0.396134
[427]	valid_0's cross_entropy: 0.396667
[428]	valid_0's cross_entropy: 0.396986
[429]	valid_0's cross_entropy: 0.396837
[430]	valid_0's cross_entropy: 0.397004
[431]	valid_0's cross_entropy: 0.397408
[432]	valid_0's cross_entropy: 0.397187
[433]	valid_0's cross_entropy: 0.396996
[434]	valid_0's cross_entropy: 0.396881
[435]	valid_0's cross_entropy: 0.397088
[436]	valid_0's cross_entropy: 0.396843
[

In [22]:
# Binary random-forest-mode LightGBM model, run 2
# (bagging_seed 2, pos_bagging_fraction 0.2).
params = {
#           "objective" : "multiclass",
#           "num_class" : 2,
#           "metric" : "multi_logloss",
          "objective" : "binary",
          "metric" : "cross_entropy",
          "num_leaves" : 256,
          "max_depth": -1,
          "learning_rate" : 0.01,
          "bagging_fraction" : 0.3, 
          "feature_fraction" : 0.3,  
          "bagging_freq" : 5,        
          "bagging_seed" : 2,
          "pos_bagging_fraction" : 0.2,
          "neg_bagging_fraction" : 1.0,
          "boosting" : "rf", 
          "verbosity" : -1
           }

# Early stopping is configured through a callback: the `early_stopping_rounds`
# keyword of `lgb.train` was removed in LightGBM 4.
train_callbacks = [lgb.early_stopping(stopping_rounds=500)]
if hasattr(lgb, "log_evaluation"):
    # Newer releases only print the per-iteration metric when asked to.
    train_callbacks.append(lgb.log_evaluation(period=1))

print('Starting training...')
# train
# NOTE(review): lgb_eval is built from the test table, so the stopping point
# is selected on test data -- confirm this is intended.
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=100000,
                valid_sets=[lgb_eval],
                callbacks=train_callbacks)
#                 init_model='./model_DS1_DS2_190928.txt')

print('Saving model...')
# save model to file
gbm.save_model('./model_train_1209_2.txt')

print('Starting predicting...')
# predict

Starting training...
[1]	valid_0's cross_entropy: 0.581537
Training until validation scores don't improve for 500 rounds
[2]	valid_0's cross_entropy: 0.555868
[3]	valid_0's cross_entropy: 0.384903
[4]	valid_0's cross_entropy: 0.382708
[5]	valid_0's cross_entropy: 0.341556
[6]	valid_0's cross_entropy: 0.349436
[7]	valid_0's cross_entropy: 0.336422
[8]	valid_0's cross_entropy: 0.352966
[9]	valid_0's cross_entropy: 0.340393
[10]	valid_0's cross_entropy: 0.350041
[11]	valid_0's cross_entropy: 0.345363
[12]	valid_0's cross_entropy: 0.351484
[13]	valid_0's cross_entropy: 0.344448
[14]	valid_0's cross_entropy: 0.345318
[15]	valid_0's cross_entropy: 0.332891
[16]	valid_0's cross_entropy: 0.331377
[17]	valid_0's cross_entropy: 0.32869
[18]	valid_0's cross_entropy: 0.327188
[19]	valid_0's cross_entropy: 0.329149
[20]	valid_0's cross_entropy: 0.326849
[21]	valid_0's cross_entropy: 0.329582
[22]	valid_0's cross_entropy: 0.333344
[23]	valid_0's cross_entropy: 0.33163
[24]	valid_0's cross_entropy: 0

[207]	valid_0's cross_entropy: 0.307649
[208]	valid_0's cross_entropy: 0.30789
[209]	valid_0's cross_entropy: 0.307705
[210]	valid_0's cross_entropy: 0.308065
[211]	valid_0's cross_entropy: 0.307748
[212]	valid_0's cross_entropy: 0.307549
[213]	valid_0's cross_entropy: 0.307985
[214]	valid_0's cross_entropy: 0.307707
[215]	valid_0's cross_entropy: 0.307846
[216]	valid_0's cross_entropy: 0.307741
[217]	valid_0's cross_entropy: 0.307811
[218]	valid_0's cross_entropy: 0.307804
[219]	valid_0's cross_entropy: 0.308107
[220]	valid_0's cross_entropy: 0.308194
[221]	valid_0's cross_entropy: 0.307954
[222]	valid_0's cross_entropy: 0.308524
[223]	valid_0's cross_entropy: 0.308776
[224]	valid_0's cross_entropy: 0.308757
[225]	valid_0's cross_entropy: 0.308689
[226]	valid_0's cross_entropy: 0.308394
[227]	valid_0's cross_entropy: 0.308423
[228]	valid_0's cross_entropy: 0.307816
[229]	valid_0's cross_entropy: 0.308349
[230]	valid_0's cross_entropy: 0.308077
[231]	valid_0's cross_entropy: 0.308473
[

[412]	valid_0's cross_entropy: 0.302103
[413]	valid_0's cross_entropy: 0.301987
[414]	valid_0's cross_entropy: 0.302276
[415]	valid_0's cross_entropy: 0.301963
[416]	valid_0's cross_entropy: 0.30211
[417]	valid_0's cross_entropy: 0.302431
[418]	valid_0's cross_entropy: 0.30223
[419]	valid_0's cross_entropy: 0.302436
[420]	valid_0's cross_entropy: 0.302206
[421]	valid_0's cross_entropy: 0.302012
[422]	valid_0's cross_entropy: 0.302415
[423]	valid_0's cross_entropy: 0.302121
[424]	valid_0's cross_entropy: 0.301934
[425]	valid_0's cross_entropy: 0.302057
[426]	valid_0's cross_entropy: 0.301777
[427]	valid_0's cross_entropy: 0.302146
[428]	valid_0's cross_entropy: 0.302291
[429]	valid_0's cross_entropy: 0.302251
[430]	valid_0's cross_entropy: 0.302208
[431]	valid_0's cross_entropy: 0.302409
[432]	valid_0's cross_entropy: 0.302443
[433]	valid_0's cross_entropy: 0.302468
[434]	valid_0's cross_entropy: 0.302344
[435]	valid_0's cross_entropy: 0.302389
[436]	valid_0's cross_entropy: 0.302433
[4

[617]	valid_0's cross_entropy: 0.301188
[618]	valid_0's cross_entropy: 0.301289
[619]	valid_0's cross_entropy: 0.301464
[620]	valid_0's cross_entropy: 0.301819
[621]	valid_0's cross_entropy: 0.301952
[622]	valid_0's cross_entropy: 0.301845
[623]	valid_0's cross_entropy: 0.301999
[624]	valid_0's cross_entropy: 0.302139
[625]	valid_0's cross_entropy: 0.302087
[626]	valid_0's cross_entropy: 0.302009
[627]	valid_0's cross_entropy: 0.301976
[628]	valid_0's cross_entropy: 0.301877
[629]	valid_0's cross_entropy: 0.302117
[630]	valid_0's cross_entropy: 0.302167
[631]	valid_0's cross_entropy: 0.302143
[632]	valid_0's cross_entropy: 0.302036
[633]	valid_0's cross_entropy: 0.302259
[634]	valid_0's cross_entropy: 0.30222
[635]	valid_0's cross_entropy: 0.302296
[636]	valid_0's cross_entropy: 0.302106
[637]	valid_0's cross_entropy: 0.301998
[638]	valid_0's cross_entropy: 0.302165
[639]	valid_0's cross_entropy: 0.302171
[640]	valid_0's cross_entropy: 0.302088
[641]	valid_0's cross_entropy: 0.302076
[

[822]	valid_0's cross_entropy: 0.300115
[823]	valid_0's cross_entropy: 0.300362
[824]	valid_0's cross_entropy: 0.300366
[825]	valid_0's cross_entropy: 0.300374
[826]	valid_0's cross_entropy: 0.300302
[827]	valid_0's cross_entropy: 0.300206
[828]	valid_0's cross_entropy: 0.3003
[829]	valid_0's cross_entropy: 0.300069
[830]	valid_0's cross_entropy: 0.300027
[831]	valid_0's cross_entropy: 0.300044
[832]	valid_0's cross_entropy: 0.299951
[833]	valid_0's cross_entropy: 0.299902
[834]	valid_0's cross_entropy: 0.299839
[835]	valid_0's cross_entropy: 0.299792
[836]	valid_0's cross_entropy: 0.299793
[837]	valid_0's cross_entropy: 0.299756
[838]	valid_0's cross_entropy: 0.29984
[839]	valid_0's cross_entropy: 0.29973
[840]	valid_0's cross_entropy: 0.299701
[841]	valid_0's cross_entropy: 0.299591
[842]	valid_0's cross_entropy: 0.299483
[843]	valid_0's cross_entropy: 0.29952
[844]	valid_0's cross_entropy: 0.29948
[845]	valid_0's cross_entropy: 0.299547
[846]	valid_0's cross_entropy: 0.299509
[847]	

[1026]	valid_0's cross_entropy: 0.298618
[1027]	valid_0's cross_entropy: 0.29853
[1028]	valid_0's cross_entropy: 0.298418
[1029]	valid_0's cross_entropy: 0.298442
[1030]	valid_0's cross_entropy: 0.298398
[1031]	valid_0's cross_entropy: 0.29839
[1032]	valid_0's cross_entropy: 0.298344
[1033]	valid_0's cross_entropy: 0.298359
[1034]	valid_0's cross_entropy: 0.298097
[1035]	valid_0's cross_entropy: 0.298124
[1036]	valid_0's cross_entropy: 0.298206
[1037]	valid_0's cross_entropy: 0.298206
[1038]	valid_0's cross_entropy: 0.298176
[1039]	valid_0's cross_entropy: 0.298193
[1040]	valid_0's cross_entropy: 0.298102
[1041]	valid_0's cross_entropy: 0.298037
[1042]	valid_0's cross_entropy: 0.297984
[1043]	valid_0's cross_entropy: 0.298015
[1044]	valid_0's cross_entropy: 0.297962
[1045]	valid_0's cross_entropy: 0.29801
[1046]	valid_0's cross_entropy: 0.29794
[1047]	valid_0's cross_entropy: 0.297952
[1048]	valid_0's cross_entropy: 0.297955
[1049]	valid_0's cross_entropy: 0.298012
[1050]	valid_0's cro

[1226]	valid_0's cross_entropy: 0.297759
[1227]	valid_0's cross_entropy: 0.297752
[1228]	valid_0's cross_entropy: 0.297713
[1229]	valid_0's cross_entropy: 0.297834
[1230]	valid_0's cross_entropy: 0.297733
[1231]	valid_0's cross_entropy: 0.297812
[1232]	valid_0's cross_entropy: 0.297953
[1233]	valid_0's cross_entropy: 0.297955
[1234]	valid_0's cross_entropy: 0.298022
[1235]	valid_0's cross_entropy: 0.298036
[1236]	valid_0's cross_entropy: 0.29806
[1237]	valid_0's cross_entropy: 0.297969
[1238]	valid_0's cross_entropy: 0.297742
[1239]	valid_0's cross_entropy: 0.297709
[1240]	valid_0's cross_entropy: 0.297484
[1241]	valid_0's cross_entropy: 0.297485
[1242]	valid_0's cross_entropy: 0.297445
[1243]	valid_0's cross_entropy: 0.297412
[1244]	valid_0's cross_entropy: 0.297171
[1245]	valid_0's cross_entropy: 0.297239
[1246]	valid_0's cross_entropy: 0.2973
[1247]	valid_0's cross_entropy: 0.297289
[1248]	valid_0's cross_entropy: 0.297252
[1249]	valid_0's cross_entropy: 0.297307
[1250]	valid_0's cr

[1426]	valid_0's cross_entropy: 0.296526
[1427]	valid_0's cross_entropy: 0.296517
[1428]	valid_0's cross_entropy: 0.296444
[1429]	valid_0's cross_entropy: 0.296534
[1430]	valid_0's cross_entropy: 0.296471
[1431]	valid_0's cross_entropy: 0.296537
[1432]	valid_0's cross_entropy: 0.29647
[1433]	valid_0's cross_entropy: 0.296427
[1434]	valid_0's cross_entropy: 0.296495
[1435]	valid_0's cross_entropy: 0.296565
[1436]	valid_0's cross_entropy: 0.29669
[1437]	valid_0's cross_entropy: 0.296683
[1438]	valid_0's cross_entropy: 0.296631
[1439]	valid_0's cross_entropy: 0.296683
[1440]	valid_0's cross_entropy: 0.29669
[1441]	valid_0's cross_entropy: 0.296594
[1442]	valid_0's cross_entropy: 0.296631
[1443]	valid_0's cross_entropy: 0.296599
[1444]	valid_0's cross_entropy: 0.296734
[1445]	valid_0's cross_entropy: 0.296727
[1446]	valid_0's cross_entropy: 0.296722
[1447]	valid_0's cross_entropy: 0.296695
[1448]	valid_0's cross_entropy: 0.296774
[1449]	valid_0's cross_entropy: 0.296803
[1450]	valid_0's cr

[1626]	valid_0's cross_entropy: 0.296745
[1627]	valid_0's cross_entropy: 0.296691
[1628]	valid_0's cross_entropy: 0.296669
[1629]	valid_0's cross_entropy: 0.296611
[1630]	valid_0's cross_entropy: 0.296655
[1631]	valid_0's cross_entropy: 0.296585
[1632]	valid_0's cross_entropy: 0.296556
[1633]	valid_0's cross_entropy: 0.296598
[1634]	valid_0's cross_entropy: 0.296585
[1635]	valid_0's cross_entropy: 0.296608
[1636]	valid_0's cross_entropy: 0.296587
[1637]	valid_0's cross_entropy: 0.296505
[1638]	valid_0's cross_entropy: 0.296555
[1639]	valid_0's cross_entropy: 0.296536
[1640]	valid_0's cross_entropy: 0.296594
[1641]	valid_0's cross_entropy: 0.296586
[1642]	valid_0's cross_entropy: 0.296497
[1643]	valid_0's cross_entropy: 0.296477
[1644]	valid_0's cross_entropy: 0.296517
[1645]	valid_0's cross_entropy: 0.296583
[1646]	valid_0's cross_entropy: 0.296581
[1647]	valid_0's cross_entropy: 0.29657
[1648]	valid_0's cross_entropy: 0.29653
[1649]	valid_0's cross_entropy: 0.296556
[1650]	valid_0's c

[1826]	valid_0's cross_entropy: 0.296676
[1827]	valid_0's cross_entropy: 0.296683
[1828]	valid_0's cross_entropy: 0.296669
[1829]	valid_0's cross_entropy: 0.296698
[1830]	valid_0's cross_entropy: 0.296678
[1831]	valid_0's cross_entropy: 0.296654
[1832]	valid_0's cross_entropy: 0.296622
[1833]	valid_0's cross_entropy: 0.296656
[1834]	valid_0's cross_entropy: 0.296597
[1835]	valid_0's cross_entropy: 0.296672
[1836]	valid_0's cross_entropy: 0.296648
[1837]	valid_0's cross_entropy: 0.296657
[1838]	valid_0's cross_entropy: 0.296656
[1839]	valid_0's cross_entropy: 0.296721
[1840]	valid_0's cross_entropy: 0.296662
[1841]	valid_0's cross_entropy: 0.296695
[1842]	valid_0's cross_entropy: 0.296645
[1843]	valid_0's cross_entropy: 0.296588
[1844]	valid_0's cross_entropy: 0.296562
[1845]	valid_0's cross_entropy: 0.296512
[1846]	valid_0's cross_entropy: 0.296534
[1847]	valid_0's cross_entropy: 0.296495
[1848]	valid_0's cross_entropy: 0.296546
[1849]	valid_0's cross_entropy: 0.2965
[1850]	valid_0's c

In [23]:
# Train ensemble member 3: a LightGBM binary classifier in random-forest mode.
# Against the following training cell (model 4) only `bagging_seed`,
# `pos_bagging_fraction` and the output file name differ.
params = {
          "objective" : "binary",
          "metric" : "cross_entropy",
          "num_leaves" : 256,
          "max_depth": -1,               # <= 0 means no depth limit in LightGBM
          "learning_rate" : 0.01,
          "bagging_fraction" : 0.3,
          "feature_fraction" : 0.3,
          "bagging_freq" : 5,            # re-draw the bagging sample every 5 iterations
          "bagging_seed" : 3,
          # Class-wise bagging: sample 30% of the positives, keep every negative.
          "pos_bagging_fraction" : 0.3,
          "neg_bagging_fraction" : 1.0,
          "boosting" : "rf",
          "verbosity" : -1
           }

print('Starting training...')
# Early stopping is requested through the callback API: the
# `early_stopping_rounds` keyword of lgb.train() was removed in LightGBM 4.0,
# whereas the callback is accepted by older releases as well.
# NOTE(review): on LightGBM >= 4 per-iteration metrics are only printed when
# lgb.log_evaluation(...) is added to `callbacks`.
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=100000,
                valid_sets=[lgb_eval],
                callbacks=[lgb.early_stopping(stopping_rounds=500)])

print('Saving model...')
# Persist the booster; a later cell reloads this file to predict on X_test.
gbm.save_model('./model_train_1209_3.txt')

print('Starting predicting...')
# No prediction happens in this cell; it is done after reloading the saved model.

Starting training...
[1]	valid_0's cross_entropy: 0.556288
Training until validation scores don't improve for 500 rounds
[2]	valid_0's cross_entropy: 0.607363
[3]	valid_0's cross_entropy: 0.408315
[4]	valid_0's cross_entropy: 0.400984
[5]	valid_0's cross_entropy: 0.333636
[6]	valid_0's cross_entropy: 0.332148
[7]	valid_0's cross_entropy: 0.31964
[8]	valid_0's cross_entropy: 0.332523
[9]	valid_0's cross_entropy: 0.322696
[10]	valid_0's cross_entropy: 0.325858
[11]	valid_0's cross_entropy: 0.323892
[12]	valid_0's cross_entropy: 0.325983
[13]	valid_0's cross_entropy: 0.323501
[14]	valid_0's cross_entropy: 0.325376
[15]	valid_0's cross_entropy: 0.307953
[16]	valid_0's cross_entropy: 0.300887
[17]	valid_0's cross_entropy: 0.299212
[18]	valid_0's cross_entropy: 0.302641
[19]	valid_0's cross_entropy: 0.305033
[20]	valid_0's cross_entropy: 0.30513
[21]	valid_0's cross_entropy: 0.307392
[22]	valid_0's cross_entropy: 0.310206
[23]	valid_0's cross_entropy: 0.311837
[24]	valid_0's cross_entropy: 0

[206]	valid_0's cross_entropy: 0.276058
[207]	valid_0's cross_entropy: 0.276118
[208]	valid_0's cross_entropy: 0.276053
[209]	valid_0's cross_entropy: 0.275983
[210]	valid_0's cross_entropy: 0.276366
[211]	valid_0's cross_entropy: 0.275833
[212]	valid_0's cross_entropy: 0.275866
[213]	valid_0's cross_entropy: 0.276402
[214]	valid_0's cross_entropy: 0.275855
[215]	valid_0's cross_entropy: 0.276089
[216]	valid_0's cross_entropy: 0.276131
[217]	valid_0's cross_entropy: 0.276281
[218]	valid_0's cross_entropy: 0.276398
[219]	valid_0's cross_entropy: 0.276622
[220]	valid_0's cross_entropy: 0.276717
[221]	valid_0's cross_entropy: 0.276258
[222]	valid_0's cross_entropy: 0.276694
[223]	valid_0's cross_entropy: 0.277111
[224]	valid_0's cross_entropy: 0.277223
[225]	valid_0's cross_entropy: 0.277089
[226]	valid_0's cross_entropy: 0.277105
[227]	valid_0's cross_entropy: 0.277294
[228]	valid_0's cross_entropy: 0.276943
[229]	valid_0's cross_entropy: 0.277406
[230]	valid_0's cross_entropy: 0.277434


[411]	valid_0's cross_entropy: 0.27166
[412]	valid_0's cross_entropy: 0.271723
[413]	valid_0's cross_entropy: 0.271761
[414]	valid_0's cross_entropy: 0.271918
[415]	valid_0's cross_entropy: 0.271727
[416]	valid_0's cross_entropy: 0.27189
[417]	valid_0's cross_entropy: 0.272112
[418]	valid_0's cross_entropy: 0.272014
[419]	valid_0's cross_entropy: 0.272049
[420]	valid_0's cross_entropy: 0.271765
[421]	valid_0's cross_entropy: 0.271582
[422]	valid_0's cross_entropy: 0.271931
[423]	valid_0's cross_entropy: 0.271775
[424]	valid_0's cross_entropy: 0.271598
[425]	valid_0's cross_entropy: 0.271764
[426]	valid_0's cross_entropy: 0.271668
[427]	valid_0's cross_entropy: 0.271893
[428]	valid_0's cross_entropy: 0.271748
[429]	valid_0's cross_entropy: 0.271695
[430]	valid_0's cross_entropy: 0.271573
[431]	valid_0's cross_entropy: 0.271758
[432]	valid_0's cross_entropy: 0.271826
[433]	valid_0's cross_entropy: 0.271925
[434]	valid_0's cross_entropy: 0.271879
[435]	valid_0's cross_entropy: 0.271946
[4

[616]	valid_0's cross_entropy: 0.271286
[617]	valid_0's cross_entropy: 0.271344
[618]	valid_0's cross_entropy: 0.271338
[619]	valid_0's cross_entropy: 0.271498
[620]	valid_0's cross_entropy: 0.271684
[621]	valid_0's cross_entropy: 0.271777
[622]	valid_0's cross_entropy: 0.27174
[623]	valid_0's cross_entropy: 0.271912
[624]	valid_0's cross_entropy: 0.27202
[625]	valid_0's cross_entropy: 0.271968
[626]	valid_0's cross_entropy: 0.271814
[627]	valid_0's cross_entropy: 0.271782
[628]	valid_0's cross_entropy: 0.271774
[629]	valid_0's cross_entropy: 0.271953
[630]	valid_0's cross_entropy: 0.272009
[631]	valid_0's cross_entropy: 0.272085
[632]	valid_0's cross_entropy: 0.272038
[633]	valid_0's cross_entropy: 0.272162
[634]	valid_0's cross_entropy: 0.272193
[635]	valid_0's cross_entropy: 0.272237
[636]	valid_0's cross_entropy: 0.272142
[637]	valid_0's cross_entropy: 0.272052
[638]	valid_0's cross_entropy: 0.272108
[639]	valid_0's cross_entropy: 0.272076
[640]	valid_0's cross_entropy: 0.272064
[6

[821]	valid_0's cross_entropy: 0.270762
[822]	valid_0's cross_entropy: 0.270848
[823]	valid_0's cross_entropy: 0.271005
[824]	valid_0's cross_entropy: 0.271049
[825]	valid_0's cross_entropy: 0.271022
[826]	valid_0's cross_entropy: 0.270948
[827]	valid_0's cross_entropy: 0.270805
[828]	valid_0's cross_entropy: 0.270874
[829]	valid_0's cross_entropy: 0.270612
[830]	valid_0's cross_entropy: 0.270611
[831]	valid_0's cross_entropy: 0.270507
[832]	valid_0's cross_entropy: 0.270457
[833]	valid_0's cross_entropy: 0.270455
[834]	valid_0's cross_entropy: 0.270458
[835]	valid_0's cross_entropy: 0.270457
[836]	valid_0's cross_entropy: 0.270483
[837]	valid_0's cross_entropy: 0.270476
[838]	valid_0's cross_entropy: 0.270518
[839]	valid_0's cross_entropy: 0.270489
[840]	valid_0's cross_entropy: 0.270515
[841]	valid_0's cross_entropy: 0.270416
[842]	valid_0's cross_entropy: 0.270428
[843]	valid_0's cross_entropy: 0.270388
[844]	valid_0's cross_entropy: 0.270375
[845]	valid_0's cross_entropy: 0.270481


[1025]	valid_0's cross_entropy: 0.269982
[1026]	valid_0's cross_entropy: 0.269988
[1027]	valid_0's cross_entropy: 0.269885
[1028]	valid_0's cross_entropy: 0.26975
[1029]	valid_0's cross_entropy: 0.269812
[1030]	valid_0's cross_entropy: 0.269824
[1031]	valid_0's cross_entropy: 0.269813
[1032]	valid_0's cross_entropy: 0.26982
[1033]	valid_0's cross_entropy: 0.269853
[1034]	valid_0's cross_entropy: 0.269635
[1035]	valid_0's cross_entropy: 0.269671
[1036]	valid_0's cross_entropy: 0.269721
[1037]	valid_0's cross_entropy: 0.269702
[1038]	valid_0's cross_entropy: 0.269647
[1039]	valid_0's cross_entropy: 0.26962
[1040]	valid_0's cross_entropy: 0.269489
[1041]	valid_0's cross_entropy: 0.269475
[1042]	valid_0's cross_entropy: 0.269384
[1043]	valid_0's cross_entropy: 0.269418
[1044]	valid_0's cross_entropy: 0.269328
[1045]	valid_0's cross_entropy: 0.269344
[1046]	valid_0's cross_entropy: 0.26929
[1047]	valid_0's cross_entropy: 0.269224
[1048]	valid_0's cross_entropy: 0.269202
[1049]	valid_0's cro

[1225]	valid_0's cross_entropy: 0.268625
[1226]	valid_0's cross_entropy: 0.268666
[1227]	valid_0's cross_entropy: 0.268696
[1228]	valid_0's cross_entropy: 0.268705
[1229]	valid_0's cross_entropy: 0.268798
[1230]	valid_0's cross_entropy: 0.268732
[1231]	valid_0's cross_entropy: 0.268804
[1232]	valid_0's cross_entropy: 0.268909
[1233]	valid_0's cross_entropy: 0.268928
[1234]	valid_0's cross_entropy: 0.268989
[1235]	valid_0's cross_entropy: 0.268935
[1236]	valid_0's cross_entropy: 0.268972
[1237]	valid_0's cross_entropy: 0.268937
[1238]	valid_0's cross_entropy: 0.268742
[1239]	valid_0's cross_entropy: 0.26872
[1240]	valid_0's cross_entropy: 0.268526
[1241]	valid_0's cross_entropy: 0.268556
[1242]	valid_0's cross_entropy: 0.268562
[1243]	valid_0's cross_entropy: 0.268545
[1244]	valid_0's cross_entropy: 0.268419
[1245]	valid_0's cross_entropy: 0.268463
[1246]	valid_0's cross_entropy: 0.268428
[1247]	valid_0's cross_entropy: 0.268429
[1248]	valid_0's cross_entropy: 0.26842
[1249]	valid_0's c

[1425]	valid_0's cross_entropy: 0.268199
[1426]	valid_0's cross_entropy: 0.26824
[1427]	valid_0's cross_entropy: 0.268284
[1428]	valid_0's cross_entropy: 0.268255
[1429]	valid_0's cross_entropy: 0.268328
[1430]	valid_0's cross_entropy: 0.268254
[1431]	valid_0's cross_entropy: 0.26834
[1432]	valid_0's cross_entropy: 0.268286
[1433]	valid_0's cross_entropy: 0.26821
[1434]	valid_0's cross_entropy: 0.268194
[1435]	valid_0's cross_entropy: 0.268226
[1436]	valid_0's cross_entropy: 0.268313
[1437]	valid_0's cross_entropy: 0.268302
[1438]	valid_0's cross_entropy: 0.268232
[1439]	valid_0's cross_entropy: 0.268307
[1440]	valid_0's cross_entropy: 0.26833
[1441]	valid_0's cross_entropy: 0.268246
[1442]	valid_0's cross_entropy: 0.268256
[1443]	valid_0's cross_entropy: 0.268236
[1444]	valid_0's cross_entropy: 0.268336
[1445]	valid_0's cross_entropy: 0.268285
[1446]	valid_0's cross_entropy: 0.268276
[1447]	valid_0's cross_entropy: 0.268263
[1448]	valid_0's cross_entropy: 0.268316
[1449]	valid_0's cro

[1625]	valid_0's cross_entropy: 0.268412
[1626]	valid_0's cross_entropy: 0.2684
[1627]	valid_0's cross_entropy: 0.268375
[1628]	valid_0's cross_entropy: 0.268382
[1629]	valid_0's cross_entropy: 0.268333
[1630]	valid_0's cross_entropy: 0.268383
[1631]	valid_0's cross_entropy: 0.268315
[1632]	valid_0's cross_entropy: 0.268295
[1633]	valid_0's cross_entropy: 0.268287
[1634]	valid_0's cross_entropy: 0.268286
[1635]	valid_0's cross_entropy: 0.268317
[1636]	valid_0's cross_entropy: 0.268302
[1637]	valid_0's cross_entropy: 0.268284
[1638]	valid_0's cross_entropy: 0.268344
[1639]	valid_0's cross_entropy: 0.268344
[1640]	valid_0's cross_entropy: 0.26841
[1641]	valid_0's cross_entropy: 0.268415
[1642]	valid_0's cross_entropy: 0.26833
[1643]	valid_0's cross_entropy: 0.2683
[1644]	valid_0's cross_entropy: 0.268345
[1645]	valid_0's cross_entropy: 0.268405
[1646]	valid_0's cross_entropy: 0.268422
[1647]	valid_0's cross_entropy: 0.268413
[1648]	valid_0's cross_entropy: 0.268413
[1649]	valid_0's cross

[1825]	valid_0's cross_entropy: 0.268926
[1826]	valid_0's cross_entropy: 0.26891
[1827]	valid_0's cross_entropy: 0.268854
[1828]	valid_0's cross_entropy: 0.268865
[1829]	valid_0's cross_entropy: 0.268862
[1830]	valid_0's cross_entropy: 0.268834
[1831]	valid_0's cross_entropy: 0.268826
[1832]	valid_0's cross_entropy: 0.268825
[1833]	valid_0's cross_entropy: 0.268851
[1834]	valid_0's cross_entropy: 0.268829
[1835]	valid_0's cross_entropy: 0.268895
[1836]	valid_0's cross_entropy: 0.268889
[1837]	valid_0's cross_entropy: 0.26891
[1838]	valid_0's cross_entropy: 0.268895
[1839]	valid_0's cross_entropy: 0.268922
[1840]	valid_0's cross_entropy: 0.268876
[1841]	valid_0's cross_entropy: 0.268898
[1842]	valid_0's cross_entropy: 0.268851
[1843]	valid_0's cross_entropy: 0.268779
[1844]	valid_0's cross_entropy: 0.268773
[1845]	valid_0's cross_entropy: 0.268729
[1846]	valid_0's cross_entropy: 0.268754
[1847]	valid_0's cross_entropy: 0.268734
[1848]	valid_0's cross_entropy: 0.268763
[1849]	valid_0's c

In [24]:
# Train ensemble member 4: a LightGBM binary classifier in random-forest mode.
# Against the preceding training cell (model 3) only `bagging_seed`,
# `pos_bagging_fraction` and the output file name differ.
params = {
          "objective" : "binary",
          "metric" : "cross_entropy",
          "num_leaves" : 256,
          "max_depth": -1,               # <= 0 means no depth limit in LightGBM
          "learning_rate" : 0.01,
          "bagging_fraction" : 0.3,
          "feature_fraction" : 0.3,
          "bagging_freq" : 5,            # re-draw the bagging sample every 5 iterations
          "bagging_seed" : 4,
          # Class-wise bagging: sample 40% of the positives, keep every negative.
          "pos_bagging_fraction" : 0.4,
          "neg_bagging_fraction" : 1.0,
          "boosting" : "rf",
          "verbosity" : -1
           }

print('Starting training...')
# Early stopping is requested through the callback API: the
# `early_stopping_rounds` keyword of lgb.train() was removed in LightGBM 4.0,
# whereas the callback is accepted by older releases as well.
# NOTE(review): on LightGBM >= 4 per-iteration metrics are only printed when
# lgb.log_evaluation(...) is added to `callbacks`.
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=100000,
                valid_sets=[lgb_eval],
                callbacks=[lgb.early_stopping(stopping_rounds=500)])

print('Saving model...')
# Persist the booster; a later cell reloads this file to predict on X_test.
gbm.save_model('./model_train_1209_4.txt')

print('Starting predicting...')
# No prediction happens in this cell; it is done after reloading the saved model.

Starting training...
[1]	valid_0's cross_entropy: 0.551649
Training until validation scores don't improve for 500 rounds
[2]	valid_0's cross_entropy: 0.545044
[3]	valid_0's cross_entropy: 0.386161
[4]	valid_0's cross_entropy: 0.382729
[5]	valid_0's cross_entropy: 0.315407
[6]	valid_0's cross_entropy: 0.315254
[7]	valid_0's cross_entropy: 0.31522
[8]	valid_0's cross_entropy: 0.32773
[9]	valid_0's cross_entropy: 0.321308
[10]	valid_0's cross_entropy: 0.322137
[11]	valid_0's cross_entropy: 0.31905
[12]	valid_0's cross_entropy: 0.319461
[13]	valid_0's cross_entropy: 0.318075
[14]	valid_0's cross_entropy: 0.320109
[15]	valid_0's cross_entropy: 0.300381
[16]	valid_0's cross_entropy: 0.288281
[17]	valid_0's cross_entropy: 0.287801
[18]	valid_0's cross_entropy: 0.289626
[19]	valid_0's cross_entropy: 0.291284
[20]	valid_0's cross_entropy: 0.29271
[21]	valid_0's cross_entropy: 0.295336
[22]	valid_0's cross_entropy: 0.296859
[23]	valid_0's cross_entropy: 0.300084
[24]	valid_0's cross_entropy: 0.3

[207]	valid_0's cross_entropy: 0.264283
[208]	valid_0's cross_entropy: 0.263964
[209]	valid_0's cross_entropy: 0.263953
[210]	valid_0's cross_entropy: 0.264207
[211]	valid_0's cross_entropy: 0.263555
[212]	valid_0's cross_entropy: 0.263597
[213]	valid_0's cross_entropy: 0.264076
[214]	valid_0's cross_entropy: 0.263396
[215]	valid_0's cross_entropy: 0.26364
[216]	valid_0's cross_entropy: 0.263936
[217]	valid_0's cross_entropy: 0.264102
[218]	valid_0's cross_entropy: 0.263983
[219]	valid_0's cross_entropy: 0.264246
[220]	valid_0's cross_entropy: 0.264434
[221]	valid_0's cross_entropy: 0.264103
[222]	valid_0's cross_entropy: 0.264541
[223]	valid_0's cross_entropy: 0.264889
[224]	valid_0's cross_entropy: 0.265014
[225]	valid_0's cross_entropy: 0.26488
[226]	valid_0's cross_entropy: 0.26496
[227]	valid_0's cross_entropy: 0.265095
[228]	valid_0's cross_entropy: 0.264747
[229]	valid_0's cross_entropy: 0.265291
[230]	valid_0's cross_entropy: 0.265381
[231]	valid_0's cross_entropy: 0.265555
[23

[412]	valid_0's cross_entropy: 0.258498
[413]	valid_0's cross_entropy: 0.258537
[414]	valid_0's cross_entropy: 0.258734
[415]	valid_0's cross_entropy: 0.258562
[416]	valid_0's cross_entropy: 0.258577
[417]	valid_0's cross_entropy: 0.258654
[418]	valid_0's cross_entropy: 0.25861
[419]	valid_0's cross_entropy: 0.258493
[420]	valid_0's cross_entropy: 0.258175
[421]	valid_0's cross_entropy: 0.258035
[422]	valid_0's cross_entropy: 0.258324
[423]	valid_0's cross_entropy: 0.258265
[424]	valid_0's cross_entropy: 0.25813
[425]	valid_0's cross_entropy: 0.258366
[426]	valid_0's cross_entropy: 0.258322
[427]	valid_0's cross_entropy: 0.258593
[428]	valid_0's cross_entropy: 0.258304
[429]	valid_0's cross_entropy: 0.258332
[430]	valid_0's cross_entropy: 0.258232
[431]	valid_0's cross_entropy: 0.258402
[432]	valid_0's cross_entropy: 0.258551
[433]	valid_0's cross_entropy: 0.258739
[434]	valid_0's cross_entropy: 0.258697
[435]	valid_0's cross_entropy: 0.258769
[436]	valid_0's cross_entropy: 0.258879
[4

[617]	valid_0's cross_entropy: 0.258457
[618]	valid_0's cross_entropy: 0.258388
[619]	valid_0's cross_entropy: 0.258532
[620]	valid_0's cross_entropy: 0.258675
[621]	valid_0's cross_entropy: 0.258777
[622]	valid_0's cross_entropy: 0.258716
[623]	valid_0's cross_entropy: 0.258921
[624]	valid_0's cross_entropy: 0.258987
[625]	valid_0's cross_entropy: 0.258998
[626]	valid_0's cross_entropy: 0.25878
[627]	valid_0's cross_entropy: 0.258779
[628]	valid_0's cross_entropy: 0.258815
[629]	valid_0's cross_entropy: 0.258982
[630]	valid_0's cross_entropy: 0.259065
[631]	valid_0's cross_entropy: 0.259136
[632]	valid_0's cross_entropy: 0.259113
[633]	valid_0's cross_entropy: 0.25921
[634]	valid_0's cross_entropy: 0.259264
[635]	valid_0's cross_entropy: 0.259327
[636]	valid_0's cross_entropy: 0.25919
[637]	valid_0's cross_entropy: 0.259068
[638]	valid_0's cross_entropy: 0.259131
[639]	valid_0's cross_entropy: 0.259115
[640]	valid_0's cross_entropy: 0.25914
[641]	valid_0's cross_entropy: 0.259138
[642

[822]	valid_0's cross_entropy: 0.258429
[823]	valid_0's cross_entropy: 0.258538
[824]	valid_0's cross_entropy: 0.258593
[825]	valid_0's cross_entropy: 0.258534
[826]	valid_0's cross_entropy: 0.258394
[827]	valid_0's cross_entropy: 0.258361
[828]	valid_0's cross_entropy: 0.258453
[829]	valid_0's cross_entropy: 0.258219
[830]	valid_0's cross_entropy: 0.258234
[831]	valid_0's cross_entropy: 0.25816
[832]	valid_0's cross_entropy: 0.258164
[833]	valid_0's cross_entropy: 0.25821
[834]	valid_0's cross_entropy: 0.258228
[835]	valid_0's cross_entropy: 0.258258
[836]	valid_0's cross_entropy: 0.258302
[837]	valid_0's cross_entropy: 0.258317
[838]	valid_0's cross_entropy: 0.258352
[839]	valid_0's cross_entropy: 0.258347
[840]	valid_0's cross_entropy: 0.258388
[841]	valid_0's cross_entropy: 0.258311
Early stopping, best iteration is:
[341]	valid_0's cross_entropy: 0.25736
Saving model...
Starting predicting...


In [25]:
# Reload each saved booster from disk and score X_test with it.
# `gbm` ends up bound to the last model loaded (model 4), as before.
model_files = [
    './model_train_1209_1.txt',
    './model_train_1209_2.txt',
    './model_train_1209_3.txt',
    './model_train_1209_4.txt',
]
model_outputs = []
for model_file in model_files:
    gbm = lightgbm.Booster(model_file=model_file)
    model_outputs.append(gbm.predict(X_test))

# One prediction array per model; evaluated in the following cells.
y_pred1, y_pred2, y_pred3, y_pred4 = model_outputs

In [26]:
# Turn model 1's scores into hard 0/1 labels (cut-off 0.5) and evaluate against y_test2.
new_ypred = [1 if score >= 0.5 else 0 for score in y_pred1]

conf_mat = confusion_matrix(y_test2, new_ypred)
print(conf_mat)
tn, fp, fn, tp = conf_mat.ravel()

# All metrics are percentages; accuracy is printed bare and then labelled.
n_total = tn + fp + fn + tp
print((tn + tp) / n_total * 100)

print('acc', (tn + tp) / n_total * 100)
recall = tp / (tp + fn) * 100
preci = tp / (tp + fp) * 100
print('recall', recall)
print('precision', preci)
print('F1', 2 * recall * preci / (recall + preci))

[[3890  386]
 [ 976 3300]]
84.07390084190833
acc 84.07390084190833
recall 77.17492984097288
precision 89.52794357026586
F1 82.89374529012812


In [27]:
# Turn model 2's scores into hard 0/1 labels (cut-off 0.5) and evaluate against y_test2.
new_ypred = [1 if score >= 0.5 else 0 for score in y_pred2]

conf_mat = confusion_matrix(y_test2, new_ypred)
print(conf_mat)
tn, fp, fn, tp = conf_mat.ravel()

# All metrics are percentages; accuracy is printed bare and then labelled.
n_total = tn + fp + fn + tp
print((tn + tp) / n_total * 100)

print('acc', (tn + tp) / n_total * 100)
recall = tp / (tp + fn) * 100
preci = tp / (tp + fp) * 100
print('recall', recall)
print('precision', preci)
print('F1', 2 * recall * preci / (recall + preci))

[[3842  434]
 [ 570 3706]]
88.2600561272217
acc 88.2600561272217
recall 86.66978484565014
precision 89.51690821256038
F1 88.07034220532319


In [28]:
# Turn model 3's scores into hard 0/1 labels (cut-off 0.5) and evaluate against y_test2.
new_ypred = [1 if score >= 0.5 else 0 for score in y_pred3]

conf_mat = confusion_matrix(y_test2, new_ypred)
print(conf_mat)
tn, fp, fn, tp = conf_mat.ravel()

# All metrics are percentages; accuracy is printed bare and then labelled.
n_total = tn + fp + fn + tp
print((tn + tp) / n_total * 100)

print('acc', (tn + tp) / n_total * 100)
recall = tp / (tp + fn) * 100
preci = tp / (tp + fp) * 100
print('recall', recall)
print('precision', preci)
print('F1', 2 * recall * preci / (recall + preci))

[[3764  512]
 [ 350 3926]]
89.92048643592142
acc 89.92048643592142
recall 91.81478016838166
precision 88.46327174402884
F1 90.10787238925866


In [29]:
# Turn model 4's scores into hard 0/1 labels (cut-off 0.5) and evaluate against y_test2.
new_ypred = [1 if score >= 0.5 else 0 for score in y_pred4]

conf_mat = confusion_matrix(y_test2, new_ypred)
print(conf_mat)
tn, fp, fn, tp = conf_mat.ravel()

# All metrics are percentages; accuracy is printed bare and then labelled.
n_total = tn + fp + fn + tp
print((tn + tp) / n_total * 100)

print('acc', (tn + tp) / n_total * 100)
recall = tp / (tp + fn) * 100
preci = tp / (tp + fp) * 100
print('recall', recall)
print('precision', preci)
print('F1', 2 * recall * preci / (recall + preci))

[[3755  521]
 [ 308 3968]]
90.30636108512628
acc 90.30636108512628
recall 92.79700654817586
precision 88.3938516373357
F1 90.54192812321735


In [30]:
# Vote across the four booster outputs: a sample is labelled 1 when at least
# two of the four scores reach 0.5 (so a 2-2 split resolves to 1).
vote_cont = [y_pred1, y_pred2, y_pred3, y_pred4]
new_ypred = []
for i in range(len(y_pred1)):
    cnt_1 = sum(1 for cont in vote_cont if cont[i] >= 0.5)
    new_ypred.append(1 if cnt_1 >= 2 else 0)

In [31]:
# Evaluate the labels currently held in new_ypred against y_test2.
conf_mat = confusion_matrix(y_test2, new_ypred)
print(conf_mat)
tn, fp, fn, tp = conf_mat.ravel()

# All metrics are percentages; accuracy is printed bare and then labelled.
n_total = tn + fp + fn + tp
print((tn + tp) / n_total * 100)

print('acc', (tn + tp) / n_total * 100)
recall = tp / (tp + fn) * 100
preci = tp / (tp + fp) * 100
print('recall', recall)
print('precision', preci)
print('F1', 2 * recall * preci / (recall + preci))

# Note kept from the original cell (appears to record a score from another run — TODO confirm):
#model_train_test_0928_leave_64 94.60467930617185

[[3765  511]
 [ 330 3946]]
90.16604303086997
acc 90.16604303086997
recall 92.28250701590271
precision 88.53488893874804
F1 90.36986144509332


In [32]:
# Print the raw confusion-matrix counts in TP, FP, FN, TN order. The names hold
# whatever the most recently executed metrics cell unpacked.
print(tp, fp, fn, tn) 

3946 511 330 3765


#  SVM

In [33]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler

# Linear-kernel SVM baseline. The features are passed as-is: the
# StandardScaler step below stays disabled, exactly as in the original cell.
# sc = StandardScaler()
# sc.fit(X_train)
# X_train_std = sc.transform(X_train)
# X_test_std = sc.transform(X_test)
ml = SVC(C=1.0, kernel='linear', random_state=0)
ml.fit(X_train, y_train2)

# Hard class labels for the test set (used again by the ensemble cell).
y_pred5 = ml.predict(X_test)
print(y_pred5)

conf_mat = confusion_matrix(y_test2, y_pred5)
print(conf_mat)
tn, fp, fn, tp = conf_mat.ravel()

# All metrics are percentages; accuracy is printed bare and then labelled.
n_total = tn + fp + fn + tp
print((tn + tp) / n_total * 100)

print('acc', (tn + tp) / n_total * 100)
recall = tp / (tp + fn) * 100
preci = tp / (tp + fp) * 100
print('recall', recall)
print('precision', preci)
print('F1', 2 * recall * preci / (recall + preci))

[1 1 1 ... 1 0 1]
[[3167 1109]
 [ 113 4163]]
85.71094480823199
acc 85.71094480823199
recall 97.35734331150609
precision 78.96433990895297
F1 87.20150816925012


#  XGBoost

In [34]:
# Install xgboost for the XGBClassifier baseline in the next cell (no version is pinned).
!pip install xgboost



In [35]:
from xgboost import XGBClassifier

# XGBoost baseline built with the library's default hyperparameters.
model = XGBClassifier()
model.fit(X_train, y_train2)

# Hard class labels for the test set (used again by the ensemble cell).
y_pred6 = model.predict(X_test)
print(y_pred6)

conf_mat = confusion_matrix(y_test2, y_pred6)
print(conf_mat)
tn, fp, fn, tp = conf_mat.ravel()

# All metrics are percentages; accuracy is printed bare and then labelled.
n_total = tn + fp + fn + tp
print((tn + tp) / n_total * 100)

print('acc', (tn + tp) / n_total * 100)
recall = tp / (tp + fn) * 100
preci = tp / (tp + fp) * 100
print('recall', recall)
print('precision', preci)
print('F1', 2 * recall * preci / (recall + preci))

[1 1 1 ... 1 0 1]
[[3302  974]
 [  47 4229]]
88.06127221702525
acc 88.06127221702525
recall 98.90084190832555
precision 81.28003075148953
F1 89.22882160565462


# Ensemble

In [36]:
# Six-voter ensemble: the four booster score arrays (a vote when >= 0.5)
# plus the SVM and XGBoost hard labels (a vote when == 1).
vote_cont = [y_pred1, y_pred2, y_pred3, y_pred4]
new_ypred = []

# Votes needed for label 1: half of all voters, so an even split resolves to 1.
# (The original kept a stricter `cnt_1 >= 5` rule commented out.)
votes_needed = (len(vote_cont) + 2) / 2

for i in range(len(y_pred1)):
    cnt_1 = sum(1 for cont in vote_cont if cont[i] >= 0.5)
    cnt_1 += int(y_pred5[i] == 1) + int(y_pred6[i] == 1)
    new_ypred.append(1 if cnt_1 >= votes_needed else 0)

conf_mat = confusion_matrix(y_test2, new_ypred)
print(conf_mat)
tn, fp, fn, tp = conf_mat.ravel()

# All metrics are percentages; accuracy is printed bare and then labelled.
n_total = tn + fp + fn + tp
print((tn + tp) / n_total * 100)

print('acc', (tn + tp) / n_total * 100)
recall = tp / (tp + fn) * 100
preci = tp / (tp + fp) * 100
print('recall', recall)
print('precision', preci)
print('F1', 2 * recall * preci / (recall + preci))

[[3737  539]
 [ 277 3999]]
90.45837231057062
acc 90.45837231057062
recall 93.52198316183349
precision 88.1225209343323
F1 90.74200136147039
