In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers,activations
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)


import tensorflow as tf
from keras.utils import np_utils

train_data = pd.read_csv(r'/mnt/ML/train.csv')
test_data = pd.read_csv(r'/mnt/ML/testA.csv')

train_get_feature = [k.split(',') for k in train_data.heartbeat_signals]
test_get_feature = [k.split(',') for k in test_data.heartbeat_signals]

train_feature = pd.DataFrame(train_get_feature)
train_feature = train_feature.astype(float)

test_feature = pd.DataFrame(test_get_feature)
test_feature = test_feature.astype(float)


Using TensorFlow backend.


In [2]:
X_train, X_validation, Y_train, Y_validation = train_test_split(train_feature, train_data.label, test_size=0.2,random_state=42)

train_X = np.array(X_train)
train_Y = np.array(Y_train)
X_validation = np.array(X_validation)
Y_validation = np.array(Y_validation)

train_X = train_X.reshape(train_X.shape[0], train_X.shape[1], 1)
X_validation = X_validation.reshape(X_validation.shape[0], X_validation.shape[1], 1)

In [3]:



class Residual(tf.keras.Model):
    def __init__(self, num_channels,kernel_size, use_1x1conv=False, strides=1, **kwargs):
        super(Residual, self).__init__(**kwargs)
        self.conv1 = layers.Conv1D(num_channels,
                                   padding='same',
                                   kernel_size=kernel_size,
                                   strides=strides)
        self.conv2 = layers.Conv1D(num_channels, kernel_size=kernel_size,padding='same')
        if use_1x1conv:
            self.conv3 = layers.Conv1D(num_channels,
                                       kernel_size=1,
                                       strides=strides)
        else:
            self.conv3 = None
        self.bn1 = layers.BatchNormalization()
        self.bn2 = layers.BatchNormalization()

    def call(self, X):
        Y = activations.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        return activations.relu(Y + X)


class ResnetBlock(tf.keras.layers.Layer):
    def __init__(self,num_channels,kernel_size, num_residuals, first_block=False,**kwargs):
        super(ResnetBlock, self).__init__(**kwargs)
        self.listLayers=[]
        for i in range(num_residuals):
            if i == 0 and not first_block:
                self.listLayers.append(Residual(num_channels,kernel_size, use_1x1conv=True, strides=2))
            else:
                self.listLayers.append(Residual(num_channels,kernel_size))      

    def call(self, X):
        for layer in self.listLayers.layers:
            X = layer(X)
        return X


class ResNet(tf.keras.Model):
    def __init__(self,num_blocks,**kwargs):
        super(ResNet, self).__init__(**kwargs)
        self.conv=layers.Conv1D(64, kernel_size=5, padding='same')
        self.bn=layers.BatchNormalization()
        self.relu=layers.LeakyReLU(alpha=0.001)
        self.mp=layers.MaxPool1D(pool_size=5, strides=2, padding='same')
        self.resnet_block1=ResnetBlock(64,8,num_blocks[0], first_block=True)
        self.resnet_block2=ResnetBlock(128,6,num_blocks[1])
        self.resnet_block3=ResnetBlock(256,4,num_blocks[2])
        self.resnet_block4=ResnetBlock(512,2,num_blocks[3])
        self.gap=layers.GlobalAvgPool1D()
        self.fc=layers.Dense(units=4,activation=activations.softmax)

    def call(self, x):
        x=self.conv(x)
        x=self.bn(x)
        x=self.relu(x)
        x=self.mp(x)
        x=self.resnet_block1(x)
        x=self.resnet_block2(x)
        x=self.resnet_block3(x)
        x=self.resnet_block4(x)
        x=self.gap(x)
        x=self.fc(x)
        return x


def build_resnet(keyword = 'resnet34'):
    if keyword == 'resnet34':
        return ResNet([3,4,6,3])
    if keyword == 'resnet18':
        return ResNet([2,2,2,2])



In [4]:
def train_resnet(batch_size, epoch):
    # build model
    net =build_resnet('resnet34')  # resnet20 resnet34
    
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, decay=1e-6, nesterov=False)
#     optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, decay=1e-6)
    
    net.compile(optimizer=optimizer ,loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    net.fit(train_X,
            train_Y,
            epochs=epoch,
            verbose=2,
            validation_data=(X_validation,Y_validation),
            batch_size=batch_size, 
            shuffle=True)
    return net

model_cnn=train_resnet(64,60)
print(model_cnn.summary())
# X_new_train = model_cnn.predict(train_X)
# Y_new_train=train_Y.copy()
# X_new_test = model_cnn.predict(X_validation)
# Y_new_test=Y_validation.copy()

#output 直接分类
def abs_sum(y_tru, y_pre):
    y_tru = pd.get_dummies(data=y_tru)
    y_pre = np.array(y_pre)
    y_tru = np.array(y_tru)
    loss = sum(sum(abs(y_pre - y_tru)))
    return loss


pre = model_cnn.predict(X_validation)
copy_test=pre.copy()
for i in range(len(copy_test)):
    for j in range(len(copy_test[0])):
        copy_test[i][j]=int(copy_test[i][j]+0.5)

print(abs_sum(Y_validation, pre))
print(abs_sum(Y_validation, copy_test))

Train on 80000 samples, validate on 20000 samples
Epoch 1/60


SystemError: <built-in function TF_NewBuffer> returned a result with an error set

In [14]:
test_feature = pd.DataFrame(test_get_feature)
test_feature = test_feature.astype(float)
test_X = np.array(test_feature)
test_X = test_X.reshape(test_X.shape[0], test_X.shape[1], 1)
pre = model_cnn.predict(test_X)
data_test_price = pd.DataFrame(pre,columns = ['label_0','label_1','label_2','label_3'])
results = pd.concat([test_data['id'],data_test_price],axis = 1)
submit_file_z_score = r'/mnt/ML/resnet_A1_230.95916.csv'
results.to_csv(submit_file_z_score,encoding='utf8',index=0)

test_data1 = pd.read_csv(r'/mnt/ML/testB.csv')

test_data1.heartbeat_signals

test_get_feature1 = [k.split(',') for k in test_data1.heartbeat_signals]


test_feature1 = pd.DataFrame(test_get_feature1)
test_feature1 = test_feature1.astype(float)
test_X1 = np.array(test_feature1)
test_X1 = test_X1.reshape(test_X1.shape[0], test_X1.shape[1], 1)
pre1 = model_cnn.predict(test_X1)

data_test_price1 = pd.DataFrame(pre1,columns = ['label_0','label_1','label_2','label_3'])
results1 = pd.concat([test_data1['id'],data_test_price1],axis = 1)
submit_file_z_score1 = r'/mnt/ML/resnet_B1_230.95916.csv'
results1.to_csv(submit_file_z_score1,encoding='utf8',index=0)

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor
from sklearn.linear_model import LogisticRegression
import lightgbm as lgb
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
# 单模函数
def build_model_rf(X_train,y_train):
    model = RandomForestRegressor(n_estimators = 300,max_depth=10)
    model.fit(X_train, y_train)
    return model

def build_model_lgb(X_train,y_train):
    model = lgb.LGBMClassifier(boosting_type='gbdt', objective='multiclass', num_class=4,
                               learning_rate=0.01, n_estimators=500,
                               num_leaves=138, max_depth=11, 
                               bagging_fraction=0.64, 
                               feature_fraction=0.93,
                               bagging_freq=49, 
                               min_split_gain=0.288,
                               min_child_weight=6.5,
                               reg_alpha=0.21,
                               reg_lambda=7)
    model.fit(X_train, y_train)
    return model

def build_model_xgb(X_train,y_train):
    param_dist = {'objective':'multi:softmax', 'n_estimators':600, 'max_depth':6, 'use_label_encoder':False,'num_class':4,'early_stopping_rounds':50}
    model = XGBClassifier(**param_dist)
    model.fit(X_train, y_train, eval_set=[(X_train,y_train)], eval_metric='mlogloss')
    return model


In [9]:
# pd.DataFrame(X_new_train).to_csv('x_new_train.csv')
# pd.DataFrame(Y_new_train).to_csv('y_new_train.csv')
# pd.DataFrame(X_new_test).to_csv('x_new_test.csv')
# pd.DataFrame(Y_new_test).to_csv('y_new_test.csv')

In [11]:
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor
from sklearn.linear_model import LogisticRegression
import lightgbm as lgb
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
# X_new_train=pd.read_csv('x_new_train.csv')
# Y_new_train=pd.read_csv('y_new_train.csv',names={'label','id'})
# X_new_test=pd.read_csv('x_new_test.csv')
# Y_new_test=pd.read_csv('y_new_test.csv')

# print('predict rf...')
# model_rf = build_model_rf(X_new_train,Y_new_train)
# train_rf = model_rf.predict(X_new_train)
# test_rf = model_rf.predict(X_new_test)


print('predict lgb...')
model_lgb = build_model_lgb(X_new_train,Y_new_train)
train_lgb = model_lgb.predict(X_new_train)
test_lgb = model_lgb.predict(X_new_test)


print('predict xgb...')
model_xgb = build_model_xgb(X_new_train,Y_new_train)
train_xgb = model_xgb.predict(X_new_train)
test_xgb = model_xgb.predict(X_new_test)

predict lgb...
predict xgb...
[0]	validation_0-mlogloss:0.85359
[1]	validation_0-mlogloss:0.58081
[2]	validation_0-mlogloss:0.40892
[3]	validation_0-mlogloss:0.29300
[4]	validation_0-mlogloss:0.21217
[5]	validation_0-mlogloss:0.15469
[6]	validation_0-mlogloss:0.11330
[7]	validation_0-mlogloss:0.08324
[8]	validation_0-mlogloss:0.06131
[9]	validation_0-mlogloss:0.04522
[10]	validation_0-mlogloss:0.03340
[11]	validation_0-mlogloss:0.02469
[12]	validation_0-mlogloss:0.01827
[13]	validation_0-mlogloss:0.01352
[14]	validation_0-mlogloss:0.01001
[15]	validation_0-mlogloss:0.00742
[16]	validation_0-mlogloss:0.00550
[17]	validation_0-mlogloss:0.00408
[18]	validation_0-mlogloss:0.00302
[19]	validation_0-mlogloss:0.00225
[20]	validation_0-mlogloss:0.00167
[21]	validation_0-mlogloss:0.00124
[22]	validation_0-mlogloss:0.00093
[23]	validation_0-mlogloss:0.00069
[24]	validation_0-mlogloss:0.00052
[25]	validation_0-mlogloss:0.00039
[26]	validation_0-mlogloss:0.00029
[27]	validation_0-mlogloss:0.00022


In [17]:
# 加权融合模型，如果w没有变，就是均值融合
def Weighted_method(test_pre2,test_pre3,w=[1/2,1/2]):
    Weighted_result = w[0]*pd.Series(test_pre2)+w[1]*pd.Series(test_pre3)
    return Weighted_result

# 初始权重，可以进行自定义，这里我们随便设置一个权重
w=[1/4,3/4]
train_pre = Weighted_method(train_lgb,train_xgb,w)
test_pre = Weighted_method(test_lgb,test_xgb,w)
# for i1 in range(len(train_pre)):
#     train_pre[i1]=int(train_pre[i1]+0.5)
# for i2 in range(len(test_pre)):
#     test_pre[i2]=int(test_pre[i2]+0.5)
def cut(data):
    d=data.copy()
    for i in range(len(d)):
        d[i]=int(d[i]+0.5)
    return d
# print(test_pre)
# print('train score : ', accuracy_score(cut(train_rf), Y_new_train,normalize=False))
# print('test score : ', accuracy_score(cut(test_rf), Y_new_test,normalize=False))
print('train score : ', accuracy_score(cut(train_lgb), Y_new_train,normalize=False))
print('test score : ', accuracy_score(cut(test_lgb), Y_new_test,normalize=False))

print('train score : ', accuracy_score(cut(train_xgb), Y_new_train,normalize=False))
print('test score : ', accuracy_score(cut(test_xgb), Y_new_test,normalize=False))
print('train score : ', accuracy_score(cut(train_pre), Y_new_train,normalize=False))
print('test score : ', accuracy_score(cut(test_pre), Y_new_test,normalize=False))

train score :  79991
test score :  19799
train score :  80000
test score :  19891
train score :  79999
test score :  19868


In [31]:
def abs_sum(y_tru, y_pre):
    y_pre = pd.get_dummies(data=cut(y_pre))
#     print(y_pre)
    y_tru = pd.get_dummies(data=y_tru)
    y_pre = np.array(y_pre)
    y_tru = np.array(y_tru)
    loss = sum(sum(abs(y_pre - y_tru)))
    return loss
# print(abs_sum(Y_new_test,test_pre))
Y_new_test=Y_new_test.reset_index(drop = True)
print(abs_sum(Y_validation,test_pre))
#512

512


In [33]:
## 第一层

stacking_X_train = pd.DataFrame()
# stacking_X_train['Method_1'] = train_rf
stacking_X_train['Method_2'] = train_lgb
stacking_X_train['Method_3'] = train_xgb

stacking_X_val = pd.DataFrame()
# stacking_X_val['Method_1'] = test_rf
stacking_X_val['Method_2'] = test_lgb
stacking_X_val['Method_3'] = test_xgb



In [34]:
def abs_sum(y_tru, y_pre):
    y_pre=pd.get_dummies(data=y_pre)
    y_tru = pd.get_dummies(data=y_tru)
    y_pre = np.array(y_pre)
    y_tru = np.array(y_tru)
    loss = sum(sum(abs(y_pre - y_tru)))
    return loss
# 第二层是用random forest
model_lr_stacking = build_model_lgb(stacking_X_train,Y_new_train)

## 训练集
train_pre_Stacking = model_lr_stacking.predict(stacking_X_train)
print(abs_sum(train_pre_Stacking,Y_new_train))
## 验证集
val_pre_Stacking = model_lr_stacking.predict(stacking_X_val)
print(abs_sum(val_pre_Stacking,Y_new_test))
#0 512


0
512


In [62]:
def abs_sum(y_tru, y_pre):
    y_tru = pd.get_dummies(data=y_tru)
    y_pre = np.array(y_pre)
    y_tru = np.array(y_tru)
    loss = sum(sum(abs(y_pre - y_tru)))
    return loss

pre = model_cnn.predict(X_validation)
print(abs_sum(Y_validation, pre))


348.5404987335205


In [63]:
test_data=pre.copy()
for ii in range(20000):
    for jj in range(4):
        test_data[ii][jj]=int(test_data[ii][jj]+0.5)

print(abs_sum(Y_validation, test_data))
#50epoch 384(cut338)
#30 470(cut394)
#40 330(cut295) bs128 314(cut285)adam sgd401(cut349) adam326(cut286) 
#40 pooling 16 8 adam390(cut335) 
#30 sgd439(cut354)16 7 3 sgd309(cut294)16 16 16 adam341(cut305)



317.0


In [None]:
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor
import lightgbm as lgb
def build_model_rf(X_train,y_train):
    model = RandomForestRegressor(n_estimators = 100)
    model.fit(X_train, y_train)
    return model


def build_model_lgb(X_train,y_train):
    model = lgb.LGBMRegressor(num_leaves=63,learning_rate = 0.1,n_estimators = 100)
    model.fit(X_train, y_train)
    return model

In [None]:
def oneHotRound(val_p):
    d=val_p.copy()
    for i in range(len(val_p)):
        d[i]=int(d[i]+0.5)
    d=pd.get_dummies(data=d)
    

In [None]:
print('predict rf...')
model_rf = build_model_rf(train_X,train_Y)
val_rf = model_rf.predict(train_X)
subA_rf = model_rf.predict(X_validation)
print(abs_sum(Y_validation, oneHotRound(subA_rf)))


print('predict lgb...')
model_lgb = build_model_lgb(train_X,train_Y)
val_lgb = model_lgb.predict(train_X)
subA_lgb = model_lgb.predict(X_validation)
print(abs_sum(Y_validation, oneHotRound(subA_lgb)))


print('predict NN...')
val_nn = model.predict(train_X)
subA_nn = model.predict(X_validation)
print(abs_sum(Y_validation, subA_nn))