In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
def data_split_with_val(data, test_size=0.2, val_size=0.25, random_state=42):
    """Split a DataFrame into train / validation / test NumPy arrays.

    The last column of ``data`` is used as the target; every other column is
    used as a feature.

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose last column holds the labels.
    test_size : float, default 0.2
        Fraction of all rows held out as the test set.
    val_size : float, default 0.25
        Fraction of the *remaining* (non-test) rows held out as the validation
        set. With the defaults this is 0.25 * 0.8 = 0.2 of all rows, i.e. a
        60/20/20 split.
    random_state : int, default 42
        Seed passed to both ``train_test_split`` calls so the split is
        reproducible.

    Returns
    -------
    tuple
        ``(train_x, train_y, val_x, val_y, test_x, test_y)``.
    """
    features = np.array(data.iloc[:, :-1])
    target = np.array(data.iloc[:, -1])

    # First carve off the test set, then split what is left into train / val.
    train_x, test_x, train_y, test_y = train_test_split(
        features, target, test_size=test_size, random_state=random_state
    )
    train_x, val_x, train_y, val_y = train_test_split(
        train_x, train_y, test_size=val_size, random_state=random_state
    )
    return train_x, train_y, val_x, val_y, test_x, test_y

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score,f1_score,recall_score,precision_score
def get_score(y_true, y_pred, average='binary'):
    """Compute the confusion matrix and the standard classification scores.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels.
    average : str, default 'binary'
        Averaging mode forwarded to ``precision_score``, ``recall_score`` and
        ``f1_score``. The default keeps the original binary behaviour; pass
        e.g. ``'weighted'`` for multi-class targets.

    Returns
    -------
    tuple
        ``(matrix, acc, precision, recall, f1)``.
    """
    matrix = confusion_matrix(y_true, y_pred)
    acc = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average=average)
    recall = recall_score(y_true, y_pred, average=average)
    f1 = f1_score(y_true, y_pred, average=average)
    return matrix, acc, precision, recall, f1

# Naive Bayes (NB) classifier with mRMR feature selection

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
import pymrmr
from matplotlib import pyplot as plt
class NB(object):
    """Gaussian naive Bayes classifier with mRMR-ranked feature selection.

    Typical use: ``choose_feature()`` (split, standardise, rank features and
    pick the best subset size on the validation set), then ``fit()`` and
    ``predict()`` (evaluate on the test set).

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose last column holds the labels and whose other columns are
        the features.
    max_features : int, default 46
        Upper bound on how many features mRMR ranks; it is clamped to the
        number of feature columns actually present in ``data``.
    """

    # Smallest feature-subset size tried during selection.
    MIN_FEATURES = 2

    def __init__(self, data, max_features=46):
        self.data = data
        self.max_features = max_features

    def _make_classifier(self):
        """Return a fresh, unfitted classifier."""
        return GaussianNB()

    def _scaled_frame(self, scaler, x):
        """Apply ``scaler`` to ``x`` and label the columns with the feature names."""
        return pd.DataFrame(scaler.transform(x), columns=self.data.columns[:-1])

    def get_split_data(self):
        """Split the data set and reset the per-k validation score lists."""
        (self.train_x, self.train_y,
         self.val_x, self.val_y,
         self.test_x, self.test_y) = data_split_with_val(self.data)
        self.acc_all = []
        self.precision_all = []
        self.recall_all = []
        self.f1_all = []
        self.clf = self._make_classifier()

    def get_std(self):
        """Standardise the features of all three splits.

        The scaler is fitted on the training split only, so no validation or
        test statistics leak into the model-selection / evaluation steps.
        """
        self.std_scaler = StandardScaler().fit(self.train_x)
        self.train_x_std = self._scaled_frame(self.std_scaler, self.train_x)
        self.val_x_std = self._scaled_frame(self.std_scaler, self.val_x)
        self.test_x_std = self._scaled_frame(self.std_scaler, self.test_x)
        # Training labels become a one-column frame so they can be
        # concatenated with the features for mRMR. np.ravel keeps this safe to
        # run more than once.
        self.train_y = pd.DataFrame({'class': np.ravel(self.train_y)})

    def get_feature(self):
        """Rank the features with mRMR (MIQ scheme) on the training split."""
        # The label column goes first, followed by the feature columns.
        train = pd.concat([self.train_y, self.train_x_std], axis=1)
        n_rank = min(self.max_features, self.train_x_std.shape[1])
        # NOTE(review): pymrmr may expect discretised inputs, whereas the
        # features here are standardised floats -- confirm against its docs.
        self.feature = pymrmr.mRMR(train, 'MIQ', n_rank)

    def choose_feature(self):
        """Pick the top-k feature subset with the best validation F1 score.

        Prints the selected features together with their validation scores
        and plots every score against k.

        Raises
        ------
        ValueError
            If the mRMR ranking is empty.
        """
        self.get_split_data()
        self.get_std()
        self.get_feature()

        n_ranked = len(self.feature)
        if n_ranked == 0:
            raise ValueError("mRMR returned no features to choose from")
        # Inclusive upper bound: the subset made of *all* ranked features is
        # evaluated as well.
        k_values = list(range(min(self.MIN_FEATURES, n_ranked), n_ranked + 1))

        clf = self._make_classifier()
        for k in k_values:
            cols = self.feature[:k]
            clf.fit(self.train_x_std[cols], self.train_y['class'])
            # Predict on a DataFrame too, so the feature names match those
            # seen during fit.
            val_pred_y = clf.predict(self.val_x_std[cols])
            _, acc, precision, recall, f1 = get_score(self.val_y, val_pred_y)
            self.acc_all.append(acc)
            self.precision_all.append(precision)
            self.recall_all.append(recall)
            self.f1_all.append(f1)

        # First k with the highest F1 wins.
        best = int(np.argmax(self.f1_all))
        self.feature_num = k_values[best]
        self.feature_list = self.feature[:self.feature_num]

        # Report the scores of the selected subset, not independent
        # per-metric maxima that may belong to different k.
        print("最优特征集合：", self.feature_list)
        print("准确率：", self.acc_all[best])
        print("查准率：", self.precision_all[best])
        print("查全率：", self.recall_all[best])
        print("F1分数：", self.f1_all[best])

        fig, ax = plt.subplots(figsize=(16, 4), dpi=100)
        ax.set_xticks(k_values)
        ax.plot(k_values, self.acc_all, 'black')
        ax.plot(k_values, self.precision_all, 'blue')
        ax.plot(k_values, self.recall_all, 'green')
        ax.plot(k_values, self.f1_all, 'red')
        ax.set_xlabel('number of features (k)')
        ax.set_ylabel('validation score')
        ax.legend(['acc', 'precision', 'recall', 'f1'])
        plt.show()

    def fit(self):
        """Fit the classifier on the training split using the selected features.

        ``choose_feature()`` must have been called first.
        """
        self.clf.fit(self.train_x_std[self.feature_list], self.train_y['class'])

    def predict(self):
        """Score the fitted classifier on the test split and print the results."""
        pred_y = self.clf.predict(self.test_x_std[self.feature_list])
        (self.matrix, self.acc, self.precision,
         self.recall, self.f1) = get_score(self.test_y, pred_y)
        print("\n测试集分数：")
        print("混淆矩阵：\n", self.matrix)
        print("准确率：", self.acc)
        print("查准率：", self.precision)
        print("查全率：", self.recall)
        print("f1：", self.f1)

# Support Vector Machine (SVM, linear) classifier with mRMR feature selection

In [None]:
class SVM(object):
    """Linear SVM classifier with mRMR-ranked feature selection.

    Typical use: ``choose_feature()`` (split, standardise, rank features and
    pick the best subset size on the validation set), then ``fit()`` and
    ``predict()`` (evaluate on the test set).

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose last column holds the labels and whose other columns are
        the features.
    max_features : int, default 46
        Upper bound on how many features mRMR ranks; it is clamped to the
        number of feature columns actually present in ``data``.
    max_iter : int, default 5000
        Iteration limit passed to ``LinearSVC``.
    random_state : int, default 42
        Seed passed to ``LinearSVC`` so repeated runs give the same model.
    """

    # Smallest feature-subset size tried during selection.
    MIN_FEATURES = 2

    def __init__(self, data, max_features=46, max_iter=5000, random_state=42):
        self.data = data
        self.max_features = max_features
        self.max_iter = max_iter
        self.random_state = random_state

    def _make_classifier(self):
        """Return a fresh, unfitted classifier."""
        return LinearSVC(max_iter=self.max_iter, random_state=self.random_state)

    def _scaled_frame(self, scaler, x):
        """Apply ``scaler`` to ``x`` and label the columns with the feature names."""
        return pd.DataFrame(scaler.transform(x), columns=self.data.columns[:-1])

    def get_split_data(self):
        """Split the data set and reset the per-k validation score lists."""
        (self.train_x, self.train_y,
         self.val_x, self.val_y,
         self.test_x, self.test_y) = data_split_with_val(self.data)
        self.acc_all = []
        self.precision_all = []
        self.recall_all = []
        self.f1_all = []
        self.clf = self._make_classifier()

    def get_std(self):
        """Standardise the features of all three splits.

        The scaler is fitted on the training split only, so no validation or
        test statistics leak into the model-selection / evaluation steps.
        """
        self.std_scaler = StandardScaler().fit(self.train_x)
        self.train_x_std = self._scaled_frame(self.std_scaler, self.train_x)
        self.val_x_std = self._scaled_frame(self.std_scaler, self.val_x)
        self.test_x_std = self._scaled_frame(self.std_scaler, self.test_x)
        # Training labels become a one-column frame so they can be
        # concatenated with the features for mRMR. np.ravel keeps this safe to
        # run more than once.
        self.train_y = pd.DataFrame({'class': np.ravel(self.train_y)})

    def get_feature(self):
        """Rank the features with mRMR (MIQ scheme) on the training split."""
        # The label column goes first, followed by the feature columns.
        train = pd.concat([self.train_y, self.train_x_std], axis=1)
        n_rank = min(self.max_features, self.train_x_std.shape[1])
        # NOTE(review): pymrmr may expect discretised inputs, whereas the
        # features here are standardised floats -- confirm against its docs.
        self.feature = pymrmr.mRMR(train, 'MIQ', n_rank)

    def choose_feature(self):
        """Pick the top-k feature subset with the best validation F1 score.

        Prints the selected features together with their validation scores
        and plots every score against k.

        Raises
        ------
        ValueError
            If the mRMR ranking is empty.
        """
        self.get_split_data()
        self.get_std()
        self.get_feature()

        n_ranked = len(self.feature)
        if n_ranked == 0:
            raise ValueError("mRMR returned no features to choose from")
        # Inclusive upper bound: the subset made of *all* ranked features is
        # evaluated as well.
        k_values = list(range(min(self.MIN_FEATURES, n_ranked), n_ranked + 1))

        clf = self._make_classifier()
        for k in k_values:
            cols = self.feature[:k]
            clf.fit(self.train_x_std[cols], self.train_y['class'])
            # Predict on a DataFrame too, so the feature names match those
            # seen during fit.
            val_pred_y = clf.predict(self.val_x_std[cols])
            _, acc, precision, recall, f1 = get_score(self.val_y, val_pred_y)
            self.acc_all.append(acc)
            self.precision_all.append(precision)
            self.recall_all.append(recall)
            self.f1_all.append(f1)

        # First k with the highest F1 wins.
        best = int(np.argmax(self.f1_all))
        self.feature_num = k_values[best]
        self.feature_list = self.feature[:self.feature_num]

        # Report the scores of the selected subset, not independent
        # per-metric maxima that may belong to different k.
        print("最优特征集合：", self.feature_list)
        print("准确率：", self.acc_all[best])
        print("查准率：", self.precision_all[best])
        print("查全率：", self.recall_all[best])
        print("F1分数：", self.f1_all[best])

        fig, ax = plt.subplots(figsize=(16, 4), dpi=100)
        ax.set_xticks(k_values)
        ax.plot(k_values, self.acc_all, 'black')
        ax.plot(k_values, self.precision_all, 'blue')
        ax.plot(k_values, self.recall_all, 'green')
        ax.plot(k_values, self.f1_all, 'red')
        ax.set_xlabel('number of features (k)')
        ax.set_ylabel('validation score')
        ax.legend(['acc', 'precision', 'recall', 'f1'])
        plt.show()

    def fit(self):
        """Fit the classifier on the training split using the selected features.

        ``choose_feature()`` must have been called first.
        """
        self.clf.fit(self.train_x_std[self.feature_list], self.train_y['class'])

    def predict(self):
        """Score the fitted classifier on the test split and print the results."""
        pred_y = self.clf.predict(self.test_x_std[self.feature_list])
        (self.matrix, self.acc, self.precision,
         self.recall, self.f1) = get_score(self.test_y, pred_y)
        print("\n测试集分数：")
        print("混淆矩阵：\n", self.matrix)
        print("准确率：", self.acc)
        print("查准率：", self.precision)
        print("查全率：", self.recall)
        print("f1：", self.f1)