## SVM 分类

In [1]:
from sklearn.svm import SVC
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate#交叉验证
from sklearn.model_selection import cross_val_score #交叉验证
from sklearn.metrics import matthews_corrcoef
import pandas as pd
import joblib
from sklearn import metrics
import math
import numpy as np
from sklearn.metrics import classification_report
import warnings
import pickle
warnings.filterwarnings("ignore")


In [2]:
# 读取数据
iris = datasets.load_iris()
X = iris.data
y = iris.target

# 标准化
std = StandardScaler()
X_std = std.fit_transform(X)

# 拆分训练集
X_train, X_test, y_train, y_test = train_test_split(X_std, y, test_size=0.3)

# SVM建模
svm_classification = SVC()
svm_classification.fit(X_train, y_train)
y_pred = svm_classification.predict(X_test)
# 模型效果
svm_classification.score(X_test, y_test)
score = [classification_report(y_test,y_pred)]

In [96]:
score.append("acc")
with open('爬到的数据.txt', 'a+', encoding='utf-8') as f:
    for data in score:
        f.write(data+'\n')

f.close()

## SVM 回归

In [3]:


# 读取数据
boston = datasets.load_boston()
X = boston.data
y = boston.target

# 标准化
std = StandardScaler()
X_std = std.fit_transform(X)

# 拆分训练集
X_train, X_test, y_train, y_test = train_test_split(X_std, y, test_size=0.3)

# SVM建模
svm_regression = SVR(C=2, kernel='rbf')
print(cross_val_score(svm_regression,X,y,cv=5,scoring="r2"))
svm_regression.fit(X_train, y_train)

# 模型效果
svm_regression.score(X_test, y_test)

[ 0.16291174  0.03071482 -0.61395781  0.06783386 -0.04146431]


0.6922890215581634

## 模型调参：网格搜索

In [23]:

# 定义参数的组合
params = {
    "kernel":['linear', 'rbf', 'poly', 'sigmoid'],
    'C':[0.01, 0.1, 0.5, 1, 2,]
}

# 用网格搜索拟合模型
model = GridSearchCV(svm_regression, param_grid=params, cv=10)
model.fit(X,y)

# 查看结果
print("最好的参数组和：", model.best_params_)
print("最好的得分：", model.best_score_)


最好的参数组和： {'C': 2, 'kernel': 'linear'}
最好的得分： 0.3151273617911972


## part1 SVM_Regression applied in ads effectiveness prediction

In [2]:
df = pd.read_csv("./data/ads_3.csv")

X = df[df.columns[:62]]
Y = df[df.columns[62:]]
std = StandardScaler()     #标准化特征
X_std = std.fit_transform(X)


In [3]:
MSE = []
RMSE_validation = []
RMSE_test = []
R_squared_test = []
R_squared_validation = []


for i in range(12):
    y = Y[Y.columns[i]]
    y_std = std.fit_transform(np.array(y).reshape(-1,1))
    X_train, X_test, y_train, y_test = train_test_split(X_std, y_std.ravel(), random_state=0, train_size=0.7)    
    params = {
        "kernel":['linear', 'rbf', 'poly', 'sigmoid'],
        'C':[0.01, 0.1, 0.5, 1, 2, 10, 100]
    }
    
    svm_regression = SVR()
    model = GridSearchCV(svm_regression, param_grid=params, cv=5)
    model.fit(X_train, y_train)
    C = model.best_params_["C"]
    kernel = model.best_params_["kernel"]
    
    svm_regression = SVR(C=C, kernel=kernel)

    cv_score = cross_validate(svm_regression       #实例化的模型
				, X   #完整的特征值
				, y_std.ravel() #完整的目标值
				, cv=5         #几折交叉验证
				,scoring = ["neg_mean_squared_error","neg_root_mean_squared_error","r2"]   
				)

    
    MSE.append(cv_score["test_neg_mean_squared_error"].mean())
    RMSE_validation.append(cv_score["test_neg_root_mean_squared_error"].mean())
    R_squared_validation.append(cv_score["test_r2"].mean())



    svm_regression = SVR(C=C, kernel=kernel)
    svm_regression.fit(X_train, y_train)
    R_squared_test.append(svm_regression.score(X_test,y_test))
    RMSE_test.append(math.sqrt(metrics.mean_squared_error(y_test, svm_regression.predict(X_test))))
    joblib.dump(svm_regression, "model/SVM_regression/model{}.pkl".format(i+1))



In [5]:
MSE = np.array(MSE) * -1
RMSE_validation = np.array(RMSE_validation) * -1
result_dic = {"MSE":MSE, "RMSE_validation":RMSE_validation, "RMSE_test":RMSE_test, "R_squared_validation":R_squared_validation, "R_squared_test":R_squared_test}
result_df = pd.DataFrame(result_dic, index=Y.columns)
result_df.to_csv("result/SVM_regression.csv")

## part2 SVM_Classification applied in ads effectiveness prediction

In [6]:
df = pd.read_csv("./data/ads_3.csv")

X = df[df.columns[:62]]
Y = df[df.columns[62:]]
Y = round(Y*5).astype(int)
std = StandardScaler()     #标准化特征
X_std = std.fit_transform(X)


In [None]:
recall = []
f1_score = []
acc_validation = []
acc_test = []
mcc = []
for i in range(12):
    y = Y[Y.columns[i]]
    X_train, X_test, y_train, y_test = train_test_split(X_std, y, random_state=0, train_size=0.7)
    
    params = {
        "kernel":['linear', 'rbf', 'poly', 'sigmoid'],
        'C':[0.01, 0.1, 0.5, 1, 2, 10, 100]
    }
    
    svm_classification = SVC()
    model = GridSearchCV(svm_classification, param_grid=params, cv=5)
    model.fit(X_train, y_train)
    C = model.best_params_["C"]
    kernel = model.best_params_["kernel"]

    
    svm_classification = SVC(C=C, kernel=kernel)

    cv_score = cross_validate(svm_classification       #实例化的模型
				, X   #完整的特征值
				, y #完整的目标值
				, cv=5         #几折交叉验证
				,scoring = ["accuracy","recall_micro","f1_micro"]   
				)

    recall.append(cv_score["test_recall_micro"].mean())
    f1_score.append(cv_score["test_f1_micro"].mean())
    acc_validation.append(cv_score["test_accuracy"].mean())

    svm_classification = SVC(C=C, kernel=kernel)
    svm_classification.fit(X_train, y_train)
    mcc.append(matthews_corrcoef(y_test,svm_classification.predict(X_test)))
    joblib.dump(svm_regression, "model/SVM_classification/model{}.pkl".format(i+1))
    
    acc_test.append(svm_classification.score(X_test,y_test))



In [137]:
result_dic = {"recall":recall, "f1_score":f1_score, "acc_validation":acc_validation, "acc_test":acc_test, "mcc":mcc}
result_df = pd.DataFrame(result_dic, index=Y.columns)
result_df.to_csv("./result/SVM_classification.csv")