## SVM 分类

In [81]:
from sklearn.svm import SVC
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn import datasets
from sklearn.model_selection import train_test_split
import pandas as pd
import joblib
from sklearn import metrics
import math
import numpy as np
from sklearn.metrics import classification_report

In [95]:
# 读取数据
iris = datasets.load_iris()
X = iris.data
y = iris.target

# 标准化
std = StandardScaler()
X_std = std.fit_transform(X)

# 拆分训练集
X_train, X_test, y_train, y_test = train_test_split(X_std, y, test_size=0.3)

# SVM建模
svm_classification = SVC()
svm_classification.fit(X_train, y_train)
y_pred = svm_classification.predict(X_test)
# 模型效果
svm_classification.score(X_test, y_test)
score = [classification_report(y_test,y_pred)]

In [96]:
score.append("acc")
with open('爬到的数据.txt', 'a+', encoding='utf-8') as f:
    for data in score:
        f.write(data+'\n')

f.close()

## SVM 回归

In [None]:


# 读取数据
boston = datasets.load_boston()
X = boston.data
y = boston.target

# 标准化
std = StandardScaler()
X_std = std.fit_transform(X)

# 拆分训练集
X_train, X_test, y_train, y_test = train_test_split(X_std, y, test_size=0.3)

# SVM建模
svm_regression = SVR(C=2, kernel='rbf')
svm_regression.fit(X_train, y_train)

# 模型效果
svm_regression.score(X_test, y_test)

## 模型调参：网格搜索

In [23]:

# 定义参数的组合
params = {
    "kernel":['linear', 'rbf', 'poly', 'sigmoid'],
    'C':[0.01, 0.1, 0.5, 1, 2,]
}

# 用网格搜索拟合模型
model = GridSearchCV(svm_regression, param_grid=params, cv=10)
model.fit(X,y)

# 查看结果
print("最好的参数组和：", model.best_params_)
print("最好的得分：", model.best_score_)


最好的参数组和： {'C': 2, 'kernel': 'linear'}
最好的得分： 0.3151273617911972


## part1 SVM_Regression applied in ads effectiveness prediction

In [70]:
df = pd.read_csv("./data/ads_3.csv")

X = df[df.columns[:62]]
Y = df[df.columns[62:]]
std = StandardScaler()     #标准化特征
X_std = std.fit_transform(X)


In [39]:
# 定义参数的组合
params = {
    "kernel":['linear', 'rbf', 'poly', 'sigmoid'],
    'C':[0.01, 0.1, 0.5, 1, 2, 10, 100]
}

svm_regression = SVR()
model = GridSearchCV(svm_regression, param_grid=params, cv=10)
model.fit(X, Y[Y.columns[1]])

# 查看结果
print("最好的参数组和：", model.best_params_)
print("最好的得分：", model.best_score_)

最好的参数组和： {'C': 0.5, 'kernel': 'rbf'}
最好的得分： -0.13549245031976037


In [71]:
MSE = []
RMSE = []
R_squared = []

for i in range(12):
    y = Y[Y.columns[i]]
    y_std = std.fit_transform(np.array(y).reshape(-1,1))
    X_train, X_test, y_train, y_test = train_test_split(X_std, y_std.ravel(), random_state=1)    
    params = {
        "kernel":['linear', 'rbf', 'poly', 'sigmoid'],
        'C':[0.01, 0.1, 0.5, 1, 2, 10, 100]
    }
    
    svm_regression = SVR()
    model = GridSearchCV(svm_regression, param_grid=params, cv=10)
    model.fit(X_std, y_std.ravel())
    C = model.best_params_["C"]
    kernel = model.best_params_["kernel"]
    
    svm_regression = SVR(C=C, kernel=kernel)
    svm_regression.fit(X_train, y_train)

    joblib.dump(svm_regression, "model/SVM_regression/model{}.pkl".format(i+1))
    y_pred = svm_regression.predict(X_test)
    MSE.append(metrics.mean_squared_error(y_test, y_pred))
    RMSE.append(math.sqrt(metrics.mean_squared_error(y_test, y_pred)))
    R_squared.append(metrics.r2_score(y_test, y_pred))

In [72]:
result_dic = {"MSE":MSE, "RMSE":RMSE, "R_squared":R_squared}
result_df = pd.DataFrame(result_dic, index=Y.columns)
result_df.to_csv("result/SVM_regression.csv")

## part2 SVM_Classification applied in ads effectiveness prediction

In [107]:
df = pd.read_csv("./data/ads_3.csv")

X = df[df.columns[:62]]
Y = df[df.columns[62:]]
Y = round(Y*10).astype(int)
std = StandardScaler()     #标准化特征
X_std = std.fit_transform(X)


In [108]:

score_list = []
for i in range(12):
    y = Y[Y.columns[i]]
    X_train, X_test, y_train, y_test = train_test_split(X_std, y, random_state=0, train_size=0.7)
    
    params = {
        "kernel":['linear', 'rbf', 'poly', 'sigmoid'],
        'C':[0.01, 0.1, 0.5, 1, 2, 10, 100]
    }
    
    svm_classification = SVC()
    model = GridSearchCV(svm_classification, param_grid=params, cv=10)
    model.fit(X_train, y_train)
    C = model.best_params_["C"]
    kernel = model.best_params_["kernel"]
    
    svm_classification = SVC(C=C, kernel=kernel)
    svm_classification.fit(X_train, y_train)

    joblib.dump(svm_regression, "model/SVM_classification/model{}.pkl".format(i+1))
    y_pred = svm_classification.predict(X_test)

    # score_list.append(classification_report(y_test,y_pred))
    score_list.append(svm_classification.score(X_test,y_test))
    
  
    

# with open('./result/SVM_classification.txt', 'a+', encoding='utf-8') as f:
#     for data in score_list:
#         f.write(data+'\n')

# f.close()
    # MSE.append(metrics.mean_squared_error(y_test, y_pred))
    # RMSE.append(math.sqrt(metrics.mean_squared_error(y_test, y_pred)))
    # R_squared.append(metrics.r2_score(y_test, y_pred))




In [106]:
result_df = pd.DataFrame(score_list, index=Y.columns, columns=['ACC'])
result_df.to_csv("./result/SVM_classification.csv")