In [None]:
# 导入各种库
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split 
import lightgbm as lgb
from sklearn.model_selection import GridSearchCV

In [None]:
# Load the tab-separated training and test tables.
train_data = pd.read_csv('zhengqi_train.txt', sep='\t')
test_data = pd.read_csv('zhengqi_test.txt', sep='\t')

# Build the list of feature names: every training column except the
# 'target' label column.
feature_list = train_data.columns.tolist()
feature_list.remove('target')

In [None]:
# Visualise the data distribution.
# For every feature, overlay the kernel-density estimate of the training set
# (yellow) and of the test set (default colour) to check whether the two sets
# are distributed alike.
grid_cols = 5
feature_names = list(test_data.columns)
# Ceiling division: the grid always has room for every feature, so the cell
# keeps working when the number of columns changes (38 features -> 8 x 5).
grid_rows = max(1, -(-len(feature_names) // grid_cols))

plt.figure(figsize=(30, 30))
for i, name in enumerate(feature_names):
    ax = plt.subplot(grid_rows, grid_cols, i + 1)
    # Pair the two series by column name rather than by position, so the
    # title is guaranteed to describe both curves.
    train_data[name].plot(kind='kde', ax=ax, color='y', label='train')
    test_data[name].plot(kind='kde', ax=ax, label='test')
    ax.set_title(name)
    ax.legend()
    

In [None]:
# Drop the attributes whose distribution differs substantially between the
# training and the test set by keeping only the columns listed here.
kept_features = [
    'V0', 'V1', 'V3', 'V4', 'V7', 'V8', 'V10', 'V12', 'V15', 'V16', 'V18',
    'V25', 'V26', 'V28', 'V29', 'V30', 'V31', 'V32', 'V33', 'V34', 'V36',
    'V37',
]
train_data = train_data[kept_features + ['target']]
test_data = test_data[kept_features]

# Refresh the feature-name list to match the reduced training frame.
feature_list = train_data.columns.tolist()
feature_list.remove('target')

In [None]:
# Correlation-based feature pruning: compute the pairwise Pearson correlation
# matrix of the remaining training columns (features and target).
t = train_data.corr(method='pearson')

In [None]:
# Columns whose absolute correlation with the target is below the threshold
# are considered uninformative and are removed from both data sets.
corr_threshold = 0.5
weakly_correlated = t['target'].abs() < corr_threshold
drop_columns = t.loc[weakly_correlated].index

# Re-bind instead of dropping in place, and ignore labels that are already
# gone: `t` is computed in an earlier cell, so with an in-place drop a second
# run of this cell would raise KeyError for the columns removed by the first.
train_data = train_data.drop(columns=drop_columns, errors='ignore')
test_data = test_data.drop(columns=drop_columns, errors='ignore')

In [None]:
# Refresh the feature-name list: all current training columns minus the
# 'target' label column.
feature_list = [*train_data.columns]
feature_list.remove('target')

In [None]:
# Hold-out validation: 25% of the labelled rows are set aside for evaluation.
# (This is a single train/validation split, not k-fold cross-validation.)
# Select the features by name so the split does not depend on 'target'
# happening to be the last column.
X = train_data.drop(columns=['target'])
y = train_data['target']
# A fixed seed makes the split, and every score derived from it, reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# reset_index returns a new object; the result must be re-bound, otherwise the
# call has no effect on the variables.
X_train = X_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)

In [None]:
# Convert the four split parts to plain NumPy arrays (a fresh copy each).
X_train, X_test, y_train, y_test = (
    np.array(part) for part in (X_train, X_test, y_train, y_test)
)

In [None]:
# XGBoost regressor
from sklearn.metrics import mean_squared_error
import xgboost as xgb

# 'reg:squarederror' is the current name of the squared-error objective; the
# former alias 'reg:linear' is deprecated.
model_xgb = xgb.XGBRegressor(
    max_depth=4,
    learning_rate=0.1,
    n_estimators=100,
    objective='reg:squarederror',
)
model_xgb.fit(X_train, y_train)

# Predict the hold-out rows and score them with the mean squared error.
predict_xgb = model_xgb.predict(X_test)
mse_xgb = mean_squared_error(y_test, predict_xgb)

# Overlay the actual and the predicted target values.
plt.plot(y_test)
plt.plot(predict_xgb)

In [None]:
# GBDT regression
from sklearn.ensemble import GradientBoostingRegressor

# The loss is deliberately left at its default, which is the squared-error
# loss. Its spelling changed between scikit-learn releases ('ls' was removed
# in favour of 'squared_error'), so naming it explicitly would tie the
# notebook to one side of that change.
model_gbdt = GradientBoostingRegressor(
    learning_rate=0.04,
    n_estimators=200,
    subsample=1.0,
    min_samples_split=2,
    min_samples_leaf=1,
    max_depth=3,
    alpha=0.9,  # only consulted by the huber/quantile losses
    verbose=0,
)

model_gbdt.fit(X_train, y_train)

# Predict the hold-out rows and score them with the mean squared error.
predict_gbdt = model_gbdt.predict(X_test)

mse_gbdt = mean_squared_error(y_test, predict_gbdt)

# Overlay the actual and the predicted target values.
plt.plot(y_test)
plt.plot(predict_gbdt)

In [None]:
# Random forest regressor
from sklearn.ensemble import RandomForestRegressor

# fit() returns the estimator itself, so construction and training chain.
model_RF = RandomForestRegressor(random_state=2, n_estimators=200).fit(X_train, y_train)

# Hold-out predictions and their mean squared error.
predict_RF = model_RF.predict(X_test)
mse_RF = mean_squared_error(y_true=y_test, y_pred=predict_RF)

# Actual vs. predicted target on the hold-out rows.
plt.gca().plot(y_test)
plt.gca().plot(predict_RF)


In [None]:
# Bayesian ridge (linear) regression
from sklearn import linear_model

# fit() returns the estimator itself, so construction and training chain.
model_BR = linear_model.BayesianRidge().fit(X_train, y_train)

# Hold-out predictions and their mean squared error.
predict_BR = model_BR.predict(X_test)
mse_BR = mean_squared_error(y_true=y_test, y_pred=predict_BR)

# Actual vs. predicted target on the hold-out rows.
plt.gca().plot(y_test)
plt.gca().plot(predict_BR)

In [None]:
# LightGBM regressor with default hyper-parameters
import lightgbm as lgb

# fit() returns the estimator itself, so construction and training chain.
model_lgb = lgb.LGBMRegressor().fit(X_train, y_train)

# Hold-out predictions and their mean squared error.
predict_lgb = model_lgb.predict(X_test)
mse_lgb = mean_squared_error(y_true=y_test, y_pred=predict_lgb)

# Actual vs. predicted target on the hold-out rows.
plt.gca().plot(y_test)
plt.gca().plot(predict_lgb)

In [None]:
# Single decision-tree regressor
from sklearn.tree import DecisionTreeRegressor

# fit() returns the estimator itself, so construction and training chain.
model_DR = DecisionTreeRegressor(splitter='best').fit(X_train, y_train)

# Hold-out predictions and their mean squared error.
predict_DR = model_DR.predict(X_test)
mse_DR = mean_squared_error(y_true=y_test, y_pred=predict_DR)

# Actual vs. predicted target on the hold-out rows.
plt.gca().plot(y_test)
plt.gca().plot(predict_DR)

In [None]:
# Lasso regression
from sklearn.linear_model import Lasso

# fit() returns the estimator itself, so construction and training chain.
model_Las = Lasso(alpha=0.01).fit(X_train, y_train)

# Hold-out predictions and their mean squared error.
predict_Las = model_Las.predict(X_test)
mse_Las = mean_squared_error(y_true=y_test, y_pred=predict_Las)

# Actual vs. predicted target on the hold-out rows.
plt.gca().plot(y_test)
plt.gca().plot(predict_Las)

In [None]:
# Model blending: collect the hold-out predictions of the six fitted models.
# The order of the models must match the order of `weights` below.
predictions = [
    member.predict(X_test)
    for member in (model_BR, model_gbdt, model_Las, model_lgb, model_RF, model_xgb)
]

# One blending weight per model, in the same order as above.
weights = [0.1, 0.4, 0.1, 0.1, 0.1, 0.2]

# Weighted average across models (axis 0) and its hold-out mean squared error.
weighted_prediction = np.average(predictions, weights=weights, axis=0)
mse_sum = mean_squared_error(y_test, weighted_prediction)

In [None]:
# Blend the six fitted models on the real test set.
# The models were fitted on plain NumPy arrays, so the test frame is converted
# the same way before predicting. Passing the DataFrame directly makes
# scikit-learn warn about unexpected feature names, and some XGBoost versions
# appear to reject it with a feature-name mismatch error (not verified here).
test_features = np.array(test_data)

# The order of the models must match the order of `weights` below.
predictions_test = [
    member.predict(test_features)
    for member in (model_BR, model_gbdt, model_Las, model_lgb, model_RF, model_xgb)
]

# One blending weight per model, in the same order as above.
weights = [0.1, 0.4, 0.1, 0.1, 0.1, 0.2]

# NOTE: this re-binds `weighted_prediction`, replacing the hold-out blend
# computed earlier with the blend for the test set.
weighted_prediction = np.average(predictions_test, axis=0, weights=weights)
