In [10]:
import numpy as np
from xgboost import XGBClassifier
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from thundersvm import SVC
import matplotlib.pyplot as plt
from sklearn.model_selection import (
    GridSearchCV,
    train_test_split,
    KFold,
    cross_val_score,
)
from sklearn.metrics import accuracy_score
from sklearn import metrics

# Load the labelled training table and the unlabelled table to be predicted.
data_std = pd.read_csv('train_std.csv')
test_std = pd.read_csv('test_std.csv')

# Disabled hold-out experiment (80/20 split of the training table), kept for reference:
# X_std=np.array(data_std.drop(['uid','y'],axis=1))
# y_std=np.array(data_std['y'])
# X_train_std,X_val_std,y_train_std,y_val_std=train_test_split(X_std,y_std,test_size=0.2,random_state=42)

# Train on every labelled row: features are all columns except the id and the label.
X_train_std = np.array(data_std.drop(columns=['uid', 'y']))
y_train_std = np.array(data_std['y'])
# NOTE: despite the "val" name, this holds the *test* features (no labels available).
X_val_std = np.array(test_std.drop(columns=['uid']))

# Hyperparameters for each base learner of the stacking ensemble.

# XGBoost.
# NOTE(review): XGBoost 2.0+ deprecates tree_method='gpu_hist' in favour of
# tree_method='hist' with device='cuda' -- confirm against the installed version.
xgb_params = dict(
    max_depth=9,
    min_child_weight=2,
    colsample_bytree=0.55,
    subsample=0.88,
    learning_rate=0.005,
    n_estimators=1600,
    gamma=0.14,
    scale_pos_weight=1,
    tree_method='gpu_hist',
    objective='binary:logistic',
    reg_alpha=0,
)

# ThunderSVM (GPU) support-vector classifier.
svm_params = dict(
    kernel="rbf",
    C=0.99,
    gamma=0.145,
    n_jobs=-1,
    gpu_id=0,
)

# Random forest.
random_params = dict(
    n_estimators=200,
    max_depth=14,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features=9,
    n_jobs=-1,
)

# CatBoost.
cat_params = dict(
    iterations=1500,            # number of boosting iterations
    learning_rate=0.07,         # learning rate
    depth=7,                    # tree depth
    l2_leaf_reg=50,             # L2 regularisation coefficient
    # 'loss_function': 'MAE',   # loss function (disabled)
    eval_metric='Accuracy',     # evaluation metric
    od_type='Iter',             # early-stopping (overfitting detector) strategy
    task_type="GPU",            # train on the GPU
    od_wait=200,                # early-stopping patience, in iterations
    # 'bootstrap_type': 'Bayesian',  # bootstrap type (disabled)
    verbose=0,                  # silence training output
    allow_writing_files=False,  # whether CatBoost may write files to disk
)

# LightGBM.
# NOTE(review): LightGBM only applies `subsample` when `subsample_freq` > 0, and
# that parameter is not set here -- confirm whether the 0.1 row subsample is
# actually in effect.
light_params = dict(
    max_depth=5,
    num_leaves=32,
    learning_rate=0.005,
    n_estimators=2500,
    subsample=0.1,
    colsample_bytree=0.5,
    reg_alpha=1,
    reg_lambda=1,
)

# One shared seed so every base learner (and the K-fold split) is reproducible.
seed = 20030721

# Instantiate the five base learners from their hyperparameter dicts.
xgb = XGBClassifier(random_state=seed, **xgb_params)
svm = SVC(random_state=seed, **svm_params)
# NOTE: this name shadows the stdlib `random` module if it is imported later.
random = RandomForestClassifier(random_state=seed, **random_params)
cat = CatBoostClassifier(random_state=seed, **cat_params)
light = LGBMClassifier(random_state=seed, **light_params)

# Column order of the stacking matrices follows this list.
stacking_models = [
    svm,
    random,
    xgb,
    light,
    cat,
]

# Level-1 feature matrices: one row per sample, one column per base model.
# `stacking_output` receives out-of-fold predictions for the training rows;
# `stacking_output_val` receives predictions for the test rows.
stacking_output = np.zeros((len(X_train_std), len(stacking_models)))
stacking_output_val = np.zeros((len(X_val_std), len(stacking_models)))

# Shuffled 5-fold split used to produce the out-of-fold predictions.
kf = KFold(n_splits=5, shuffle=True, random_state=seed)
print(stacking_output.shape)

# Pass 1 -- out-of-fold predictions: each base model is fitted on four folds and
# predicts the held-out fold, so every training row gets a prediction from a
# model that never saw it.
for fit_idx, holdout_idx in kf.split(X_train_std):
    fold_features = X_train_std[fit_idx]
    fold_labels = y_train_std[fit_idx]
    holdout_features = X_train_std[holdout_idx]

    for column, estimator in enumerate(stacking_models):
        print(column)  # progress marker: index of the model being fitted
        estimator.fit(fold_features, fold_labels)
        stacking_output[holdout_idx, column] = estimator.predict(holdout_features)

# Pass 2 -- refit each base model on the full training set and predict the test rows.
for column, estimator in enumerate(stacking_models):
    estimator.fit(X_train_std, y_train_std)
    stacking_output_val[:, column] = estimator.predict(X_val_std)




(8000, 5)
0
1
2
3
4
0
1
2
3
4
0
1
2
3
4
0
1
2
3
4
0
1
2
3
4


In [6]:
# Weighted vote over the base models' hard (0/1) test-set predictions.
#
# Disabled brute-force search over integer weights 0..9 per model, scored by
# accuracy of the blended out-of-fold predictions against the training labels.
# Kept for reference; this cell does not show whether the weights used below
# were taken from it.
# best_score=0
# best_param=None
# for i1 in range(10):
#     for i2 in range(10):
#         print(i1,i2)
#         for i3 in range(10):
#             for i4 in range(10):
#                 for i5 in range(10):
#                     weight_list=np.array([i1,i2,i3,i4,i5])
#                     if weight_list.sum()==0:
#                         continue
#                     weight_list=weight_list/weight_list.sum()
#                     answer_list=weight_list*stacking_output
#                     answer_list=np.sum(answer_list,axis=1)
#                     answer_list=np.round(answer_list)
#                     if accuracy_score(answer_list,y_train_std)>best_score:
#                         best_score=accuracy_score(answer_list,y_train_std)
#                         best_param=weight_list
# print(best_param,best_score)

# One weight per column of the stacking matrix, in `stacking_models` order
# (svm, random forest, xgboost, lightgbm, catboost).
weight_list=np.array([0.2,0.2,0.4,0.1,0.1])
answer_list=weight_list*stacking_output_val
answer_list=np.sum(answer_list,axis=1)
# np.round rounds halves to the nearest even value, so a weighted vote of
# exactly 0.5 resolves to class 0. Cast to int so labels are written as 0/1
# rather than 0.0/1.0, matching the neural-network submission.
answer_list=np.round(answer_list).astype(int)
submission2=pd.DataFrame({'uid':test_std['uid'],'y':answer_list})
submission2.to_csv('submission2.csv',index=False)

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim

# Meta-learner network definition.
# Default widths of the two hidden layers.
params1=256
params2=64

class Net(nn.Module):
    """Small fully connected binary classifier used as the stacking meta-learner.

    Architecture: Linear -> ReLU -> Linear -> ReLU -> Linear -> Sigmoid, giving
    one probability per input row.

    Args:
        in_features: Number of input features. Defaults to
            ``len(stacking_models)`` (one feature per base model).
        hidden1: Width of the first hidden layer. Defaults to ``params1``.
        hidden2: Width of the second hidden layer. Defaults to ``params2``.

    All defaults are resolved when the network is constructed, so ``Net()``
    behaves exactly as before and still follows later changes to the globals.
    """

    def __init__(self, in_features=None, hidden1=None, hidden2=None):
        super().__init__()
        if in_features is None:
            in_features = len(stacking_models)
        if hidden1 is None:
            hidden1 = params1
        if hidden2 is None:
            hidden2 = params2
        self.fc1 = nn.Linear(in_features, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return probabilities of shape ``(batch, 1)`` for input ``(batch, in_features)``."""
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.sigmoid(x)
        return x

# Create the meta-learner, on the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Seed PyTorch with the notebook-wide seed so weight initialisation (and hence
# the submission) is reproducible across runs.
torch.manual_seed(seed)
model = Net().to(device)

# Loss function and optimiser.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.00009)

# Training inputs: out-of-fold base-model predictions and the true labels.
x = torch.Tensor(stacking_output).to(device)
# The network outputs shape (N, 1); BCELoss requires the target to have the
# same shape, so the label vector is reshaped into a column.
y = torch.Tensor(y_train_std).reshape(-1, 1).to(device)

# Full-batch training.
epochs = 1300
model.train()
for epoch in range(epochs):
    # Forward pass.
    outputs = model(x)
    loss = criterion(outputs, y)

    # Backward pass and parameter update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Uncomment to log training progress.
    # print(f"Epoch: {epoch+1}, Loss: {loss.item()}")

# Predict the test rows on the CPU from the base models' test-set predictions.
model.to('cpu')
model.eval()
new_x = torch.Tensor(stacking_output_val)
with torch.no_grad():
    # squeeze(1) drops only the output column, so a one-row test set still
    # yields a 1-D array.
    probabilities = model(new_x).squeeze(1).numpy()
# Threshold at 0.5 (np.round sends an exact 0.5 to 0) and store integer labels.
predictions = np.round(probabilities).astype(int)

submission=pd.DataFrame({'uid':test_std['uid'],'y':predictions})
submission.to_csv('submission5.csv',index=False)

# accuracy=accuracy_score(predictions, y_val_std)
# print(accuracy)
