In [27]:
# 查看当前挂载的数据集目录, 该目录下的变更重启环境后会自动还原
# View dataset directory. 
# This directory will be recovered automatically after resetting environment. 
!ls /home/aistudio/data

data300332


In [28]:
# 查看工作区文件，该目录下除data目录外的变更将会持久保存。请及时清理不必要的文件，避免加载过慢。
# View personal work directory. 
# All changes, except /data, under this directory will be kept even after reset. 
# Please clean unnecessary files in time to speed up environment loading. 
!ls /home/aistudio

data  external-libraries  main.ipynb  work


In [29]:
# 如果需要进行持久化安装, 需要使用持久化路径, 如下方代码示例:
# If a persistence installation is required, 
# you need to use the persistence path as the following: 
!mkdir /home/aistudio/external-libraries
!pip install beautifulsoup4 -t /home/aistudio/external-libraries

mkdir: cannot create directory '/home/aistudio/external-libraries': File exists
Looking in indexes: https://mirror.baidu.com/pypi/simple/, https://mirrors.aliyun.com/pypi/simple/
Collecting beautifulsoup4
  Using cached https://mirrors.aliyun.com/pypi/packages/b1/fe/e8c672695b37eecc5cbf43e1d0638d88d66ba3a44c4d321c796f4e59167f/beautifulsoup4-4.12.3-py3-none-any.whl (147 kB)
[0mCollecting soupsieve>1.2 (from beautifulsoup4)
  Using cached https://mirrors.aliyun.com/pypi/packages/d1/c2/fe97d779f3ef3b15f05c94a2f1e3d21732574ed441687474db9d342a7315/soupsieve-2.6-py3-none-any.whl (36 kB)
Installing collected packages: soupsieve, beautifulsoup4
Successfully installed beautifulsoup4-4.12.3 soupsieve-2.6
[0m

In [30]:
# 同时添加如下代码, 这样每次环境(kernel)启动的时候只要运行下方代码即可: 
# Also add the following code, 
# so that every time the environment (kernel) starts, 
# just run the following code: 
import sys 
sys.path.append('/home/aistudio/external-libraries')

In [35]:
import paddle
import paddle.nn as nn
import paddle.optimizer as optim
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from paddle.io import DataLoader, Dataset

# 自定义数据集类
class LOLDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __getitem__(self, index):
        return self.X[index], self.y[index]

    def __len__(self):
        return len(self.X)

# 加载训练数据
train_data = pd.read_csv('data/data300332/train.csv')

# 训练集
X_train = train_data.drop(['id', 'win'], axis=1)
y_train = train_data['win']

# 标准化处理
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)

# 将数据转换为 float32 类型
X_train = X_train.astype('float32')
y_train = y_train.values.astype('float32')

# 创建数据集和数据加载器
train_dataset = LOLDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)

class SimpleNN(nn.Layer):
    def __init__(self, input_size):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.sigmoid(self.fc3(x))
        return x

# 初始化模型
input_size = X_train.shape[1]
model = SimpleNN(input_size)

criterion = nn.BCELoss()
optimizer = optim.Adam(learning_rate=0.001, parameters=model.parameters())

# 训练模型
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # 前向传播
        outputs = model(data)
        loss = criterion(outputs, target.reshape([-1, 1]))

        # 反向传播和优化
        optimizer.clear_grad()
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 1 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

# 加载测试数据
test_data = pd.read_csv('data/data300332/test.csv')

# 测试集
X_test = test_data.drop(['id'], axis=1)

# 标准化处理
X_test = scaler.transform(X_test)

# 将数据转换为 float32 类型
X_test = X_test.astype('float32')

# 将测试数据转换为PaddlePaddle张量
X_test = paddle.to_tensor(X_test, dtype='float32')

# 预测
model.eval()
y_pred = model(X_test)
y_pred = (y_pred > 0.5).astype('int32')

# 将预测结果保存到测试数据中
test_data['win'] = y_pred.numpy().flatten()

# 保存预测结果
test_data[['win']].to_csv('submission.csv', index=False)

Epoch [1/20], Loss: 0.4226
Epoch [2/20], Loss: 0.2804
Epoch [3/20], Loss: 0.1960
Epoch [4/20], Loss: 0.2788
Epoch [5/20], Loss: 0.2466
Epoch [6/20], Loss: 0.3173
Epoch [7/20], Loss: 0.2307
Epoch [8/20], Loss: 0.5288
Epoch [9/20], Loss: 0.2167
Epoch [10/20], Loss: 0.2106
Epoch [11/20], Loss: 0.2332
Epoch [12/20], Loss: 0.4681
Epoch [13/20], Loss: 0.2490
Epoch [14/20], Loss: 0.2986
Epoch [15/20], Loss: 0.1739
Epoch [16/20], Loss: 0.2259
Epoch [17/20], Loss: 0.4682
Epoch [18/20], Loss: 0.5396
Epoch [19/20], Loss: 0.3724
Epoch [20/20], Loss: 0.3568


请点击[此处](https://ai.baidu.com/docs#/AIStudio_Project_Notebook/a38e5576)查看本环境基本用法.  <br>
Please click [here ](https://ai.baidu.com/docs#/AIStudio_Project_Notebook/a38e5576) for more detailed instructions. 