In [43]:
import pandas
import  torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader

# Load the raw tables; both CSV files are read from the current working directory.
train_data = pandas.read_csv('train.csv')
test_data = pandas.read_csv('test.csv')

# The first column of each table is skipped (presumably a row id -- confirm
# against the data) and the last training column is used as the target.
train_features = train_data.iloc[:, 1:-1]
train_labels = train_data.iloc[:, -1]
test_features = test_data.iloc[:, 1:]
# Remember the split point before `train_features` is rebound to a tensor below.
num_train = train_features.shape[0]

# Preprocess train and test together so both end up with identical columns.
all_features = pandas.concat([train_features, test_features], axis=0)

# Standardise the target; `mean` and `std` are kept so predictions can be
# mapped back to the original scale later.
mean = train_labels.mean()
std = train_labels.std()
train_labels = (train_labels - mean) / (std + 1e-8)

# Standardise numeric columns only. `is_numeric_dtype` is used instead of a
# `dtype != 'object'` comparison so that text columns stored with pandas'
# dedicated string dtype are not mistaken for numbers.
numeric_columns = [
    column for column, dtype in all_features.dtypes.items()
    if pandas.api.types.is_numeric_dtype(dtype)
]
all_features = all_features.apply(
    lambda x: (x - x.mean()) / (x.std() + 1e-8)
    if pandas.api.types.is_numeric_dtype(x.dtype) else x
)

# After standardisation 0 is the column mean, so zero-filling imputes the mean.
# Only numeric columns are filled: filling categorical columns with 0 would
# create a bogus "0" category and defeat `dummy_na=True` below.
if numeric_columns:
    all_features[numeric_columns] = all_features[numeric_columns].fillna(0)

# One-hot encode categoricals, with an explicit indicator for missing values.
# `dtype=float` keeps the frame purely numeric; bool dummies mixed with float
# columns would make `.values` an object array that torch cannot convert.
all_features = pandas.get_dummies(all_features, dummy_na=True, dtype=float)

# Split back into train / test and convert to float32 tensors.
train_features = torch.tensor(all_features.iloc[:num_train, :].values, dtype=torch.float32)
test_features = torch.tensor(all_features.iloc[num_train:, :].values, dtype=torch.float32)
# Labels become a column vector so they match the network's (batch, 1) output.
train_labels = torch.tensor(train_labels.values, dtype=torch.float32).reshape(-1, 1)

def train(train_iter, test_iter, net, trainer, loss, num_epochs=100, device=None):
    """Train `net` and print the mean train / held-out loss after every epoch.

    Args:
        train_iter: iterable of ``(X, y)`` batches used for optimisation.
        test_iter: iterable of ``(X, y)`` batches used only for evaluation.
        net: the ``torch.nn.Module`` to train; it is moved to `device` in place.
        trainer: optimiser built over ``net.parameters()``.
        loss: callable ``loss(prediction, target)`` returning a scalar tensor.
        num_epochs: number of full passes over `train_iter`.
        device: device to run on. Defaults to CUDA when available and to the
            CPU otherwise, so the function also works on machines without a GPU.

    Returns:
        None. Progress is reported through ``print``.
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net.to(device)

    def mean_batch_loss(data_iter):
        # Average of the per-batch losses; NaN for an empty loader instead of
        # a ZeroDivisionError.
        num_batches = len(data_iter)
        if num_batches == 0:
            return float('nan')
        total = sum(loss(net(X.to(device)), y.to(device)).item() for X, y in data_iter)
        return total / num_batches

    for epoch in range(num_epochs):
        net.train()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            trainer.zero_grad()
            l = loss(net(X), y)
            l.backward()
            trainer.step()

        # Evaluate in eval mode (dropout disabled) and without building graphs.
        net.eval()
        with torch.no_grad():
            train_l = mean_batch_loss(train_iter)
            test_l = mean_batch_loss(test_iter)
        print(f'epoch {epoch + 1}, train loss {train_l:.4f}, test loss {test_l:.4f}')

def k_fold(train_features, train_labels, k=5):
    """Run k-fold cross-validation of a small MLP regressor.

    The rows are cut into `k` contiguous folds of ``len(train_features) // k``
    rows. For each fold a fresh network is trained on the remaining rows and
    evaluated on the fold; per-epoch losses are printed by ``train``. Rows past
    ``k * fold_size`` (the remainder of the integer division) are always used
    for training and never for validation.

    Args:
        train_features: 2-D float tensor, one row per sample.
        train_labels: tensor of targets with the same number of rows.
        k: number of folds; must satisfy ``2 <= k <= len(train_features)``.

    Raises:
        ValueError: if `k` would leave a training or validation split empty.
    """
    num_samples = len(train_features)
    if not 2 <= k <= num_samples:
        raise ValueError(
            f'k must be between 2 and the number of samples ({num_samples}), got {k}'
        )

    fold_size = num_samples // k
    for i in range(k):
        start, stop = i * fold_size, (i + 1) * fold_size

        # Training split = everything before the fold + everything after it.
        fit_features = torch.cat([train_features[:start], train_features[stop:]])
        fit_labels = torch.cat([train_labels[:start], train_labels[stop:]])
        train_iter = DataLoader(TensorDataset(fit_features, fit_labels), batch_size=256, shuffle=True)
        test_iter = DataLoader(
            TensorDataset(train_features[start:stop], train_labels[start:stop]),
            batch_size=256,
            shuffle=False,
        )

        # A new model and optimiser per fold so no weights leak between folds.
        net = nn.Sequential(
            nn.Linear(train_features.shape[1], 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )
        trainer = torch.optim.SGD(net.parameters(), lr=0.03)
        loss = nn.MSELoss()
        train(train_iter, test_iter, net, trainer, loss, num_epochs=10)

# Cross-validate on the preprocessed training tensors using five folds.
k_fold(train_features=train_features, train_labels=train_labels, k=5)
        

KeyboardInterrupt: 

In [44]:
import pandas
import  torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader

# Load the raw tables; both CSV files are read from the current working directory.
train_data = pandas.read_csv('train.csv')
test_data = pandas.read_csv('test.csv')

# The first column of each table is skipped (presumably a row id -- confirm
# against the data) and the last training column is used as the target.
train_features = train_data.iloc[:, 1:-1]
train_labels = train_data.iloc[:, -1]
test_features = test_data.iloc[:, 1:]
# Remember the split point before `train_features` is rebound to a tensor below.
num_train = train_features.shape[0]

# Preprocess train and test together so both end up with identical columns.
all_features = pandas.concat([train_features, test_features], axis=0)

# Standardise the target; `mean` and `std` are kept so predictions can be
# mapped back to the original scale later.
mean = train_labels.mean()
std = train_labels.std()
train_labels = (train_labels - mean) / (std + 1e-8)

# Standardise numeric columns only. `is_numeric_dtype` is used instead of a
# `dtype != 'object'` comparison so that text columns stored with pandas'
# dedicated string dtype are not mistaken for numbers.
numeric_columns = [
    column for column, dtype in all_features.dtypes.items()
    if pandas.api.types.is_numeric_dtype(dtype)
]
all_features = all_features.apply(
    lambda x: (x - x.mean()) / (x.std() + 1e-8)
    if pandas.api.types.is_numeric_dtype(x.dtype) else x
)

# After standardisation 0 is the column mean, so zero-filling imputes the mean.
# Only numeric columns are filled: filling categorical columns with 0 would
# create a bogus "0" category and defeat `dummy_na=True` below.
if numeric_columns:
    all_features[numeric_columns] = all_features[numeric_columns].fillna(0)

# One-hot encode categoricals, with an explicit indicator for missing values.
# `dtype=float` keeps the frame purely numeric; bool dummies mixed with float
# columns would make `.values` an object array that torch cannot convert.
all_features = pandas.get_dummies(all_features, dummy_na=True, dtype=float)

# Split back into train / test and convert to float32 tensors.
train_features = torch.tensor(all_features.iloc[:num_train, :].values, dtype=torch.float32)
test_features = torch.tensor(all_features.iloc[num_train:, :].values, dtype=torch.float32)
# Labels become a column vector so they match the network's (batch, 1) output.
train_labels = torch.tensor(train_labels.values, dtype=torch.float32).reshape(-1, 1)

# Mini-batch loaders: the training set is reshuffled each epoch, the test set
# (features only, no labels) keeps its original row order.
train_iter = DataLoader(
    dataset=TensorDataset(train_features, train_labels),
    batch_size=256,
    shuffle=True,
)
test_iter = DataLoader(
    dataset=TensorDataset(test_features),
    batch_size=256,
    shuffle=False,
)

# Mean squared error on the standardised target.
loss = nn.MSELoss()

# Three hidden layers (128 -> 64 -> 32) with ReLU activations, dropout after
# the first hidden layer, and a single linear output unit.
net = nn.Sequential(
    nn.Linear(in_features=train_features.shape[1], out_features=128),
    nn.ReLU(),
    nn.Dropout(p=0.2),
    nn.Linear(in_features=128, out_features=64),
    nn.ReLU(),
    nn.Linear(in_features=64, out_features=32),
    nn.ReLU(),
    nn.Linear(in_features=32, out_features=1),
)

# Plain stochastic gradient descent over all network parameters.
trainer = torch.optim.SGD(params=net.parameters(), lr=0.03)

def train(train_iter, net, trainer, loss, num_epochs=100, device=None):
    """Train `net` and print the mean training loss after every epoch.

    Args:
        train_iter: iterable of ``(X, y)`` batches.
        net: the ``torch.nn.Module`` to train; it is moved to `device` in place.
        trainer: optimiser built over ``net.parameters()``.
        loss: callable ``loss(prediction, target)`` returning a scalar tensor.
        num_epochs: number of full passes over `train_iter`.
        device: device to run on. Defaults to CUDA when available and to the
            CPU otherwise, so the function also works on machines without a GPU.

    Returns:
        None. Progress is reported through ``print``.
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net.to(device)

    for epoch in range(num_epochs):
        net.train()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            trainer.zero_grad()
            l = loss(net(X), y)
            l.backward()
            trainer.step()

        # Re-score the training set in eval mode (dropout disabled) without
        # tracking gradients.
        net.eval()
        with torch.no_grad():
            num_batches = len(train_iter)
            if num_batches == 0:
                # Nothing to average; report NaN rather than divide by zero.
                train_l = float('nan')
            else:
                train_l = sum(
                    loss(net(X.to(device)), y.to(device)).item() for X, y in train_iter
                ) / num_batches
        print(f'epoch {epoch + 1}, train loss {train_l:.4f}')

# Fit the model on the full training set.
train(train_iter, net, trainer, loss, num_epochs=10)

# Predict on whichever device the model ended up on instead of assuming CUDA.
net.eval()
model_device = next(net.parameters()).device
with torch.no_grad():
    predictions = net(test_features.to(model_device)).cpu()
# Undo the label standardisation so scores are back on the original scale.
predictions = predictions * (std + 1e-8) + mean

# NOTE(review): ids are generated from the hard-coded offset 630000, which
# presumably matches the first id in test.csv -- confirm, or read the ids from
# `test_data` instead.
with open('submission.csv', 'w') as f:
    f.write('id,exam_score\n')
    for row_id, score in enumerate(predictions.flatten().tolist(), start=630000):
        f.write(f'{row_id},{score}\n')
    # No explicit close: the `with` block closes the file on exit.


epoch 1, train loss 0.2223
epoch 2, train loss 0.2217
epoch 3, train loss 0.2213
epoch 4, train loss 0.2207
epoch 5, train loss 0.2201
epoch 6, train loss 0.2204
epoch 7, train loss 0.2199
epoch 8, train loss 0.2201
epoch 9, train loss 0.2206
epoch 10, train loss 0.2198
