In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler, MinMaxScaler

  from .autonotebook import tqdm as notebook_tqdm


In [192]:
def get_train_data():
    file = "data/raw/train_90.csv"
    train_data = pd.read_csv(file)
    return train_data


def get_test_data():
    file = "data/raw/A榜/node_test_4_A.csv"
    test_data = pd.read_csv(file)
    return test_data

In [193]:
class DNN(nn.Module):
    def __init__(self):
        super(DNN, self).__init__()
        # Layer 1
        self.fc1 = nn.Linear(36, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.drop1 = nn.Dropout(p=0.3)

        # Layer 2
        self.fc2 = nn.Linear(128, 256)
        self.bn2 = nn.BatchNorm1d(256)
        self.drop2 = nn.Dropout(p=0.4)

        # Layer 3
        self.fc3 = nn.Linear(256, 512)
        self.bn3 = nn.BatchNorm1d(512)
        self.drop3 = nn.Dropout(p=0.5)

        # Layer 4
        self.fc4 = nn.Linear(512, 1024)
        self.bn4 = nn.BatchNorm1d(1024)
        self.drop4 = nn.Dropout(p=0.6)

        # Output Layer
        self.fc5 = nn.Linear(1024, 2)

    def forward(self, x):
        out = self.drop1(F.relu(self.bn1(self.fc1(x))))
        out = self.drop2(F.relu(self.bn2(self.fc2(out))))
        out = self.drop3(F.relu(self.bn3(self.fc3(out))))
        out = self.drop4(F.relu(self.bn4(self.fc4(out))))
        out = self.fc5(out)
        return out

In [194]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # Layer 1
        self.conv1 = nn.Conv1d(in_channels=36, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool1d(kernel_size=1, stride=1)
        self.drop1 = nn.Dropout(p=0.1)

        # Layer 2
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool1d(kernel_size=1, stride=1)
        self.drop2 = nn.Dropout(p=0.1)

        # Fully Connected Layers
        self.fc1 = nn.Linear(128, 256)
        self.drop3 = nn.Dropout(p=0.1)
        self.fc2 = nn.Linear(256, 2)

    def forward(self, x):
        x = x.view(x.size(0), x.size(1), -1)
        out = self.drop1(self.pool1(F.relu(self.conv1(x))))
        out = self.drop2(self.pool2(F.relu(self.conv2(out))))
        out = out.view(out.size(0), -1)
        out = self.drop3(F.relu(self.fc1(out)))
        out = self.fc2(out)
        return out

In [195]:
def train(model, train_loader, criterion, optimizer, scheduler, epochs=50):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        scheduler.step()
        print(f'Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader)}')

In [196]:
def predict(model, test_data):
    model.eval()
    with torch.no_grad():
        outputs = model(test_data)
    return outputs

In [197]:
from sklearn.preprocessing import RobustScaler
d = get_train_data()
t = get_test_data()
ans = []
data = d.iloc[:, :]
test = t.iloc[:, 1:]
X = data.iloc[:, 1:37]
X=RobustScaler().fit_transform(X)
test=RobustScaler().fit_transform(test)
y1 = data['active_index'].values
y2 = data['consume_index'].values
y = data.iloc[:, 37:]
scaler = StandardScaler()
y = scaler.fit_transform(y)

print(X.shape)
print(y.shape)

(102600, 36)
(102600, 2)


In [198]:
model = CNN()

# 定义损失函数和优化器
criterion = nn.MSELoss()
# optimizer = optim.SGD(model.parameters(), lr=0.001, weight_decay=1e-5)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

X_train = torch.Tensor(X)
X_test = torch.Tensor(test)
y_train = torch.Tensor(y)

batch_size = 256
epoch_num = 50

train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

train(model, train_loader, criterion, optimizer, scheduler, epochs=epoch_num)

Epoch 1/50, Loss: 0.1157111620107791
Epoch 2/50, Loss: 0.05133518530636506
Epoch 3/50, Loss: 0.040252388081051166
Epoch 4/50, Loss: 0.03488423731967695
Epoch 5/50, Loss: 0.02985357591171663
Epoch 6/50, Loss: 0.030715850451287635
Epoch 7/50, Loss: 0.025782671369184877
Epoch 8/50, Loss: 0.027727766694069057
Epoch 9/50, Loss: 0.022993136392418585
Epoch 10/50, Loss: 0.021384309498857976
Epoch 11/50, Loss: 0.01738927488742475
Epoch 12/50, Loss: 0.018468091127339593
Epoch 13/50, Loss: 0.016674272551147867
Epoch 14/50, Loss: 0.01643550866271864
Epoch 15/50, Loss: 0.016376282442639816
Epoch 16/50, Loss: 0.014650870352621165
Epoch 17/50, Loss: 0.014799625321321579
Epoch 18/50, Loss: 0.014944480435964547
Epoch 19/50, Loss: 0.01464901602318385
Epoch 20/50, Loss: 0.014997448458654476
Epoch 21/50, Loss: 0.012681154268294425
Epoch 22/50, Loss: 0.013545417424263502
Epoch 23/50, Loss: 0.01227394102299191
Epoch 24/50, Loss: 0.012440530670356201
Epoch 25/50, Loss: 0.01184883365231076
Epoch 26/50, Loss: 

In [199]:
test_dataset = TensorDataset(X_test)
test_loader = DataLoader(test_dataset)

# 进行预测
predictions = []
for test_data in test_loader:
    pred = predict(model, test_data[0])
    predictions.append(pred)

In [207]:
# 将包含 PyTorch Tensor 的列表连接成一个 4560x2 的矩阵
p = torch.cat(predictions, dim=0).numpy()

# 打印矩阵的形状和第一个元素
print(p.shape)
print(p[0])

(4560, 2)
[-0.5301677 -1.0551057]


In [208]:
p = scaler.inverse_transform(p)
print(p)

[[68.426834 60.645542]
 [67.99642  62.33435 ]
 [68.28324  62.488197]
 ...
 [73.24848  82.81422 ]
 [73.28857  82.79428 ]
 [74.05747  83.91006 ]]


In [210]:
ans = []
for i in range(len(p)):
        s = str(t['geohash_id'].iloc[i])
        s += '\t'
        s += str(p[i][1])
        s += '\t'
        s += str(p[i][0])
        s += '\t'
        s += str(t['date_id'].iloc[i])
        ans.append(s)

df = pd.DataFrame(data=ans, columns=["geohash_id\tconsumption_level\tactivity_level\tdate_id"])
df.to_csv("CNN.csv", index=False)
print("finished")

finished
