In [1]:
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader

# 加载数据
data = pd.read_csv('train_90.csv')  # 请替换为您的文件路径

# 提取特征和目标变量
X = data.iloc[:, 2:37].values  # 选择所有特征列 (F_1 到 F_35)
y = data[['active_index', 'consume_index']].values  # 目标列

# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 转换为张量
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# 定义数据加载器
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=16)  # 可以调整批次大小

# 定义模型
class CustomLSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(CustomLSTM, self).__init__()
        self.hidden_dim = hidden_dim

        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = torch.zeros(1, x.size(0), self.hidden_dim).to(x.device)
        c0 = torch.zeros(1, x.size(0), self.hidden_dim).to(x.device)

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# 实例化模型
input_dim = 35
hidden_dim = 50
output_dim = 2

model = CustomLSTM(input_dim, hidden_dim, output_dim)

# 定义损失函数和优化器
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

# 训练模型
num_epochs = 100  # 可以根据需要调整
for epoch in range(num_epochs):
    for inputs, labels in train_loader:
        # 重塑输入以符合网络期望的维度
        inputs = inputs.reshape(-1, 1, 35)
        
        # 前向传播
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        # 反向传播和优化
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

print('Training finished.')

Epoch [10/100], Loss: 0.1801
Epoch [20/100], Loss: 0.1238
Epoch [30/100], Loss: 0.1193
Epoch [40/100], Loss: 0.1008
Epoch [50/100], Loss: 0.0969
Epoch [60/100], Loss: 0.0940
Epoch [70/100], Loss: 0.0864
Epoch [80/100], Loss: 0.0959
Epoch [90/100], Loss: 0.0963
Epoch [100/100], Loss: 0.1021
Training finished.


In [2]:
torch.save(model.state_dict(), 'model.pth')
data1=pd.read_csv('node_test_4_A.csv')#这是预测集

In [4]:
X1 = data1.iloc[:, 2:37].values  # 选择所有特征列 (F_1 到 F_35)

In [8]:
# 假设有输入数据x
x_tensor = torch.tensor(X1, dtype=torch.float32)
x_tensor = x_tensor.reshape(-1, 1, input_dim)  # 重塑输入以符合网络期望的维度

# 将输入传递给模型进行预测
output = model(x_tensor)

# 对输出进行后处理，例如应用softmax函数或选择最大值
# ...

# 输出预测结果
print(output)

tensor([[68.9584, 60.9524],
        [68.6917, 62.2193],
        [69.2197, 62.8837],
        ...,
        [74.2293, 84.9824],
        [74.3311, 84.9870],
        [75.3430, 86.4011]], grad_fn=<AddmmBackward0>)


In [9]:
# 将张量转换为NumPy数组
numpy_array = output.detach().numpy()

# 将NumPy数组转换为DataFrame
df = pd.DataFrame(numpy_array)

In [12]:
df.head()

Unnamed: 0,0,1
0,68.958366,60.952396
1,68.691711,62.219269
2,69.219727,62.883705
3,68.691956,63.265228
4,61.710728,49.527241


In [15]:
data1.head()

Unnamed: 0,geohash_id,date_id,F_1,F_2,F_3,F_4,F_5,F_6,F_7,F_8,...,F_26,F_27,F_28,F_29,F_30,F_31,F_32,F_33,F_34,F_35
0,4885e281g,20230404,-0.839,-0.857,-0.827,-0.834,-0.833,-0.852,0.733,1.155,...,0.127,0.0,0.055,0.373,0.01,-0.46,-0.527,-0.521,-0.603,-1.568
1,4885e281g,20230405,-0.776,-0.776,-0.721,-0.77,-0.762,-0.792,0.281,0.891,...,0.141,0.0,0.073,0.333,0.012,-0.396,-0.456,-0.471,-0.463,-1.686
2,4885e281g,20230406,-0.725,-0.717,-0.724,-0.755,-0.73,-0.758,0.814,1.498,...,0.126,0.0,0.07,0.322,0.009,-0.373,-0.444,-0.453,-0.569,-1.526
3,4885e281g,20230407,-0.742,-0.733,-0.735,-0.749,-0.755,-0.772,0.664,1.4,...,0.185,0.0,0.076,0.324,0.018,-0.374,-0.453,-0.453,-0.623,-1.511
4,5324516fr,20230404,-1.067,-1.117,-1.098,-1.006,-1.024,-1.052,-0.378,-0.408,...,0.086,0.0,0.317,0.245,0.005,-0.684,-0.745,-0.669,-1.492,-1.873


In [20]:
data1_sub=data1.iloc[:,:2]
combined_df=pd.concat([data1_sub,df],axis=1)
combined_df.head()

Unnamed: 0,geohash_id,date_id,0,1
0,4885e281g,20230404,68.958366,60.952396
1,4885e281g,20230405,68.691711,62.219269
2,4885e281g,20230406,69.219727,62.883705
3,4885e281g,20230407,68.691956,63.265228
4,5324516fr,20230404,61.710728,49.527241


In [23]:
# 假设df是您的DataFrame，并且有四列
columns = combined_df.columns.tolist()  # 获取列名列表
new_order = [columns[0], columns[2], columns[3], columns[1]]  # 创建新的列顺序

# 根据新的列顺序重排DataFrame
df_reordered = combined_df[new_order]

In [28]:
df_reordered.columns=['geohash_id','consumption_level','activity_level','date_id']
df_reordered

Unnamed: 0,geohash_id,consumption_level,activity_level,date_id
0,4885e281g,68.958366,60.952396,20230404
1,4885e281g,68.691711,62.219269,20230405
2,4885e281g,69.219727,62.883705,20230406
3,4885e281g,68.691956,63.265228,20230407
4,5324516fr,61.710728,49.527241,20230404
...,...,...,...,...
4555,607779c2c,74.325798,76.470215,20230407
4556,1d3640fad,73.087105,82.567642,20230404
4557,1d3640fad,74.229271,84.982399,20230405
4558,1d3640fad,74.331055,84.987000,20230406


In [29]:
df_reordered.to_csv('output.csv',sep='\t',index=False)