In [None]:
import torch
# A bare expression in the middle of a cell is never displayed, so print the
# version explicitly to record which PyTorch build produced the results.
print(torch.__version__)

# Use the first CUDA GPU when one is visible; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
# Author Oliver
# Date 2023/4/5 18:54
# load dataset

import pandas as pd
from sklearn.preprocessing import StandardScaler
import numpy as np
from tqdm import tqdm
from matplotlib import pyplot

# Load the pre-processed train/test splits from the working directory.
trainDataset = pd.read_csv('./processed_train.csv')
testDataset = pd.read_csv('./processed_test.csv')

print('\n=======================\n',trainDataset.shape)
print('\n=======================\n',testDataset.shape)

# The first 43 columns are used as model inputs; the last column of the
# training file is used as the regression target.
X_train = trainDataset.iloc[:,0:43]
Y_train = trainDataset.iloc[:,-1]
X_test = testDataset.iloc[:,0:43]

print(X_train.head())
print(Y_train.head())
print(X_test.head())

# Normalization with StandardScaler: each feature is rescaled to mean=0 and
# std=1 on the training set. The scaler is fitted on the training features
# only and the same transform is then applied to the test features.
scaler=StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

import torch.nn as nn
import torch
from torch.utils.data import TensorDataset, DataLoader

# Build float32 tensors. The target is still a pandas Series at this point,
# so it is converted to a NumPy array explicitly instead of relying on
# torch.tensor() accepting a Series.
X_train = torch.tensor(X_train, dtype=torch.float32)
Y_train = torch.tensor(Y_train.to_numpy(), dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

# Shuffled mini-batches of 32 samples for training.
train_data = TensorDataset(X_train,Y_train)
data = DataLoader(train_data,batch_size=32,shuffle=True)

In [None]:
#MLP
#structure: 43 -> 512 -> 256 -> 128 -> 64 -> 32 -> 1
hidden_size = [512,256,128,64,32]
class lin_model(nn.Module):
    """Fully connected regressor: one Linear+ReLU pair per hidden width, then a 1-unit head.

    Args:
        hidden_size: sequence of hidden-layer widths, in order. May be empty,
            in which case the model is a single linear layer.
        input_size: number of input features. Defaults to 43, the width that
            was previously hard-coded.
    """
    def __init__(self,hidden_size,input_size=43):
        super().__init__()
        self.hidden_size = hidden_size
        self.blocks = nn.ModuleList([])

        in_features = input_size
        for out_features in hidden_size:
            # nn.ReLU's only argument is the `inplace` flag, not a layer
            # width, so it is constructed without arguments.
            self.blocks.extend([nn.Linear(in_features,out_features),nn.ReLU()])
            in_features = out_features

        # The output head is built once, after the hidden stack, so it also
        # exists when `hidden_size` is empty.
        self.last_layer = nn.Linear(in_features,1)

    def forward(self,X):
        """Map a (..., input_size) tensor to predictions with the last axis removed."""
        for net in self.blocks:
            X=net(X)
        X=self.last_layer(X)
        # Drop only the trailing unit axis; a plain squeeze() would also drop
        # the batch axis when the batch holds a single sample.
        return X.squeeze(-1)

epochs=600
model = lin_model(hidden_size=hidden_size)
model = model.float()
model = model.to(device)

#MAE
Loss1 = nn.L1Loss()
Optimizer = torch.optim.Adam(model.parameters(),lr=1e-3)

# Mean training MAE and the epoch index, recorded every 10th epoch for plotting.
trainMAEList = []
epochList = []

# define the file path to save the model
PATH = "./result/model_mlp_mae.pt"

for epoch in tqdm(range(epochs)):
    batch_mae=[]

    # The optimisation step runs for every mini-batch. Previously it sat
    # outside this loop, so only the last batch of each epoch was trained on.
    for X,Y in data:
        X = X.to(device)
        Y = Y.to(device)

        Optimizer.zero_grad()

        output = model(X)
        loss1=Loss1(output,Y)

        batch_mae.append(loss1.item())
        loss1.backward()
        Optimizer.step()

    if epoch % 10==0:
        epochList.append(epoch)
        # Average over all batches of this epoch.
        trainMAEList.append(np.mean(batch_mae))
        # Both prints report the loss of the last batch of the epoch.
        print(f"epochs:{epoch},loss:{loss1}")

        print(f"mas:{loss1.detach().cpu().numpy()}")

        # save the model (periodic checkpoint, overwritten each time)
        torch.save(model.state_dict(), PATH)

# The periodic checkpoint above last fires before the final epoch, so save
# once more to make the file on disk match the fully trained weights.
torch.save(model.state_dict(), PATH)

# Inference: no gradients are needed, and the input must live on the same
# device as the model (the model was moved to `device` before training).
model.eval()
with torch.no_grad():
    Y_out = model(X_test_tensor.to(device))

# Bring the predictions back to the CPU before converting to NumPy;
# .numpy() is not available on CUDA tensors.
Y_out_np = Y_out.detach().cpu().numpy()

# Append the predictions to the test set as a new last column.
testDataset_new = pd.read_csv('./processed_test.csv')
testDataset_new['resale_price'] = Y_out_np

# Save the augmented test set.
testDataset_new.to_csv('./result/resultAll_mlp_mae.csv', index=False)


# Two-column table: running Id (0..n-1) and the predicted value.
result = pd.DataFrame({'Id': range(len(Y_out_np)), 'Predicted': Y_out_np})
result.to_csv('./result/result_mlp_mae.csv', index=False)

In [None]:
# Training MAE recorded every 10th epoch. The curve belongs to the MLP, so the
# legend label says MLP (it previously said CNN).
pyplot.plot(epochList,trainMAEList,color='red',marker='o',linewidth=1,markersize=2,label='MLP_Train_MAE')
pyplot.legend(loc=0)
pyplot.title('MLP Train MAE')
pyplot.xlabel("epoch")
pyplot.ylabel("MAE")
pyplot.show()