## 注意reptile

In [64]:
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score, mean_absolute_error
import copy
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from tqdm import tqdm
from typing import Callable
import matplotlib.pyplot as plt
from scipy import stats
from sklearn.preprocessing import StandardScaler

train_losses_after = []

# Column layout of the per-iteration evaluation table built by `reptile`.
_EVAL_COLUMNS = ['Iteration', 'MSE', 'RMSE', 'MAE', 'R2', 'MAPE']


def _load_eval_split():
    """Load the AQI CSV and return the held-out split as ``(x_test_tensor, y_test_tensor)``.

    The preprocessing (date expansion, Box-Cox on the target, feature
    standardisation, 75/25 split with ``random_state=33``) is deterministic,
    so it only needs to run once per `reptile` call.
    """
    df = pd.read_csv(r"D:\DL_Homework\Kaggle2_Titanic\统计建模\数据\data-2.csv", encoding='gb18030')
    # Convert the 'date' column to datetime and expand it into year/month/day columns.
    df['date'] = pd.to_datetime(df['date'])
    df['year'] = df['date'].dt.year
    df['month'] = df['date'].dt.month
    df['day'] = df['date'].dt.day

    y = df['AQI指数']
    x = df.drop(['AQI指数', 'date'], axis=1)
    # Box-Cox transform of the target; [0] keeps the transformed values and drops the fitted lambda.
    y = stats.boxcox(y)[0]
    x = StandardScaler().fit_transform(x)

    _, x_test, _, y_test = train_test_split(x, y, test_size=0.25, random_state=33)
    # unsqueeze(1) inserts a singleton axis after the batch axis; unsqueeze(-1) turns the targets into a column.
    x_test_tensor = torch.tensor(x_test, dtype=torch.float32).unsqueeze(1)
    y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(-1)
    return x_test_tensor, y_test_tensor


def reptile(model: nn.Module, nb_iterations: int, sample_task: Callable, perform_k_training_steps: Callable, k=15, epsilon=0.1):
    """Run Reptile meta-updates on ``model`` and score it on the held-out split after each one.

    Every iteration samples a task, lets ``perform_k_training_steps`` train a
    deep copy of ``model`` on it, and then moves each parameter of ``model`` a
    fraction ``epsilon`` of the way towards the corresponding trained parameter.
    After the update the model is evaluated on the test split; the MSE is
    appended to the module-level ``train_losses_after`` list and the newest
    metrics row is printed.

    Args:
        model: Network updated in place.
        nb_iterations: Number of meta-iterations; ``0`` or less does nothing.
        sample_task: Zero-argument callable returning a task object.
        perform_k_training_steps: Callable ``(model_copy, task, k)`` returning an
            iterable of trained parameters, in ``model.parameters()`` order.
        k: Forwarded unchanged to ``perform_k_training_steps``.
        epsilon: Interpolation factor of the meta-update.

    Returns:
        pandas.DataFrame with one row per iteration and the columns
        Iteration, MSE, RMSE, MAE, R2 and MAPE.
    """
    df_model_eval = pd.DataFrame(columns=_EVAL_COLUMNS)
    if nb_iterations <= 0:
        # Nothing to train or evaluate, so do not touch the data file at all.
        return df_model_eval

    # Loop-invariant: load and preprocess the evaluation data once.
    x_test_tensor, y_test_tensor = _load_eval_split()

    for iteration in range(nb_iterations):
        task = sample_task()
        phi_tilde = perform_k_training_steps(copy.deepcopy(model), task, k)
        with torch.no_grad():
            # Meta-update: phi <- phi + epsilon * (phi_tilde - phi)
            for p, g in zip(model.parameters(), phi_tilde):
                p += epsilon * (g - p)
            # Evaluate the updated model on the held-out split.
            y_pred = model(x_test_tensor).squeeze(-1)

        MSE = mean_squared_error(y_test_tensor, y_pred)
        RMSE = np.sqrt(MSE)
        MAE = mean_absolute_error(y_test_tensor, y_pred)
        R2 = r2_score(y_test_tensor, y_pred)
        MAPE = mean_absolute_percentage_error(y_test_tensor, y_pred)

        train_losses_after.append(MSE)
        df_model_eval.loc[iteration] = [iteration, MSE, RMSE, MAE, R2, MAPE]
        # Print only the newest row so the log stays one row per iteration.
        print(df_model_eval.loc[[iteration]])

    return df_model_eval

class MLP(nn.Module):
    """Fully connected regressor: input_dim -> 10 -> 32 -> output_dim.

    A ReLU follows each of the first two linear layers; the last layer is
    left linear.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        first_width, second_width = 10, 32
        self.fc1 = nn.Linear(input_dim, first_width)
        self.fc2 = nn.Linear(first_width, second_width)
        self.fc3 = nn.Linear(second_width, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply the three linear layers with ReLU between them to ``x``."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

@torch.no_grad()
def sample_task():
    """Build the training task from the AQI CSV file.

    Returns:
        tuple: ``(x_train, y_train, loss_fct)``, i.e. the standardised training
        features, the Box-Cox transformed training targets (75% split,
        ``random_state=33``) and a fresh ``nn.MSELoss``.
    """
    # Load the raw table.
    frame = pd.read_csv(r"D:\DL_Homework\Kaggle2_Titanic\统计建模\数据\data-2.csv", encoding='gb18030')

    # Parse the 'date' column and expand it into year / month / day columns.
    timestamps = pd.to_datetime(frame['date'])
    frame['date'] = timestamps
    frame['year'] = timestamps.dt.year
    frame['month'] = timestamps.dt.month
    frame['day'] = timestamps.dt.day

    features = frame.drop(['AQI指数', 'date'], axis=1)
    # Box-Cox transform of the target; index 0 is the transformed data.
    targets = stats.boxcox(frame['AQI指数'])[0]
    features = StandardScaler().fit_transform(features)

    # Only the training part of the split is handed out as the task.
    train_features, _, train_targets, _ = train_test_split(
        features, targets, test_size=0.25, random_state=33
    )
    return train_features, train_targets, nn.MSELoss()

def perform_k_training_steps(model, task, k, batch_size=256):
    """Train ``model`` in place on ``task`` for roughly ``k`` passes of mini-batch Adam.

    Args:
        model: Network to optimise (modified in place).
        task: Tuple ``(train_x, train_y, loss_fct)`` where ``train_x`` and
            ``train_y`` support ``len()`` and integer-array indexing.
        k: Number of passes; the step count is ``k * n_samples // batch``.
        batch_size: Requested mini-batch size. It is clamped to the number of
            samples, because sampling without replacement cannot draw more
            indices than there are samples.

    Returns:
        The ``model.parameters()`` iterator of the trained model.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
    train_x, train_y, loss_fct = task
    n_samples = len(train_x)
    # Clamp so small datasets do not make np.random.choice raise; max(1, ...) avoids dividing by zero.
    effective_batch = max(1, min(batch_size, n_samples))
    n_steps = k * n_samples // effective_batch
    for _ in range(n_steps):
        batch_idx = np.random.choice(np.arange(n_samples), size=effective_batch, replace=False)
        x_batch = torch.tensor(train_x[batch_idx], dtype=torch.float32)
        # unsqueeze(-1) gives the targets a trailing axis of size 1.
        target = torch.tensor(train_y[batch_idx], dtype=torch.float32).unsqueeze(-1)
        loss = loss_fct(model(x_batch), target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return model.parameters()

# Meta-train a fresh MLP with Reptile; the loss history list is reset first.
train_losses_after = []
model_after = MLP(input_dim=15, output_dim=1)
reptile(
    model_after,
    nb_iterations=2000,
    sample_task=sample_task,
    perform_k_training_steps=perform_k_training_steps,
    k=10,
)

   Iteration        MSE      RMSE       MAE         R2      MAPE
0        0.0  22.632145  4.757325  4.723523 -82.330489  0.947797
   Iteration        MSE      RMSE       MAE         R2      MAPE
1        1.0  20.990314  4.581519  4.545026 -76.285352  0.911384
   Iteration        MSE      RMSE       MAE        R2      MAPE
2        2.0  19.247978  4.387252  4.348139 -69.87015  0.871341
   Iteration        MSE      RMSE       MAE         R2      MAPE
3        3.0  17.447304  4.176997  4.135168 -63.240155  0.828099
   Iteration        MSE     RMSE       MAE         R2      MAPE
4        4.0  15.646059  3.95551  3.911036 -56.608051  0.782683
   Iteration        MSE      RMSE       MAE        R2      MAPE
5        5.0  13.894117  3.727481  3.680271 -50.15749  0.735965
   Iteration        MSE      RMSE       MAE         R2      MAPE
6        6.0  12.221722  3.495958  3.445887 -43.999804  0.688559
   Iteration        MSE      RMSE       MAE         R2     MAPE
7        7.0  10.694551  3.27025

KeyboardInterrupt: 

In [None]:
# Meta-train an LSTM model with Reptile; the loss history list is reset first.
# NOTE(review): `LSTM` is not defined in the visible part of this file; confirm it is in scope.
train_losses_after = []
model_after = LSTM(input_dim=15, hidden_dim=64, output_dim=1)
reptile(
    model_after,
    nb_iterations=110,
    sample_task=sample_task,
    perform_k_training_steps=perform_k_training_steps,
    k=3,
)

In [None]:
# Rebuild the preprocessed dataset and take a 10% hold-out split for evaluation.
df = pd.read_csv(r"D:\DL_Homework\Kaggle2_Titanic\统计建模\数据\data-2.csv", encoding='gb18030')

# Parse 'date', then derive the year / month / day columns from it.
df['date'] = pd.to_datetime(df['date'])
df['year'], df['month'], df['day'] = df['date'].dt.year, df['date'].dt.month, df['date'].dt.day

data = df
x = data.drop(columns=['AQI指数', 'date'])
# Box-Cox transform of the target; index 0 is the transformed data.
y = stats.boxcox(data['AQI指数'])[0]

# Standardise the features.
scaler = StandardScaler()
x = scaler.fit_transform(x)

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.1, random_state=33
)

In [None]:
def mape(y_true, y_pred):
    """Return the mean absolute percentage error of ``y_pred`` against ``y_true`` as a fraction."""
    return np.mean(np.abs((y_pred - y_true) / y_true))


# Inference only: skip autograd so the predictions can go straight to sklearn
# without re-wrapping them in torch.tensor(), which copies and warns.
with torch.no_grad():
    y_pred_after = model_after(torch.tensor(x_test, dtype=torch.float32))
# Drop the trailing output axis once and reuse it for every metric.
y_pred_after_flat = y_pred_after.squeeze(-1)

MSE = mean_squared_error(y_test, y_pred_after_flat)
RMSE = np.sqrt(MSE)
MAE = mean_absolute_error(y_test, y_pred_after_flat)
R2 = r2_score(y_test, y_pred_after_flat)
MAPE = mean_absolute_percentage_error(y_test, y_pred_after_flat)
print("===========================")
print("MSE:", MSE)
print("RMSE:", RMSE)
print("MAE:", MAE)
print("R2:", R2)
# MAPE was computed before but never reported.
print("MAPE:", MAPE)
print("===========================")

#### 训练不经过元学习的模型

In [None]:
if __name__ == "__main__":
    # Baseline: train an MLP directly (no meta-learning) and record its
    # test-split MSE after every epoch in train_losses_before.
    model_before = MLP(input_dim=15, output_dim=1)
    optimizer_before = torch.optim.Adam(model_before.parameters(), lr=0.01)
    df = pd.read_csv(r"D:\DL_Homework\Kaggle2_Titanic\统计建模\数据\data-2.csv", encoding='gb18030')
    # Convert the 'date' column to datetime.
    df['date'] = pd.to_datetime(df['date'])
    # Add year / month / day columns.
    df['year'] = df['date'].dt.year
    df['month'] = df['date'].dt.month
    df['day'] = df['date'].dt.day

    data = df
    y = data['AQI指数']
    x = data.drop(['AQI指数', 'date'], axis=1)
    # Box-Cox transform of the target; index 0 is the transformed data.
    y = stats.boxcox(y)[0]
    scaler = StandardScaler()
    x = scaler.fit_transform(x)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=33)

    def mape(y_true, y_pred):
        """Return the mean absolute percentage error as a fraction."""
        return np.mean(np.abs((y_pred - y_true) / y_true))

    # Loop-invariant tensors: build them once instead of once per epoch.
    x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
    x_test_tensor = torch.tensor(x_test, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

    loss_fct = nn.MSELoss()
    train_losses_before = []
    for epoch in range(2000):
        # One full-batch optimisation step on the training split.
        optimizer_before.zero_grad()
        y_pred_before_train = model_before(x_train_tensor)
        loss_before_train = loss_fct(y_pred_before_train.squeeze(-1), y_train_tensor)
        loss_before_train.backward()
        optimizer_before.step()

        # Evaluation on the test split needs no autograd graph.
        with torch.no_grad():
            y_pred_before = model_before(x_test_tensor)
            loss_before = loss_fct(y_pred_before.squeeze(-1), y_test_tensor)
        train_losses_before.append(loss_before.item())

        y_pred_before_flat = y_pred_before.squeeze(-1)
        MSE = mean_squared_error(y_test, y_pred_before_flat)
        RMSE = np.sqrt(MSE)
        MAE = mean_absolute_error(y_test, y_pred_before_flat)
        R2 = r2_score(y_test, y_pred_before_flat)
        MAPE = mean_absolute_percentage_error(y_test, y_pred_before_flat)
        print("===========================")
        print("MSE:", MSE)
        print("RMSE:", RMSE)
        print("MAE:", MAE)
        print("R2:", R2)
        # MAPE was computed before but never reported.
        print("MAPE:", MAPE)
        print("===========================")


In [None]:
# Two panels side by side: baseline loss history on the left, meta-learned on the right.
fig, (ax_before, ax_after) = plt.subplots(1, 2, figsize=(12, 5))

ax_before.plot(train_losses_before)
# Dashed red line at the lowest recorded value.
ax_before.axhline(y=min(train_losses_before), color='r', linestyle='--')
ax_before.set_title('Train Loss of Model Before Meta-Learning')
ax_before.set_xlabel('Epoch')
ax_before.set_ylabel('MSE Loss')

ax_after.plot(train_losses_after)
# Dashed red line at the lowest recorded value.
ax_after.axhline(y=min(train_losses_after), color='r', linestyle='--')
ax_after.set_title('Train Loss of Model After Meta-Learning')
ax_after.set_xlabel('Epoch')
ax_after.set_ylabel('MSE Loss')

fig.tight_layout()
plt.show()

In [None]:
# Both loss histories on one axes, each with a dashed red line at its minimum.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(train_losses_before, label='Before Meta-Learning')
ax.axhline(y=min(train_losses_before), color='r', linestyle='--')
ax.plot(train_losses_after, label='After Meta-Learning')
ax.axhline(y=min(train_losses_after), color='r', linestyle='--')
ax.set_title('Train Loss Comparison')
ax.set_xlabel('Epoch')
ax.set_ylabel('MSE Loss')
ax.legend()
plt.show()

In [None]:
# Both loss histories on one axes, without the minimum markers.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(train_losses_before, label='Before Meta-Learning')
ax.plot(train_losses_after, label='After Meta-Learning')
ax.set_title('Train Loss Comparison')
ax.set_xlabel('Epoch')
ax.set_ylabel('MSE Loss')
ax.legend()
plt.show()

In [None]:
# Show the per-epoch loss values collected for the baseline (non-meta-learned) model.
print(train_losses_before)

In [None]:
# Show the loss values collected for the meta-learned model.
print(train_losses_after)

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score, mean_absolute_error
import copy
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from tqdm import tqdm
from typing import Callable
import matplotlib.pyplot as plt

def reptile(model: nn.Module, nb_iterations: int, sample_task: Callable, perform_k_training_steps: Callable, k=30, epsilon=0.1):
    """Run ``nb_iterations`` Reptile meta-updates on ``model`` in place.

    Each iteration samples a task, trains a deep copy of ``model`` on it via
    ``perform_k_training_steps`` and then moves every parameter of ``model`` a
    fraction ``epsilon`` of the way towards the trained copy's parameter.
    Progress is shown with a tqdm bar. Returns ``None``.
    """
    for _step in tqdm(range(nb_iterations)):
        task = sample_task()
        adapted_params = perform_k_training_steps(copy.deepcopy(model), task, k)
        # phi <- phi + epsilon * (phi_tilde - phi), applied without autograd tracking.
        with torch.no_grad():
            for current, adapted in zip(model.parameters(), adapted_params):
                current.add_(epsilon * (adapted - current))

class Net(nn.Module):
    """Single linear layer followed by a sigmoid."""

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``sigmoid(fc1(x))``."""
        return self.sigmoid(self.fc1(x))

@torch.no_grad()
def sample_task():
    """Build the training task from the price spreadsheet.

    Returns:
        tuple: ``(x_train, y_train, loss_fct)``, i.e. the training part (75%,
        ``random_state=33``) of the feature columns and of the '价格' column
        divided by its maximum, plus a fresh ``nn.MSELoss``.
    """
    table = pd.read_excel(r"D:\DL_Homework\Kaggle2_Titanic\Iris\SMOGN\data\数据源信息_clean02.xls")
    prices = table['价格']
    features = table.drop(['价格'], axis=1)
    # Scale the target by its maximum value.
    scaled_prices = prices / prices.max()
    train_features, _, train_targets, _ = train_test_split(
        features, scaled_prices, test_size=0.25, random_state=33
    )
    return train_features, train_targets, nn.MSELoss()

def perform_k_training_steps(model, task, k, batch_size=256):
    """Train ``model`` in place on ``task`` for roughly ``k`` passes of mini-batch Adam.

    Args:
        model: Network to optimise (modified in place).
        task: Tuple ``(train_x, train_y, loss_fct)`` where ``train_x`` and
            ``train_y`` are pandas objects indexed positionally via ``.iloc``.
        k: Number of passes; the step count is ``k * n_samples // batch``.
        batch_size: Requested mini-batch size. It is clamped to the number of
            samples, because sampling without replacement cannot draw more
            indices than there are samples.

    Returns:
        The ``model.parameters()`` iterator of the trained model.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    train_x, train_y, loss_fct = task
    n_samples = len(train_x)
    # Clamp so small datasets do not make np.random.choice raise; max(1, ...) avoids dividing by zero.
    effective_batch = max(1, min(batch_size, n_samples))
    n_steps = k * n_samples // effective_batch
    for _ in range(n_steps):
        batch_idx = np.random.choice(np.arange(n_samples), size=effective_batch, replace=False)
        x_batch = torch.tensor(train_x.iloc[batch_idx].values, dtype=torch.float32)
        # unsqueeze(-1) gives the targets a trailing axis of size 1.
        target = torch.tensor(train_y.iloc[batch_idx].values, dtype=torch.float32).unsqueeze(-1)
        loss = loss_fct(model(x_batch), target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return model.parameters()


if __name__ == "__main__":
    def mape(y_true, y_pred):
        """Return the mean absolute percentage error as a fraction."""
        return np.mean(np.abs((y_pred - y_true) / y_true))

    # Model without meta-learning: fit it and record loss and metrics per epoch.
    # NOTE(review): both models below are fitted on the held-out split
    # (x_test / y_test) and scored on that same data; confirm this is the
    # intended adaptation comparison.
    model_before = Net(input_dim=77, output_dim=1)
    optimizer_before = torch.optim.Adam(model_before.parameters(), lr=0.01)
    data = pd.read_excel(r"D:\DL_Homework\Kaggle2_Titanic\Iris\SMOGN\data\数据源信息_clean02.xls")
    x = data.drop(['价格'], axis=1)
    y = data['价格']
    # Scale the target by its maximum value.
    y = y/y.max()
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=33)
    x = x_test
    y = y_test
    # Loop-invariant tensors: build them once and share them between both runs.
    x_tensor = torch.tensor(x.values, dtype=torch.float32)
    y_tensor = torch.tensor(y.values, dtype=torch.float32)
    loss_fct = nn.MSELoss()
    train_losses_before = []
    train_rmse_before = []
    train_mae_before = []
    train_r2_before = []
    train_mape_before = []
    for epoch in range(445):
        optimizer_before.zero_grad()
        y_pred_before = model_before(x_tensor)
        loss_before = loss_fct(y_pred_before.squeeze(-1), y_tensor)
        loss_before.backward()
        optimizer_before.step()
        train_losses_before.append(loss_before.item())
        # Metrics use the predictions made before this epoch's optimizer step;
        # detach() replaces the torch.tensor(tensor) copy, which warns.
        y_pred_before_flat = y_pred_before.detach().squeeze(-1)
        RMSE = np.sqrt(mean_squared_error(y_test, y_pred_before_flat))
        MAE = mean_absolute_error(y_test, y_pred_before_flat)
        R2 = r2_score(y_test, y_pred_before_flat)
        MAPE = mean_absolute_percentage_error(y_test, y_pred_before_flat)

        # float() accepts both NumPy scalars and plain Python floats, whereas
        # .item() only exists on the former.
        train_rmse_before.append(float(RMSE))
        train_mae_before.append(float(MAE))
        train_r2_before.append(float(R2))
        train_mape_before.append(float(MAPE))

    # Meta-learned model: one Reptile iteration first, then the same fitting loop.
    model_after = Net(input_dim=77, output_dim=1)
    reptile(model_after, 1, sample_task, perform_k_training_steps, k=30)
    optimizer_after = torch.optim.Adam(model_after.parameters(), lr=0.01)
    x = x_test
    y = y_test
    loss_fct = nn.MSELoss()
    train_losses_after = []
    train_rmse_after = []
    train_mae_after = []
    train_r2_after = []
    train_mape_after = []
    for epoch in range(445):
        optimizer_after.zero_grad()
        y_pred_after = model_after(x_tensor)
        loss_after = loss_fct(y_pred_after.squeeze(-1), y_tensor)
        loss_after.backward()
        optimizer_after.step()
        train_losses_after.append(loss_after.item())
        y_pred_after_flat = y_pred_after.detach().squeeze(-1)
        RMSE = np.sqrt(mean_squared_error(y_test, y_pred_after_flat))
        MAE = mean_absolute_error(y_test, y_pred_after_flat)
        R2 = r2_score(y_test, y_pred_after_flat)
        MAPE = mean_absolute_percentage_error(y_test, y_pred_after_flat)

        train_rmse_after.append(float(RMSE))
        train_mae_after.append(float(MAE))
        train_r2_after.append(float(R2))
        train_mape_after.append(float(MAPE))


In [None]:
# Show the per-epoch MSE loss values recorded for the model without meta-learning.
print(train_losses_before)

In [None]:
# Show the per-epoch RMSE values recorded for the model without meta-learning.
print(train_rmse_before)

In [None]:
# Show the per-epoch MAE values recorded for the model without meta-learning.
print(train_mae_before)

In [None]:
# Show the per-epoch R2 scores recorded for the model without meta-learning.
print(train_r2_before)

In [None]:
# Show the per-epoch MSE loss values recorded for the meta-learned model.
print(train_losses_after)

In [None]:
# Show the per-epoch RMSE values recorded for the meta-learned model.
print(train_rmse_after)

In [None]:
# Show the per-epoch MAE values recorded for the meta-learned model.
print(train_mae_after)

In [None]:
# Show the per-epoch R2 scores recorded for the meta-learned model.
print(train_r2_after)