### Global Settings

In [None]:
# ------------------------
# define global settings
# ------------------------
SETTING_PIPELINE_NAME = "140_clean version"

# ------------------------
# import packages
# ------------------------
import os

DIR_HOME = "XXXXXX"
DIR_CURRENT = os.getcwd()
DIR_PIPELINE = os.path.join(DIR_HOME, "2_pipeline", SETTING_PIPELINE_NAME)

# Create the pipeline folder tree idempotently:
#  * makedirs also creates missing parents (e.g. "2_pipeline"),
#  * exist_ok=True repairs a partially created tree on re-run,
#  * "store/model weight" is where nn_train() writes its checkpoints.
for _subdir in ("out", "store", os.path.join("store", "model weight"), "temp"):
    os.makedirs(os.path.join(DIR_PIPELINE, _subdir), exist_ok=True)

In [None]:
import os
import torch

# Folders
csv_dir = "XXXXXX"
# sorted(): os.listdir() returns entries in arbitrary order, which would make the
# row order of the concatenated frame (and any seeded sampling on it) vary between machines.
csv_files = sorted(os.path.join(csv_dir, f) for f in os.listdir(csv_dir) if f.endswith('.csv'))

# Time
ACTIVE_DURATION_THRESHOLD = 150 # minutes
# Split boundaries: the *_condition functions compare them (plus an offset of 3)
# against the INDEX column of the data.
normal_test_point = 44
covid_train_point = 54
covid_test_point = 118

# Columns
# Earlier, smaller feature sets kept for reference:
# x_cols = ['user_gender', 'register_index', 'TEMP', 'TEMP2', 'WIND', 'HUMI', 'HUMI2', 'VISI', 'PRES', 'CLOD', 'PRCP']
# x_cols = ['user_gender', 'register_index', 'TEMP', 'TEMP2', 'WIND', 'HUMI', 'HUMI2', 'VISI', 'PRES', 'CLOD', 'PRCP', 'IS_COVID_START', 'POLICY']
# x_cols = ['user_gender', 'register_index', 'TEMP', 'TEMP2', 'WIND', 'HUMI', 'HUMI2', 'VISI', 'PRES', 'CLOD', 'PRCP', 'IS_COVID_START', 'POLICY','DURATION_1', 'DURATION_2', 'DURATION_3']
x_cols = ['user_gender', 'register_index', 'TEMP', 'TEMP2', 'WIND', 'HUMI', 'HUMI2', 'VISI', 'PRES', 'CLOD', 'PRCP', 'IS_COVID_START', 'POLICY','DURATION_1', 'DURATION_2', 'DURATION_3', 'LG_FOLLOW']
y_col = 'DURATION_0'

# Train
BATCH_SIZE = 1024
# Use the first GPU when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else 'cpu')

### Dataset Function

In [None]:
# Define the conditions as functions
def training_condition(idx):
    """Return True when ``idx`` falls in one of the two training windows.

    The windows are everything before the normal-test boundary, and the span
    from the covid-train boundary up to (not including) the covid-test
    boundary. Every boundary is shifted by 3.
    """
    before_normal_test = idx < 3 + normal_test_point
    return before_normal_test or 3 + covid_train_point <= idx < 3 + covid_test_point

def testing_normal_condition(idx):
    return 3 + normal_test_point <= idx < 3 + covid_train_point

def testing_covid_condition(idx):
    return idx >= 3 + covid_test_point

In [None]:
# Per-column means used to z-score the data (the four DURATION_* columns share one value).
feature_mean = {
    'TEMP': 14.007015458415168,
    'TEMP2': 279.26179025763645,
    'WIND': 11.920575962103781,
    'HUMI': 70.86096441758492,
    'HUMI2': 5283.037685289832,
    'VISI': 8.704992884116393,
    'PRES': 1014.7379409770316,
    'CLOD': 43.15788405638498,
    'PRCP': 24.918650844505734,
    'DURATION_0': 102.90183371116115,
    'DURATION_1': 102.90183371116115,
    'DURATION_2': 102.90183371116115,
    'DURATION_3': 102.90183371116115,
}
# Matching per-column standard deviations.
feature_std = {
    'TEMP': 8.328565701080262,
    'TEMP2': 250.7086255558244,
    'WIND': 5.50769354766836,
    'HUMI': 13.86840936112677,
    'HUMI2': 1766.4921236773441,
    'VISI': 0.8701007169616094,
    'PRES': 6.589018889937119,
    'CLOD': 22.034113767171334,
    'PRCP': 34.64585327001472,
    'DURATION_0': 156.30188955998761,
    'DURATION_1': 156.30188955998761,
    'DURATION_2': 156.30188955998761,
    'DURATION_3': 156.30188955998761,
}

# Raw durations of 0 and 150 expressed on the standardized DURATION_0 scale.
THRESHOLD_0, THRESHOLD_150 = (
    (raw_value - feature_mean['DURATION_0']) / feature_std['DURATION_0']
    for raw_value in (0, 150)
)

In [None]:
# Dataset type
# Selector values accepted by the dataset classes' ``dataset_type`` argument.
DT_TRAIN, DT_TEST_NORMAL, DT_TEST_COVID = 1, 2, 3

In [None]:
import os
import pandas as pd
from torch.utils.data import Dataset
import torch

class CSVDataset(Dataset):
    """In-memory dataset holding the standardized rows of one time split.

    Args:
        csv_data: DataFrame with an "INDEX" column, every column named in
            ``feature_mean`` and every column in ``x_cols`` / ``y_col``.
            It is not modified.
        x_cols: list of feature column names.
        y_col: name of the label column.
        dataset_type: one of DT_TRAIN, DT_TEST_NORMAL, DT_TEST_COVID.

    Raises:
        ValueError: if ``dataset_type`` is not one of the three constants.
    """

    def __init__(self, csv_data, x_cols, y_col, dataset_type):
        self.x_cols = x_cols
        self.y_col = y_col
        # Keep the frame that was passed in (this previously picked up the
        # notebook-global ``csv_files`` list instead of the argument).
        self.csv_data = csv_data

        # Get right data type
        split_conditions = {
            DT_TRAIN: training_condition,
            DT_TEST_NORMAL: testing_normal_condition,
            DT_TEST_COVID: testing_covid_condition,
        }
        if dataset_type not in split_conditions:
            raise ValueError("Invalid dataset type")
        mask = csv_data["INDEX"].apply(split_conditions[dataset_type])
        # .copy(): standardize an independent frame rather than assigning into
        # a filtered slice of csv_data (avoids SettingWithCopyWarning).
        data = csv_data[mask].copy()

        # standardize data
        for feat in feature_mean:
            data[feat] = (data[feat] - feature_mean[feat]) / feature_std[feat]

        # Extract features and labels; labels are stored as a column vector (N, 1)
        self.x = torch.tensor(data[self.x_cols].values, dtype=torch.float32)
        self.y = torch.tensor(data[self.y_col].values.reshape(-1, 1), dtype=torch.float32)

    def __len__(self):
        """Number of rows in the selected split."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` tensors for row ``idx``."""
        return self.x[idx], self.y[idx]

class CSVDatasetWeighted(Dataset):
    """Class-balanced variant of the split dataset.

    After selecting and standardizing the requested split, rows are divided
    into "active" (label >= THRESHOLD_150) and "inactive" (label below it);
    the larger group is randomly down-sampled (fixed seed 42) to the size of
    the smaller one. Active rows come first in the stored tensors.

    Args:
        csv_data: DataFrame with an "INDEX" column, every column named in
            ``feature_mean`` and every column in ``x_cols`` / ``y_col``.
            It is not modified.
        x_cols: list of feature column names.
        y_col: name of the label column.
        dataset_type: one of DT_TRAIN, DT_TEST_NORMAL, DT_TEST_COVID.

    Raises:
        ValueError: if ``dataset_type`` is not one of the three constants.
    """

    def __init__(self, csv_data, x_cols, y_col, dataset_type):
        self.x_cols = x_cols
        self.y_col = y_col
        self.csv_data = csv_data

        # Get right data type
        split_conditions = {
            DT_TRAIN: training_condition,
            DT_TEST_NORMAL: testing_normal_condition,
            DT_TEST_COVID: testing_covid_condition,
        }
        if dataset_type not in split_conditions:
            raise ValueError("Invalid dataset type")
        mask = csv_data["INDEX"].apply(split_conditions[dataset_type])
        # .copy(): standardize an independent frame rather than assigning into
        # a filtered slice of csv_data (avoids SettingWithCopyWarning).
        data = csv_data[mask].copy()

        # standardize data
        for feat in feature_mean:
            data[feat] = (data[feat] - feature_mean[feat]) / feature_std[feat]

        # Balance the dataset (labels are already standardized, hence THRESHOLD_150)
        active_records = data[data[self.y_col] >= THRESHOLD_150]
        inactive_records = data[data[self.y_col] < THRESHOLD_150]

        # Down-sample whichever group is larger; equal sizes are left untouched.
        if len(inactive_records) > len(active_records):
            inactive_records = inactive_records.sample(len(active_records), random_state=42)
        elif len(active_records) > len(inactive_records):
            active_records = active_records.sample(len(inactive_records), random_state=42)

        balanced_data = pd.concat([active_records, inactive_records])

        # Extract features and labels; labels are stored as a column vector (N, 1)
        self.x = torch.tensor(balanced_data[self.x_cols].values, dtype=torch.float32)
        self.y = torch.tensor(balanced_data[self.y_col].values.reshape(-1, 1), dtype=torch.float32)

    def __len__(self):
        """Number of rows kept after balancing."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` tensors for row ``idx``."""
        return self.x[idx], self.y[idx]

In [None]:
def compute_weights(y_train, threshold):
    """Compute normalized per-sample weights that counter class imbalance.

    A sample is "active" when its label is >= ``threshold`` and "inactive"
    otherwise. Each active sample is weighted by the inactive fraction and
    each inactive sample by the active fraction, so the rarer class gets the
    larger weight. The result is scaled to sum to 1.

    Args:
        y_train: tensor of labels; it is flattened to 1-D.
        threshold: cut-off separating active from inactive labels.

    Returns:
        1-D NumPy float array with one weight per label. When only one class
        is present, every sample receives the same weight.

    Raises:
        ValueError: if no label can be assigned to either class (empty input).
    """
    print("Debugging weights computation:")

    # Flatten with reshape(-1) rather than squeeze(): squeeze() would turn a
    # single-record tensor into a 0-d array that cannot be indexed or summed.
    y_train = y_train.cpu().numpy().reshape(-1)
    print(f"y_train (after flatten): {y_train.shape}")

    # Calculate active and inactive masks
    active_mask = (y_train >= threshold).astype(float)
    inactive_mask = (y_train < threshold).astype(float)
    n_active = active_mask.sum()
    n_inactive = inactive_mask.sum()
    print(f"Number of active records: {n_active}, Number of inactive records: {n_inactive}")

    # Calculate weights
    total_records = n_active + n_inactive
    if total_records == 0:
        raise ValueError("compute_weights needs at least one label to weight")
    weight_active = n_inactive / total_records
    weight_inactive = n_active / total_records
    # The labels on these two lines used to be swapped relative to the values.
    print(f"Weight active: {weight_active}")
    print(f"Weight inactive: {weight_inactive}")

    # Assign weights
    y_train_weight = active_mask * weight_active + inactive_mask * weight_inactive

    # Normalize weights
    weight_sum = y_train_weight.sum()
    if weight_sum > 0:
        y_train_weight /= weight_sum
    else:
        # Only one class present: the opposite-class fraction is 0 for every
        # sample, so fall back to uniform weights instead of dividing 0 by 0.
        y_train_weight = (active_mask + inactive_mask) / total_records

    print("Sample weights tensor:", y_train_weight[:10])  # Print first 10 weights for debugging
    print(f"Total weights sum: {y_train_weight.sum()} (should be close to 1.0)")

    return y_train_weight

In [None]:
from torch.utils.data import DataLoader, WeightedRandomSampler
from tqdm import tqdm

# Read and combine all CSV files
print("Reading csvs")
data = pd.concat([pd.read_csv(file) for file in tqdm(csv_files)], ignore_index=True)

# One dataset per time split, plus a class-balanced version of the training split.
train_dataset = CSVDataset(data, x_cols, y_col, DT_TRAIN)
test_normal_dataset = CSVDataset(data, x_cols, y_col, DT_TEST_NORMAL)
test_covid_dataset = CSVDataset(data, x_cols, y_col, DT_TEST_COVID)
train_dataset_weighted = CSVDatasetWeighted(data, x_cols, y_col, DT_TRAIN)

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
# Evaluation loaders are not shuffled: the metrics do not depend on row order,
# and a fixed order keeps evaluation runs reproducible.
test_normal_dataloader = DataLoader(test_normal_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
test_covid_dataloader = DataLoader(test_covid_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
train_dataloader_weighted = DataLoader(train_dataset_weighted, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)

# Disabled alternative: re-weight by sampling instead of down-sampling.
# weights = compute_weights(train_dataset.y, THRESHOLD_150)
# # Create WeightedRandomSampler
# sampler = WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)
# # Create DataLoader
# train_dataloader_weighted = DataLoader(train_dataset, sampler=sampler, batch_size=1024)

# Debugging the dataloader
for inputs, labels in train_dataloader:
    print(f"Batch inputs dtype: {inputs.dtype}, shape: {inputs.shape}, device: {inputs.device}")
    print(f"Batch labels dtype: {labels.dtype}, shape: {labels.shape}, device: {labels.device}")
    break  # Just to print the first batch

### Linear Regression

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score, f1_score

def scores(y_true, y_pred, mode):
    """Compute regression and active/lazy classification metrics for one split.

    Both inputs are flattened to 1-D float64 arrays, predictions are clipped
    from below at THRESHOLD_0, and values are classified as "active" (1) when
    >= THRESHOLD_150 and "lazy" (0) otherwise.

    Args:
        y_true: array-like of ground-truth values on the standardized scale.
        y_pred: array-like of predictions with the same number of elements.
            The caller's object is not modified.
        mode: label prepended (with a space) to every key of the result.

    Returns:
        dict with the four confusion counts ('y_0 pred_0', 'y_0 pred_1',
        'y_1 pred_0', 'y_1 pred_1'), 'lazy acc', 'active acc', 'r2_score',
        'f1_score', 'mse' and 'acc'. A ratio whose denominator is zero
        (e.g. no active sample in the split) is reported as NaN.

    Raises:
        ValueError: if the two inputs do not have the same number of elements.
    """
    # Flatten so (N, 1) columns and (N,) vectors are treated alike and can
    # never broadcast against each other into an (N, N) matrix.
    y_true = np.asarray(y_true, dtype=np.float64).reshape(-1)
    # np.array copies, so the clipping below never touches the caller's data.
    y_pred = np.array(y_pred, dtype=np.float64).reshape(-1)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same number of elements, got {y_true.size} and {y_pred.size}"
        )
    y_pred[y_pred < THRESHOLD_0] = THRESHOLD_0

    true_lazy = y_true < THRESHOLD_150
    true_active = y_true >= THRESHOLD_150
    pred_lazy = y_pred < THRESHOLD_150
    pred_active = y_pred >= THRESHOLD_150

    y_0_pred_0 = int((true_lazy & pred_lazy).sum())
    y_0_pred_1 = int((true_lazy & pred_active).sum())
    y_1_pred_0 = int((true_active & pred_lazy).sum())
    y_1_pred_1 = int((true_active & pred_active).sum())

    total = len(y_true)
    correct = y_0_pred_0 + y_1_pred_1

    def _ratio(numerator, denominator):
        # NaN instead of ZeroDivisionError when a class is absent from the split.
        return numerator / denominator if denominator else float('nan')

    r2 = r2_score(y_true, y_pred)
    f1 = f1_score(true_active.astype(int), pred_active.astype(int))

    ret = {
        mode + ' y_0 pred_0': y_0_pred_0,
        mode + ' y_0 pred_1': y_0_pred_1,
        mode + ' y_1 pred_0': y_1_pred_0,
        mode + ' y_1 pred_1': y_1_pred_1,
        mode + ' lazy acc': _ratio(y_0_pred_0, y_0_pred_0 + y_0_pred_1),
        mode + ' active acc': _ratio(y_1_pred_1, y_1_pred_0 + y_1_pred_1),
        mode + ' r2_score': r2,
        mode + ' f1_score': f1,
        mode + ' mse': mean_squared_error(y_true, y_pred),
        mode + ' acc': _ratio(correct, total)
    }

    return ret

def evaluate_model(X_train, y_train, X_test_normal, y_test_normal, X_test_covid, y_test_covid,pmodel):
    """Score a fitted model on the train, normal-test and covid-test splits.

    Predictions come from ``pmodel.predict`` and are clipped from below at
    THRESHOLD_0 before being handed to ``scores``. The three result dicts
    (keys prefixed 'Train', 'Test normal', 'Test covid') are merged into one.
    """
    splits = (
        ('Train', X_train, y_train),
        ('Test normal', X_test_normal, y_test_normal),
        ('Test covid', X_test_covid, y_test_covid),
    )

    # Predict every split first, then clip, then score.
    predictions = [pmodel.predict(features) for _, features, _ in splits]
    for predicted in predictions:
        predicted[predicted < THRESHOLD_0] = THRESHOLD_0

    merged = {}
    for (label, _, targets), predicted in zip(splits, predictions):
        merged.update(scores(targets, predicted, label))

    return merged

In [None]:
import pprint

def regression_linear(X_train, y_train):
    """Fit a scikit-learn ``LinearRegression`` on the training data and return it."""
    from sklearn.linear_model import LinearRegression

    # fit() hands back the estimator itself, so the fitted model is returned directly.
    return LinearRegression().fit(X_train, y_train)

# Pull the tensors held by the three datasets back out as NumPy arrays for scikit-learn.
nn_train_x, nn_train_y = (t.cpu().numpy() for t in (train_dataset.x, train_dataset.y))
nn_test_normal_x, nn_test_normal_y = (t.cpu().numpy() for t in (test_normal_dataset.x, test_normal_dataset.y))
nn_test_covid_x, nn_test_covid_y = (t.cpu().numpy() for t in (test_covid_dataset.x, test_covid_dataset.y))

# Linear-regression baseline: fit on the training split, score on all three splits.
model = regression_linear(nn_train_x, nn_train_y)
ret = evaluate_model(
    X_train=nn_train_x,
    y_train=nn_train_y,
    X_test_normal=nn_test_normal_x,
    y_test_normal=nn_test_normal_y,
    X_test_covid=nn_test_covid_x,
    y_test_covid=nn_test_covid_y,
    pmodel=model,
)
pprint.pprint(ret)

### Model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

class NN1(nn.Module):
    """Single hidden layer of 16 ReLU units followed by a 1-unit linear output."""

    def __init__(self, input_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 16)
        self.fc2 = nn.Linear(16, 1)

    def forward(self, x):
        """Map a batch of ``input_size`` features to one output value per row."""
        hidden = torch.relu(self.fc1(x))
        return self.fc2(hidden)
    
class NN2(nn.Module):
    """Single hidden layer of 16 SiLU units followed by a 1-unit linear output."""

    def __init__(self, input_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 16)
        self.silu = nn.SiLU()
        self.fc2 = nn.Linear(16, 1)

    def forward(self, x):
        """Map a batch of ``input_size`` features to one output value per row."""
        return self.fc2(self.silu(self.fc1(x)))

class NN3(nn.Module):
    """Two SiLU hidden layers (16, then 8 units) followed by a 1-unit linear output."""

    def __init__(self, input_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 16)
        self.fc2 = nn.Linear(16, 8)
        self.fc3 = nn.Linear(8, 1)
        self.silu1 = nn.SiLU()
        self.silu2 = nn.SiLU()

    def forward(self, x):
        """Map a batch of ``input_size`` features to one output value per row."""
        hidden_stack = ((self.fc1, self.silu1), (self.fc2, self.silu2))
        for linear, activation in hidden_stack:
            x = activation(linear(x))
        return self.fc3(x)

class NN4(nn.Module):
    """Three SiLU hidden layers (16, 8, 4 units) followed by a 1-unit linear output."""

    def __init__(self, input_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 16)
        self.fc2 = nn.Linear(16, 8)
        self.fc3 = nn.Linear(8, 4)
        self.fc4 = nn.Linear(4, 1)
        self.silu1 = nn.SiLU()
        self.silu2 = nn.SiLU()
        self.silu3 = nn.SiLU()

    def forward(self, x):
        """Map a batch of ``input_size`` features to one output value per row."""
        hidden_stack = (
            (self.fc1, self.silu1),
            (self.fc2, self.silu2),
            (self.fc3, self.silu3),
        )
        for linear, activation in hidden_stack:
            x = activation(linear(x))
        return self.fc4(x)

class NN5(nn.Module):
    """Three 32-unit SiLU hidden layers, each followed by dropout (p=0.5), then a 1-unit output."""

    def __init__(self, input_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 32)
        self.fc2 = nn.Linear(32, 32)
        self.fc3 = nn.Linear(32, 32)
        self.fc4 = nn.Linear(32, 1)
        self.silu1 = nn.SiLU()
        self.silu2 = nn.SiLU()
        self.silu3 = nn.SiLU()
        self.dropout1 = nn.Dropout(p=0.5)
        self.dropout2 = nn.Dropout(p=0.5)
        self.dropout3 = nn.Dropout(p=0.5)

    def forward(self, x):
        """Map a batch of ``input_size`` features to one output value per row."""
        hidden_stack = (
            (self.fc1, self.silu1, self.dropout1),
            (self.fc2, self.silu2, self.dropout2),
            (self.fc3, self.silu3, self.dropout3),
        )
        # Each hidden stage is linear -> SiLU -> dropout, in that order.
        for linear, activation, dropout in hidden_stack:
            x = dropout(activation(linear(x)))
        return self.fc4(x)

class NN6(nn.Module):
    """Three 16-unit SiLU hidden layers followed by a 1-unit linear output."""

    def __init__(self, input_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 16)
        self.fc2 = nn.Linear(16, 16)
        self.fc3 = nn.Linear(16, 16)
        self.fc4 = nn.Linear(16, 1)
        self.silu1 = nn.SiLU()
        self.silu2 = nn.SiLU()
        self.silu3 = nn.SiLU()

    def forward(self, x):
        """Map a batch of ``input_size`` features to one output value per row."""
        hidden_stack = (
            (self.fc1, self.silu1),
            (self.fc2, self.silu2),
            (self.fc3, self.silu3),
        )
        for linear, activation in hidden_stack:
            x = activation(linear(x))
        return self.fc4(x)

class NN7(nn.Module):
    """Four SiLU hidden layers (16, 8, 4, 2 units) followed by a 1-unit linear output."""

    def __init__(self, input_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 16)
        self.fc2 = nn.Linear(16, 8)
        self.fc3 = nn.Linear(8, 4)
        self.fc4 = nn.Linear(4, 2)
        self.fc5 = nn.Linear(2, 1)
        self.silu1 = nn.SiLU()
        self.silu2 = nn.SiLU()
        self.silu3 = nn.SiLU()
        self.silu4 = nn.SiLU()

    def forward(self, x):
        """Map a batch of ``input_size`` features to one output value per row."""
        hidden_stack = (
            (self.fc1, self.silu1),
            (self.fc2, self.silu2),
            (self.fc3, self.silu3),
            (self.fc4, self.silu4),
        )
        for linear, activation in hidden_stack:
            x = activation(linear(x))
        return self.fc5(x)
    
class NN8(nn.Module):
    """Two 32-unit SiLU hidden layers followed by a 1-unit linear output."""

    def __init__(self, input_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 32)
        self.fc2 = nn.Linear(32, 32)
        self.fc3 = nn.Linear(32, 1)
        self.silu1 = nn.SiLU()
        self.silu2 = nn.SiLU()

    def forward(self, x):
        """Map a batch of ``input_size`` features to one output value per row."""
        hidden_stack = ((self.fc1, self.silu1), (self.fc2, self.silu2))
        for linear, activation in hidden_stack:
            x = activation(linear(x))
        return self.fc3(x)

# Settings shared by every network above: the input width (one unit per
# feature column in x_cols) and the training loss.
# The model instance and the optimizer are created in later cells.
input_size = len(x_cols)
criterion = nn.MSELoss()
# Alternative loss (mean absolute error), currently disabled:
# criterion = nn.L1Loss()

### Neural Network

In [None]:
from tqdm import tqdm
import torch.optim.lr_scheduler as lr_scheduler
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score, f1_score

def scores(y_true, y_pred, mode):
    """Compute regression and active/lazy classification metrics for one split.

    Both inputs are flattened to 1-D float64 arrays, predictions are clipped
    from below at THRESHOLD_0, and values are classified as "active" (1) when
    >= THRESHOLD_150 and "lazy" (0) otherwise.

    Args:
        y_true: array-like of ground-truth values on the standardized scale.
        y_pred: array-like of predictions with the same number of elements.
            The caller's object is not modified.
        mode: label prepended (with a space) to every key of the result.

    Returns:
        dict with the four confusion counts ('y_0 pred_0', 'y_0 pred_1',
        'y_1 pred_0', 'y_1 pred_1'), 'lazy acc', 'active acc', 'r2_score',
        'f1_score', 'mse' and 'acc'. A ratio whose denominator is zero
        (e.g. no active sample in the split) is reported as NaN.

    Raises:
        ValueError: if the two inputs do not have the same number of elements.
    """
    # Flatten so (N, 1) columns and (N,) vectors are treated alike and can
    # never broadcast against each other into an (N, N) matrix.
    y_true = np.asarray(y_true, dtype=np.float64).reshape(-1)
    # np.array copies, so the clipping below never touches the caller's data.
    y_pred = np.array(y_pred, dtype=np.float64).reshape(-1)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same number of elements, got {y_true.size} and {y_pred.size}"
        )
    y_pred[y_pred < THRESHOLD_0] = THRESHOLD_0

    true_lazy = y_true < THRESHOLD_150
    true_active = y_true >= THRESHOLD_150
    pred_lazy = y_pred < THRESHOLD_150
    pred_active = y_pred >= THRESHOLD_150

    y_0_pred_0 = int((true_lazy & pred_lazy).sum())
    y_0_pred_1 = int((true_lazy & pred_active).sum())
    y_1_pred_0 = int((true_active & pred_lazy).sum())
    y_1_pred_1 = int((true_active & pred_active).sum())

    total = len(y_true)
    correct = y_0_pred_0 + y_1_pred_1

    def _ratio(numerator, denominator):
        # NaN instead of ZeroDivisionError when a class is absent from the split.
        return numerator / denominator if denominator else float('nan')

    r2 = r2_score(y_true, y_pred)
    f1 = f1_score(true_active.astype(int), pred_active.astype(int))

    ret = {
        mode + ' y_0 pred_0': y_0_pred_0,
        mode + ' y_0 pred_1': y_0_pred_1,
        mode + ' y_1 pred_0': y_1_pred_0,
        mode + ' y_1 pred_1': y_1_pred_1,
        mode + ' lazy acc': _ratio(y_0_pred_0, y_0_pred_0 + y_0_pred_1),
        mode + ' active acc': _ratio(y_1_pred_1, y_1_pred_0 + y_1_pred_1),
        mode + ' r2_score': r2,
        mode + ' f1_score': f1,
        mode + ' mse': mean_squared_error(y_true, y_pred),
        mode + ' acc': _ratio(correct, total)
    }

    return ret

def get_metrics(y_true, y_pred):
    """Return the active/lazy confusion counts for a set of predictions.

    Predictions are clipped from below at THRESHOLD_0 and both arrays are
    split at THRESHOLD_150 into "lazy" (0, below) and "active" (1, at or above).

    Args:
        y_true: array-like of ground-truth values on the standardized scale.
        y_pred: array-like of predictions with the same number of elements.
            The caller's object is not modified.

    Returns:
        Tuple of ints ``(y_0_pred_0, y_0_pred_1, y_1_pred_0, y_1_pred_1)``.
    """
    y_true = np.asarray(y_true, dtype=np.float64).reshape(-1)
    # np.array copies: clipping happens on a private array instead of
    # overwriting the caller's predictions in place.
    y_pred = np.array(y_pred, dtype=np.float64).reshape(-1)
    y_pred[y_pred < THRESHOLD_0] = THRESHOLD_0

    true_lazy = y_true < THRESHOLD_150
    true_active = y_true >= THRESHOLD_150
    pred_lazy = y_pred < THRESHOLD_150
    pred_active = y_pred >= THRESHOLD_150

    y_0_pred_0 = int((true_lazy & pred_lazy).sum())
    y_0_pred_1 = int((true_lazy & pred_active).sum())
    y_1_pred_0 = int((true_active & pred_lazy).sum())
    y_1_pred_1 = int((true_active & pred_active).sum())

    return y_0_pred_0, y_0_pred_1, y_1_pred_0, y_1_pred_1

def nn_evaluate(pmodel, data_loader, mode='unassigned'):
    """Run ``pmodel`` over ``data_loader`` and score its predictions.

    The model is switched to evaluation mode for the pass (so layers such as
    dropout are inactive) and its previous train/eval mode is restored
    afterwards, even if the pass raises.

    Args:
        pmodel: ``torch.nn.Module`` to evaluate.
        data_loader: iterable yielding ``(inputs, labels)`` tensor batches.
        mode: label forwarded to ``scores`` as the key prefix.

    Returns:
        The dict produced by ``scores(y_true, y_pred, mode)``.
    """
    was_training = pmodel.training
    pmodel.eval()

    # Seed each list with an empty float64 array so the concatenated result is
    # float64 and an empty loader still yields empty arrays.
    true_chunks = [np.array([])]
    pred_chunks = [np.array([])]
    try:
        with torch.no_grad():
            for inputs, labels in tqdm(data_loader):
                true_chunks.append(labels.reshape((-1,)).cpu().numpy())
                outputs = pmodel(inputs)
                # .cpu() makes the conversion valid for outputs living on a GPU.
                pred_chunks.append(outputs.reshape((-1,)).cpu().numpy())
    finally:
        pmodel.train(was_training)

    # Concatenate once at the end rather than re-copying the arrays per batch.
    y_true = np.concatenate(true_chunks)
    y_pred = np.concatenate(pred_chunks)

    return scores(y_true, y_pred, mode)

def nn_train(pmodel, ploader, pcriterion, poptimizer, model_path, num_epochs=10):
    """Train ``pmodel`` and checkpoint the state with the best test F1.

    Each epoch first evaluates the model on the notebook-level
    ``test_normal_dataloader`` and ``test_covid_dataloader``, saves the model
    when the mean of the two F1 scores is at least as good as the best so far,
    and then runs one training pass over ``ploader``. The learning rate is
    halved after every epoch (ExponentialLR, gamma=0.5).

    NOTE(review): evaluation runs *before* the training pass of each epoch, so
    the weights produced by the final training pass are never evaluated or
    checkpointed here.

    Args:
        pmodel: ``torch.nn.Module`` to train (updated in place).
        ploader: iterable of ``(inputs, labels)`` training batches.
        pcriterion: loss callable applied as ``pcriterion(outputs, labels)``.
        poptimizer: optimizer built over ``pmodel``'s parameters.
        model_path: checkpoint file name inside ``<DIR_PIPELINE>/store/model weight``.
        num_epochs: number of evaluate-then-train rounds.

    Returns:
        ``(pmodel, ret)`` where ``ret`` is a list with one metrics dict per
        epoch (key 'epoch' plus the 'normal …' and 'covid …' scores).
    """
    ret = []
    best_f1 = 0

    # Make sure the checkpoint folder exists; torch.save does not create it.
    checkpoint_path = os.path.join(DIR_PIPELINE, 'store', 'model weight', model_path)
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    scheduler = lr_scheduler.ExponentialLR(poptimizer, gamma=0.5)
    for epoch in range(num_epochs):
        print("\n\n")
        print("===== ===== ===== ===== ===== ")
        print(f"Epoch: {epoch}")

        print("Evaluating")
        test_normal_ret = nn_evaluate(pmodel, test_normal_dataloader, mode='normal')
        test_covid_ret = nn_evaluate(pmodel, test_covid_dataloader, mode='covid')
        print(f"epoch-{epoch}, normal_acc: {test_normal_ret['normal acc']:.3f}, normal_f1: {test_normal_ret['normal f1_score']:.3f}, normal_mse: {test_normal_ret['normal mse']:.3f}")
        print(f"epoch-{epoch}, covid_acc: {test_covid_ret['covid acc']:.3f}, covid_f1: {test_covid_ret['covid f1_score']:.3f}, covid_mse: {test_covid_ret['covid mse']:.3f}")

        temp_ret = {'epoch': epoch}
        temp_ret.update(test_normal_ret)
        temp_ret.update(test_covid_ret)

        ret.append(temp_ret)

        # ">=" so that, on ties, the most recent model overwrites the checkpoint.
        agg_f1 = (test_normal_ret['normal f1_score'] + test_covid_ret['covid f1_score']) / 2
        if agg_f1 >= best_f1:
            best_f1 = agg_f1
            # Saves the whole module (not just a state_dict).
            torch.save(pmodel, checkpoint_path)

        print("Training")
        # Ensure train mode (dropout etc. active) regardless of the mode the
        # model was handed over in.
        pmodel.train()
        running_loss = 0.0
        num_batches = 0
        for inputs, labels in tqdm(ploader):
            # Forward pass
            outputs = pmodel(inputs)
            loss = pcriterion(outputs, labels)

            # Backward pass and optimization
            poptimizer.zero_grad()
            loss.backward()
            poptimizer.step()

            running_loss += loss.item()
            num_batches += 1

        # Report the mean batch loss that was previously accumulated but never shown.
        if num_batches:
            print(f"epoch-{epoch}, train_loss: {running_loss / num_batches:.3f}")

        # Step the scheduler at the end of each epoch
        scheduler.step()

    return pmodel, ret

In [None]:
# Instantiate the network to train: NN7 (hidden widths 16-8-4-2, one output).
# This rebinds `model`, which held the linear-regression baseline earlier in the notebook.
model = NN7(input_size)

In [None]:
# Adam with an initial learning rate of 5e-4; nn_train attaches its own LR scheduler to it.
optimizer = optim.Adam(params=model.parameters(), lr=0.0005)
# optimizer = optim.SGD(model.parameters(), lr=0.0005, momentum=0.9)
model, ret = nn_train(
    pmodel=model,
    ploader=train_dataloader,
    pcriterion=criterion,
    poptimizer=optimizer,
    model_path="NN7.pth",
    num_epochs=10,
)

### Evaluate

In [None]:
# Turn the list of per-epoch metric dicts returned by nn_train into a DataFrame (one row per epoch).
ret_dta = pd.DataFrame(ret)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Set the epoch column as the index.
# Guarded and non in-place so that re-running this cell does not raise a
# KeyError once 'epoch' has already been moved into the index.
if 'epoch' in ret_dta.columns:
    ret_dta = ret_dta.set_index('epoch')

# Plotting the F1 scores
# scores() names its F1 entries '<mode> f1_score', so these are the column
# names present in ret_dta ('normal f1' / 'covid f1' do not exist).
f1_columns = ['normal f1_score', 'covid f1_score']
plt.figure(figsize=(10, 6))
sns.lineplot(data=ret_dta[f1_columns])
plt.title('F1 Score per Epoch')
plt.xlabel('Epoch')
plt.ylabel('F1 Score')
plt.legend(title='Test Set', labels=['Normal F1', 'COVID F1'])
plt.show()

In [None]:
# Reload a whole-model checkpoint from the pipeline's 'store/model weight' folder.
# NOTE(review): the training cell in this notebook saves "NN7.pth"; "NN7-5.pth" is not
# written by any code visible here — confirm the file exists (e.g. a renamed earlier run).
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted
# source; newer PyTorch releases may need weights_only=False for whole-model pickles — verify.
model_unweighted = torch.load(os.path.join(DIR_PIPELINE, 'store', 'model weight', "NN7-5.pth"))

In [None]:
# Put the reloaded model in inference mode, then score both held-out splits.
model_unweighted.eval()
# ret1 = nn_evaluate(model_unweighted, train_dataloader, mode='train')
ret2, ret3 = (
    nn_evaluate(model_unweighted, loader, mode=split_name)
    for loader, split_name in (
        (test_normal_dataloader, 'normal'),
        (test_covid_dataloader, 'covid'),
    )
)
# pprint.pprint(ret1)
pprint.pprint(ret2)
pprint.pprint(ret3)