In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
from sklearn.metrics import confusion_matrix
from torch import optim
from torch.utils.data import DataLoader

from utils.seq_data_loader import LoanSimDataset, collate_fn

In [2]:
# Columns passed to LoanSimDataset as `feature_columns` (21 entries).
features = [
    "action_num_actual",
    "gender",
    "age",
    "amount",
    "num_loan",
    "duration",
    "year_ratio",
    "diff_city",
    "marriage",
    "kids",
    "month_in",
    "housing",
    "edu",
    "motivation",
    "installment",
    "installment_timestep",
    "state_cum_overduelength",
    "remaining_debt",
    "state_capital",
    "state_interests",
    "state_penalty",
]

# Columns passed to LoanSimDataset as `scale_columns`: every feature column
# followed by the seven `y_`-prefixed state columns. Built as a new list so
# that `features` itself is never aliased or mutated.
scale_columns = features + [
    "y_installment",
    "y_installment_timestep",
    "y_state_cum_overduelength",
    "y_remaining_debt",
    "y_state_capital",
    "y_state_interests",
    "y_state_penalty",
]

In [3]:
# Target column groups. `full_targets` is their concatenation in the order
# state -> installment -> loan -> recovery rate, and is the list passed to
# LoanSimDataset as `target_columns`.
state_targets = [
    "y_installment",
    "y_installment_timestep",
    "y_state_cum_overduelength",
    "y_remaining_debt",
    "y_state_capital",
    "y_state_interests",
    "y_state_penalty",
]
installment_targets = ["installment_done"]
loan_targets = ["loan_done"]
recovery_rate_targets = ["recovery_rate_weighted"]

full_targets = [
    *state_targets,
    *installment_targets,
    *loan_targets,
    *recovery_rate_targets,
]

In [4]:
# Both datasets are built from the same CSV and differ only in `group`.
# NOTE(review): presumably `group` selects rows by the CSV's `group` column
# ("train" / "test") -- confirm against LoanSimDataset.
DATA_CSV_PATH = "./Res/simulator_data.csv"
BATCH_SIZE = 32

train_dataset = LoanSimDataset(
    csv_file_path=DATA_CSV_PATH,
    scale_columns=scale_columns,
    feature_columns=features,
    target_columns=full_targets,
    group="train",
)

# use the test data as validation data
# NOTE(review): tuning against this split means it no longer gives an
# unbiased final test estimate.
val_dataset = LoanSimDataset(
    csv_file_path=DATA_CSV_PATH,
    scale_columns=scale_columns,
    feature_columns=features,
    target_columns=full_targets,
    group="test",  # read the test data
)

# Training batches are reshuffled every epoch.
train_dataloader = DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn
)

# Validation batches keep a fixed order: shuffling adds nothing to evaluation
# and makes per-batch results non-reproducible between runs.
val_dataloader = DataLoader(
    val_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn
)

------------ Creating Sequences ------------
++ILJ423Sylep6c2NcsH1Q==
++gfGps0KgZTSvf0gJVmRg==
+/N5iqteMPhOHFpgCFmAOQ==
+/ZkkrGSt8cNbfyV+UDgjw==
+/f5MmadWSxfiZ6imQwQLg==
+/xQ5YT2mvVYZWahWo7rdg==
+/xuv8q/YPItDjPTJd4Yvw==
+/ySjgC2Okn6Z5XIR+CBMA==
+0Ld50qJNSYlgPKvQN/ZhA==
+0x2Hf4WjUnOvUZ12Nj8ug==
+2MBhv8ea4KOg7wWgAPEcQ==
+2PT2Zvv3HCOLPoDeSZakg==
+2QF+kKbxYUU5EUI1HiCuw==
+2Ri0FcALReema1Bcu0lNQ==
+2wSqsLPWkVx7jOOOW5VpQ==
+3OB6Kb7s1aNe2ryubScKA==
+3gj/psT3jojh/WkAFpJwg==
+47hLPtWPsshqfPTxaM7Rw==
+49Ottk73GRDEvIq7QQhEA==
+4Yepe9k8sJgir8vt99MmA==
+4fkQles78SpVDZB3GcV1A==
+4mSfxZptW3vK7vJy9Sslg==
+4zSG1PEjqfyU4IVCYFKmw==
+5+LOqgU3v03Q4qZFWL9wQ==
+58vKT7fdSNaxhjNFkOP5w==
+5QQSPxNuda0yw2VZk4E1Q==
+5o39xP8VBz9kn6VIWlf6A==
+5t4UEuGSK4b92W1KPkUVQ==
+6RraKd7G3tInKWyM1hGTw==
+7gCAQ7uUaMCVr+kAp67ag==
+8Nfp7yu19DvGBka7svABA==
+9EoaF4rnRlgokCA1FP2EQ==
+9fF+AVtUCVO50RG/IiMaw==
+9gPutMOTnwdyK7PjK1Jww==
+AE1bqn6KCtkCdz1hOo6WQ==
+BAIdDIesTxuPxEd5Eb+8g==
+BQjABu1aoup3rVDokzhQw==
+BckHxJhOk+zb/UO+D6GMg==
+Bflz

In [7]:
# Display the first item of the dataset wrapped by the training DataLoader
# (`DataLoader.dataset` is the dataset object it was constructed with).
train_dataloader.dataset[0]

(tensor([[-0.9373,  0.5497,  0.1249,  0.3590, -0.2112, -0.9555, -0.7942, -0.4098,
          -0.0082, -0.0293, -0.5255,  0.5704,  0.9974, -1.0046, -1.4363, -0.9879,
          -0.6120,  0.8437, -0.7338, -0.8503, -0.4944],
         [-0.9373,  0.5497,  0.1249,  0.3590, -0.2112, -0.9555, -0.7942, -0.4098,
          -0.0082, -0.0293, -0.5255,  0.5704,  0.9974, -1.0046, -0.9200, -0.9879,
          -0.6120,  0.8437,  0.0290, -0.3791, -0.4944],
         [-0.9373,  0.5497,  0.1249,  0.3590, -0.2112, -0.9555, -0.7942, -0.4098,
          -0.0082, -0.0293, -0.5255,  0.5704,  0.9974, -1.0046, -0.4037, -0.9879,
          -0.6120,  0.8437,  0.7918,  0.0921, -0.4944],
         [-0.9373,  0.5497,  0.1249,  0.3590, -0.2112, -0.9555, -0.7942, -0.4098,
          -0.0082, -0.0293, -0.5255,  0.5704,  0.9974, -1.0046,  0.1126, -0.9879,
          -0.6120,  0.8437,  1.5546,  0.5632, -0.4944],
         [-0.9373,  0.5497,  0.1249,  0.3590, -0.2112, -0.9555, -0.7942, -0.4098,
          -0.0082, -0.0293, -0.5255,  

In [5]:
# Load the raw simulator CSV into a DataFrame for manual inspection.
# This is the same file the LoanSimDataset instances are pointed at.
# `pd` is imported in the notebook's top import cell.
data = pd.read_csv('./Res/simulator_data.csv')
data

Unnamed: 0,group,loan_id,action_num_actual,gender,age,amount,num_loan,duration,year_ratio,diff_city,...,y_installment,y_installment_timestep,y_state_cum_overduelength,y_remaining_debt,y_state_capital,y_state_interests,y_state_penalty,installment_done,loan_done,recovery_rate_weighted
0,train,/+6C2lDDYJgzzCXpn96AFA==,0,1,18,2000,1,6,16,40421.53,...,2,1,2,2000.000000,666.666667,53.333333,3.333333,1,0,0.205350
1,train,/+6C2lDDYJgzzCXpn96AFA==,1,1,18,2000,1,6,16,40421.53,...,2,2,7,2000.000000,666.666667,53.333333,11.666667,0,0,0.000000
2,train,/+6C2lDDYJgzzCXpn96AFA==,2,1,18,2000,1,6,16,40421.53,...,2,3,7,2000.000000,666.666667,53.333333,11.666667,0,0,0.000000
3,train,/+6C2lDDYJgzzCXpn96AFA==,3,1,18,2000,1,6,16,40421.53,...,2,4,8,2000.000000,666.666667,53.333333,13.333333,0,0,0.000000
4,train,/+6C2lDDYJgzzCXpn96AFA==,4,1,18,2000,1,6,16,40421.53,...,3,1,0,1666.666667,666.666667,53.333333,0.000000,1,0,0.177079
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200009,test,ZWocy2KIKqJbgKZVoXZIFA==,0,1,30,2900,1,5,18,0.00,...,5,1,2,1740.000000,1740.000000,130.500000,8.700000,1,0,0.000000
200010,test,ZWocy2KIKqJbgKZVoXZIFA==,1,1,30,2900,1,5,18,0.00,...,5,2,-15,1740.000000,1740.000000,130.500000,-37.700000,0,0,0.000000
200011,test,ZWocy2KIKqJbgKZVoXZIFA==,0,1,30,2900,1,5,18,0.00,...,5,3,24,1740.000000,1740.000000,130.500000,78.300000,0,0,0.000000
200012,test,ZWocy2KIKqJbgKZVoXZIFA==,0,1,30,2900,1,5,18,0.00,...,5,4,-1,1740.000000,1740.000000,130.500000,8.700000,0,0,0.000000


In [6]:
# Select every row of the raw frame that belongs to one loan_id, to inspect
# that loan's rows side by side.
data_weired = data.loc[data['loan_id'].eq('++ILJ423Sylep6c2NcsH1Q==')]
data_weired

Unnamed: 0,group,loan_id,action_num_actual,gender,age,amount,num_loan,duration,year_ratio,diff_city,...,y_installment,y_installment_timestep,y_state_cum_overduelength,y_remaining_debt,y_state_capital,y_state_interests,y_state_penalty,installment_done,loan_done,recovery_rate_weighted
2540,train,++ILJ423Sylep6c2NcsH1Q==,0,1,29,3000,1,5,14,0.0,...,2,1,0,3000.0,1200.0,70.0,0.0,1,0,0.234979
2541,train,++ILJ423Sylep6c2NcsH1Q==,0,1,29,3000,1,5,14,0.0,...,3,1,0,3000.0,1800.0,105.0,0.0,1,0,0.234979
2542,train,++ILJ423Sylep6c2NcsH1Q==,0,1,29,3000,1,5,14,0.0,...,4,1,0,3000.0,2400.0,140.0,0.0,1,0,0.0
2543,train,++ILJ423Sylep6c2NcsH1Q==,0,1,29,3000,1,5,14,0.0,...,5,1,0,3000.0,3000.0,175.0,0.0,1,0,0.0
2544,train,++ILJ423Sylep6c2NcsH1Q==,0,1,29,3000,1,5,14,0.0,...,5,1,0,3000.0,3000.0,175.0,0.0,1,1,0.0
