*Author: Daniel Puente Viejo*  

<img src="https://cdn-icons-png.flaticon.com/512/5043/5043998.png" width="100" height="100" align="right">



### <a id='1'>1. Loading of Libraries and Data</a>
----

* Common libraries

In [3]:
import itertools
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

* Sklearn

In [4]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, recall_score, precision_score, confusion_matrix, roc_auc_score

* PyTorch and PyTorch Geometric

In [2]:
import torch

from torch_geometric.data import HeteroData
import torch_geometric.transforms as T
from torch_geometric.nn import Sequential, Linear, SAGEConv, to_hetero
from torch.nn import ReLU
import torch.nn.functional as F

* Paths and warnings

In [5]:
import warnings

# Directory prefixes used by this notebook: `path` for the input pickles and
# `output_path` for generated files (presumably written further down; that
# usage is not visible in this section).
path = "../data/eda_generated_data/"
output_path = "../data/graph_generated_data/"

# Install a blanket "ignore" filter so no warning of any category is shown.
warnings.filterwarnings(action="ignore")

* Load data

In [7]:
def load_pickle(path, file_name):
    """Load and return the object pickled in ``file_name`` inside ``path``.

    Parameters
    ----------
    path : str
        Directory that contains the file. A trailing separator is optional.
    file_name : str
        Name of the pickle file inside ``path``.

    Returns
    -------
    object
        Whatever object was serialized into the file.

    Raises
    ------
    FileNotFoundError
        If the resolved file does not exist.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code while deserializing, so only
    use this on files that come from a trusted source.
    """
    # os.path.join adds the separator only when it is missing, so "dir" and
    # "dir/" resolve to the same file; plain string concatenation silently
    # produced "dirfile.pkl" when the trailing slash was forgotten.
    with open(os.path.join(path, file_name), 'rb') as f:
        return pickle.load(f)

# Read the train, validation and test pickles plus the scaler pickle from the
# `path` directory, in that order, and bind each loaded object to its name.
df_train, df_val, df_test, scaler = (
    load_pickle(path, pkl_name)
    for pkl_name in ("df_train.pkl", "df_val.pkl", "df_test.pkl", "scaler.pkl")
)

In [8]:
# Bare last expression so the notebook renders the frame; the rendered table
# below is abbreviated with "..." rows and columns rather than shown in full.
df_train

Unnamed: 0,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_GOODS_PRICE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,FLAG_EMP_PHONE,FLAG_WORK_PHONE,...,FLAG_OWN_REALTY,NAME_TYPE_SUITE,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,WEEKDAY_APPR_PROCESS_START,ORGANIZATION_TYPE,SK_ID_CURR,TARGET
162269,1,225000.0,450000.0,0.010032,-14495,-585,-1270.0,-4885,1,1,...,Y,"Spouse, partner",Commercial associate,Secondary / secondary special,Married,House / apartment,SUNDAY,Self-employed,360052,0
135440,0,67500.0,684000.0,0.020246,-16922,-296,-9589.0,-460,1,0,...,Y,Family,Commercial associate,Secondary / secondary special,Married,House / apartment,MONDAY,Restaurant,121461,1
117806,2,81000.0,247500.0,0.019101,-12822,-3210,-698.0,-1912,1,0,...,Y,Unaccompanied,Working,Higher education,Married,House / apartment,SUNDAY,Business Entity Type 2,348484,0
126749,1,103500.0,270000.0,0.035792,-16540,-337,-6043.0,-69,1,1,...,Y,"Spouse, partner",Working,Higher education,Married,House / apartment,THURSDAY,Self-employed,100748,0
182910,0,202500.0,787500.0,0.024610,-14948,-125,-8862.0,-4042,1,0,...,Y,Unaccompanied,Commercial associate,Higher education,Married,House / apartment,FRIDAY,Business Entity Type 3,346889,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38113,0,270000.0,1377000.0,0.016612,-16480,-1716,-3847.0,-31,1,0,...,Y,Unaccompanied,Commercial associate,Higher education,Married,House / apartment,TUESDAY,Telecom,390435,0
90033,0,261000.0,855000.0,0.011703,-14403,-3640,-348.0,-5212,1,0,...,Y,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,MONDAY,Trade: type 6,246671,0
92815,0,247500.0,495000.0,0.031329,-22978,-4684,-2677.0,-4158,1,1,...,Y,Family,Working,Higher education,Married,House / apartment,SUNDAY,Industry: type 7,278383,0
110590,0,157500.0,450000.0,0.009657,-16175,-974,-2969.0,-7197,1,0,...,N,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,FRIDAY,School,248551,0
