In [300]:
import tensorflow as tf
import pandas as pd
import numpy as np

from utils.df_loader import load_adult_df, load_compas_df, load_german_df, load_diabetes_df, load_breast_cancer_df
from utils.preprocessing import preprocess_df,get_columns_type
from sklearn.model_selection import train_test_split
from utils.dice import generate_dice_result, process_results
from utils.models import train_three_models, evaluation_test, save_three_models, load_three_models
from utils.save import save_result_as_csv

# Globally silence pandas' chained-assignment (SettingWithCopy) warning for this notebook.
pd.options.mode.chained_assignment = None 

print('TF version: ', tf.__version__)
print('Eager execution enabled: ', tf.executing_eagerly()) # True in the recorded run (TF 2.0.0)

# Seed TensorFlow and NumPy; the same `seed` is reused as random_state for the train/test split.
seed = 123
tf.random.set_seed(seed)
np.random.seed(seed)


TF version:  2.0.0
Eager execution enabled:  True


In [301]:
#### Select dataset ####

# Supported dataset names mapped to their loader functions (imported from utils.df_loader).
dataset_loaders = {
    'adult': load_adult_df,
    'german': load_german_df,
    'compas': load_compas_df,
    'diabetes': load_diabetes_df,
    'breast_cancer': load_breast_cancer_df,
}

dataset_name = 'breast_cancer'  # one of: adult, german, compas, diabetes, breast_cancer

# Fail fast with the offending name and the valid choices instead of a bare, context-free error.
if dataset_name not in dataset_loaders:
    raise ValueError(
        "Unsupported dataset {!r}; expected one of {}".format(
            dataset_name, sorted(dataset_loaders)
        )
    )

dataset_loading_fn = dataset_loaders[dataset_name]

In [302]:
#### Load dataframe info.
# Pass the selected loader to preprocess_df; the returned object exposes the attributes
# inspected in the cells below (dummy_df, columns_type, numerical_cols, categorical_cols, ...).
df_info = preprocess_df(dataset_loading_fn)


In [303]:
### Separate into train and test sets.
# Shuffled 80/20 split of df_info.dummy_df, seeded so the partition is repeatable.
train_df, test_df = train_test_split(
    df_info.dummy_df,
    train_size=0.8,
    shuffle=True,
    random_state=seed,
)

In [304]:
# Preview the test split; pandas elides the middle rows and columns of large frames ("..." in the output).
test_df

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,diagnosis
333,0.202045,0.171458,0.190657,0.104560,0.274713,0.077296,0.002281,0.014617,0.360101,0.228517,...,0.267591,0.157727,0.075575,0.300007,0.068545,0.004407,0.057285,0.246403,0.125541,0
273,0.130673,0.201556,0.122383,0.062100,0.340706,0.084381,0.025843,0.069930,0.515657,0.277169,...,0.236141,0.088052,0.041781,0.498778,0.064431,0.032292,0.177285,0.251528,0.175193,0
201,0.499740,0.324992,0.492779,0.342778,0.334477,0.308018,0.242737,0.372167,0.225253,0.104254,...,0.368337,0.443697,0.258995,0.441986,0.305333,0.280192,0.666323,0.268677,0.154991,1
178,0.285342,0.423064,0.264114,0.162418,0.089194,0.000000,0.003737,0.009205,0.169192,0.050126,...,0.453092,0.188107,0.104109,0.066565,0.006821,0.006371,0.031818,0.143899,0.022235,0
85,0.543282,0.297937,0.534241,0.395122,0.416268,0.263542,0.312793,0.437127,0.541414,0.216091,...,0.417377,0.506948,0.348457,0.453213,0.176199,0.252157,0.564261,0.419870,0.201692,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
179,0.275877,0.113290,0.259139,0.154952,0.313803,0.056316,0.021539,0.066103,0.205051,0.239469,...,0.110075,0.180736,0.094745,0.297365,0.026351,0.022029,0.137113,0.042973,0.119048,0
192,0.129632,0.287792,0.117062,0.061336,0.152298,0.012453,0.000000,0.000000,0.299495,0.305602,...,0.234808,0.058967,0.029149,0.000000,0.000000,0.000000,0.000000,0.067810,0.069198,0
246,0.294335,0.261075,0.278764,0.168865,0.176221,0.079320,0.101593,0.054920,0.215657,0.134583,...,0.421109,0.188605,0.102438,0.257082,0.119830,0.183546,0.170790,0.236941,0.111111,0
211,0.229968,0.312141,0.219197,0.120679,0.325720,0.152199,0.062535,0.069235,0.238889,0.223463,...,0.345682,0.173365,0.088749,0.375289,0.155922,0.117492,0.237560,0.191208,0.163256,0


In [305]:
# Row count of the test split (train_size=.8 leaves the remaining 20% of rows for testing).
len(test_df)

114

In [306]:
# Row count of the training split (80% of the rows, per train_size=.8 in the split).
len(train_df)

455

In [307]:
# NOTE(review): bare reference only displays the function's repr; nothing is called here.
# Looks like leftover exploration — consider removing this cell.
get_columns_type

<function utils.preprocessing.get_columns_type(df)>

In [308]:
# Column names grouped under the keys 'integer', 'float' and 'string'.
df_info.columns_type

{'integer': [],
 'float': ['radius_mean',
  'texture_mean',
  'perimeter_mean',
  'area_mean',
  'smoothness_mean',
  'compactness_mean',
  'concavity_mean',
  'concave points_mean',
  'symmetry_mean',
  'fractal_dimension_mean',
  'radius_se',
  'texture_se',
  'perimeter_se',
  'area_se',
  'smoothness_se',
  'compactness_se',
  'concavity_se',
  'concave points_se',
  'symmetry_se',
  'fractal_dimension_se',
  'radius_worst',
  'texture_worst',
  'perimeter_worst',
  'area_worst',
  'smoothness_worst',
  'compactness_worst',
  'concavity_worst',
  'concave points_worst',
  'symmetry_worst',
  'fractal_dimension_worst'],
 'string': ['diagnosis']}

In [309]:
# Number of numerical columns; matches the 30 'float' columns listed in df_info.columns_type.
len(df_info.numerical_cols)

30

In [310]:
# Number of entries in df_info.ohe_feature_names.
# NOTE(review): presumably the feature names after one-hot encoding — confirm in utils.preprocessing.
len(df_info.ohe_feature_names)

30

In [311]:
# NOTE(review): presumably maps each categorical feature to its one-hot encoded columns;
# it is empty for this dataset — confirm against utils.preprocessing.
df_info.cat_to_ohe_cat

{}

In [312]:
# Count of the columns listed in df_info.categorical_cols.
len(df_info.categorical_cols)

1

In [313]:
# Columns flagged as categorical; here only 'diagnosis', which is also the last column of
# the displayed test_df (presumably the target label — confirm).
df_info.categorical_cols

['diagnosis']