In [None]:
import tensorflow as tf
from tensorflow import keras

import os
import tempfile

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from imblearn.over_sampling import SMOTE ## pip install -U imbalanced-learn
import sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Default figure size applied to every matplotlib figure created in this notebook.
mpl.rcParams['figure.figsize'] = (12, 10)
# Colors of matplotlib's default property cycle; plot_metrics draws with colors[0].
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

# Tarea 6. curso Hello world con TF.2

## Punto 1. Conceptos:
- Investigue las métricas más usadas para un problema de clasificación binaria
  (puede guiarse de este blog: https://neptune.ai/blog/evaluation-metrics-binary-classification).


## Punto 2. Problema de clasificación con imbalanced data
- Crear un modelo de Deep Learning para la clasificación de transacciones fraudulentas.
- Adicionar un bias inicial para reducir el problema de imbalanced data.
- Adicionar un peso en las clases para reducir el problema de imbalanced data.
- Usar oversampling y Synthetic Minority Oversampling Technique (SMOTE).

In [None]:
### Algunas funciones utiles:

def _collect_predictions(tf_data, model, max_batches=1000):
  """Run `model` over the first `max_batches` batches of `tf_data`.

  Args:
    tf_data: object with a `take(n)` method yielding `(features, labels)` batches.
    model: object with a Keras-style `predict(features, verbose=0)` method.
    max_batches: maximum number of batches to read; anything beyond it is ignored.

  Returns:
    `(labels, predictions)` as flat 1-D numpy arrays (assumes the model emits
    a single score per sample).

  Raises:
    ValueError: if `tf_data` yields no batch at all.
  """
  pred_list = []
  true_list = []
  for ds, lb in tf_data.take(max_batches):
    # verbose=0 avoids printing one progress bar per batch.
    pred_list.append(np.asarray(model.predict(ds, verbose=0)))
    true_list.append(np.asarray(lb))
  if not pred_list:
    raise ValueError('tf_data did not yield any batch to evaluate')
  # ravel (instead of squeeze) keeps the arrays 1-D even for a single sample.
  return np.concatenate(true_list).ravel(), np.concatenate(pred_list).ravel()


def plot_cm(tf_data,model, p=0.5, max_batches=1000):
  """Plot the confusion matrix of `model` on `tf_data` and print its four cells.

  Args:
    tf_data: batched dataset of `(features, labels)` pairs.
    model: trained binary classifier whose `predict` returns one score per sample.
    p: decision threshold; scores strictly greater than `p` count as positive.
    max_batches: only the first `max_batches` batches are used (default 1000).

  Raises:
    ValueError: if `tf_data` yields no batch.
  """
  labels, predictions = _collect_predictions(tf_data, model, max_batches)
  # labels=[0, 1] forces a 2x2 matrix even when one class is absent, so the
  # cm[i][j] lookups below cannot go out of range.
  cm = confusion_matrix(labels, (predictions > p).astype(int), labels=[0, 1])
  plt.figure(figsize=(5,5))
  sns.heatmap(cm, annot=True, fmt="d")
  plt.title('Confusion matrix @{:.2f}'.format(p))
  plt.ylabel('Actual label')
  plt.xlabel('Predicted label')

  print('Legitimate Transactions Detected (True Negatives): ', cm[0][0])
  print('Legitimate Transactions Incorrectly Detected (False Positives): ', cm[0][1])
  print('Fraudulent Transactions Missed (False Negatives): ', cm[1][0])
  print('Fraudulent Transactions Detected (True Positives): ', cm[1][1])
  print('Total Fraudulent Transactions: ', np.sum(cm[1]))


def plot_roc(tf_data,model, max_batches=1000):
  """Plot the ROC curve of `model` on `tf_data`, in percent.

  The view is zoomed to false positives in [-0.5, 20] and true positives in
  [80, 100.5], with an equal aspect ratio.

  Args:
    tf_data: batched dataset of `(features, labels)` pairs.
    model: trained binary classifier whose `predict` returns one score per sample.
    max_batches: only the first `max_batches` batches are used (default 1000).

  Raises:
    ValueError: if `tf_data` yields no batch.
  """
  labels, predictions = _collect_predictions(tf_data, model, max_batches)
  fp, tp, _ = sklearn.metrics.roc_curve(labels, predictions)

  plt.plot(100*fp, 100*tp, linewidth=2)
  plt.xlabel('False positives [%]')
  plt.ylabel('True positives [%]')
  plt.xlim([-0.5,20])
  plt.ylim([80,100.5])
  plt.grid(True)
  ax = plt.gca()
  ax.set_aspect('equal')

def evaluation(tf_data,model):
    """Evaluate `model` on `tf_data` and print every reported metric.

    The results are requested as a dict (`return_dict=True`), so each value is
    printed next to the name it was reported under. Pairing the result list
    with `model.metrics_names` is avoided because the two can disagree in
    length and order.

    Args:
        tf_data: dataset passed straight to `model.evaluate`.
        model: compiled model exposing a Keras-style
            `evaluate(data, verbose, return_dict)` method.
    """
    results = model.evaluate(tf_data,
                             verbose=1,
                             return_dict=True)
    for name, value in results.items():
        print(name, ': ', value)
    print()
def plot_metrics(history):
  """Draw train/validation curves for loss, prc, precision and recall.

  Each metric gets one panel of a 2x2 grid; the training curve is solid and
  the validation curve (key 'val_<metric>') is dashed, both in colors[0].

  Args:
    history: object with an `epoch` sequence and a `history` dict holding the
      keys 'loss', 'prc', 'precision', 'recall' and their 'val_' counterparts.
  """
  # Metrics with a fixed y-range other than the default [0, 1].
  fixed_limits = {'auc': [0.8, 1]}
  for position, metric in enumerate(['loss', 'prc', 'precision', 'recall'], start=1):
    plt.subplot(2, 2, position)
    train_curve = history.history[metric]
    plt.plot(history.epoch, train_curve, color=colors[0], label='Train')
    val_curve = history.history['val_' + metric]
    plt.plot(history.epoch, val_curve,
             color=colors[0], linestyle="--", label='Val')
    plt.xlabel('Epoch')
    plt.ylabel(metric.replace("_", " ").capitalize())
    if metric == 'loss':
      # Keep the automatic upper bound but anchor the axis at zero.
      upper = plt.ylim()[1]
      plt.ylim([0, upper])
    else:
      plt.ylim(fixed_limits.get(metric, [0, 1]))

    plt.legend()

#### Descripción del conjunto de datos: https://www.kaggle.com/mlg-ulb/creditcardfraud

The dataset contains transactions made by credit cards in September 2013 by European cardholders.
This dataset presents transactions that occurred in two days, where we have 492 frauds out of 284,807 transactions. The dataset is highly unbalanced: the positive class (frauds) accounts for 0.172% of all transactions.

It contains only numerical input variables which are the result of a PCA transformation. Unfortunately, due to confidentiality issues, we cannot provide the original features and more background information about the data. Features V1, V2, … V28 are the principal components obtained with PCA; the only features which have not been transformed with PCA are 'Time' and 'Amount'. Feature 'Time' contains the seconds elapsed between each transaction and the first transaction in the dataset. The feature 'Amount' is the transaction amount; this feature can be used, for example, for example-dependent cost-sensitive learning. Feature 'Class' is the response variable and it takes value 1 in case of fraud and 0 otherwise.

In [None]:
#file = tf.keras.utils
# Read the credit-card transactions CSV directly from its public URL.
raw_df = pd.read_csv('https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv')
# Show the first rows as the cell output.
raw_df.head()

In [None]:
### Visualize the problem: far more class 0 (no fraud) than class 1 (fraud) samples
hist = raw_df['Class'].hist(bins=3)

In [None]:
# Count the samples per class: index 0 -> negatives, index 1 -> positives.
class_counts = np.bincount(raw_df['Class'])
neg, pos = class_counts
total = neg + pos

# Share of each class, in percent of all samples.
pos_pct = 100 * pos / total
neg_pct = 100 * neg / total

summary_template = 'Muestras:\n    Total: {}\n    Positivos: {} ({:.2f}% del total)\n    Negativos: {} ({:.2f}% del total)\n'
print(summary_template.format(total, pos, pos_pct, neg, neg_pct))

In [None]:
# Work on a copy so raw_df stays untouched.
cleaned_df = raw_df.copy()

## Remove the 'Time' column (no time-series modelling is done here)

cleaned_df.# TODO(student): complete the code

## Move 'Amount' to a log scale so the values are smaller.
## eps is added before the log so that a zero amount does not evaluate log(0).
eps = 0.001
cleaned_df['Log Ammount'] = np.log(cleaned_df.pop('Amount')+eps)

In [None]:
# Show the first rows of the cleaned frame as the cell output.
cleaned_df.head()

In [None]:
### Train / validation / test split (70% / 15% / 15% of the rows)
SPLIT_SEED = 123

# 70% of the rows, drawn at random, form the training set.
train_data = cleaned_df.sample(frac=0.7, random_state=SPLIT_SEED)

# The remaining rows are halved at random into validation and test sets.
holdout_data = cleaned_df.drop(train_data.index)
validation_data = holdout_data.sample(frac=0.5, random_state=SPLIT_SEED)
test_data = holdout_data.drop(validation_data.index)

In [None]:
### Normalize using statistics computed on the training split only!
# Bug fix: `DataFrame.pop('Class')` returns the popped label column, so the
# layer used to be adapted on the 0/1 labels. Drop the label instead so that
# the statistics are computed on the feature columns.
train_data_X = train_data.drop(columns='Class')
normalization = tf.keras.layers.Normalization(axis=-1)
normalization.adapt(train_data_X.to_numpy())

In [None]:
def df_to_dataset(dataframe, shuffle=True, repeat =False,batch_size=32):
  """Exercise template: turn a DataFrame with a 'Class' column into a batched dataset.

  Args:
    dataframe: frame holding the feature columns plus the 'Class' label column;
      it is copied first, so the caller's frame is not modified.
    shuffle: when true, shuffle with a buffer as large as the frame.
    repeat: when true, repeat the dataset (no count is given to `repeat()`).
    batch_size: intended batch size (to be passed to `batch` by the student).

  Returns:
    The dataset `ds` after the optional shuffle/repeat steps and batching.
  """
  dataframe = dataframe.copy()
  labels = dataframe.pop('Class')
  ds = # TODO(student): complete the code. Hint: use -> tf.data.Dataset.from_tensor_slices((x, y))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(dataframe))
  if repeat:
    ds= ds.repeat()
  ds = ds.batch(# TODO(student): complete the code.)
  return ds

In [None]:
# Training data is shuffled (df_to_dataset default); validation and test keep their order.
train_tf_data=df_to_dataset(train_data)
val_tf_data=df_to_dataset(validation_data,shuffle=False)
test_tf_data=df_to_dataset(test_data,shuffle=False)
# Pull a single batch to inspect the shape of the feature tensor.
for ds,lb in train_tf_data.take(1):
        print(ds.shape)
# `ds` is the loop variable left over from the loop above; shaped[-1] is used
# later as the model input size.
shaped =ds.shape

In [None]:
## Metrics used!
# The `name` given to each metric is the key under which it is reported;
# plot_metrics reads 'prc', 'precision' and 'recall' from the training history.
METRICS = [
      tf.keras.metrics.TruePositives(name='tp'),
      tf.keras.metrics.FalsePositives(name='fp'),
      tf.keras.metrics.TrueNegatives(name='tn'),
      tf.keras.metrics.FalseNegatives(name='fn'), 
      tf.keras.metrics.BinaryAccuracy(name='accuracy'),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall'),
      tf.keras.metrics.AUC(name='auc'),
      tf.keras.metrics.AUC(name='prc', curve='PR'), 
]

### Modelo sin considerar imbalance

#### Crear un modelo de redes neuronales con la siguiente estructura:
- capa de entrada con shape=shaped[-1]
- incluir la capa `normalization`
- capa densa de 16 neuronas con relu
- capa Dropout con rate=0.2
- capa densa de 16 neuronas con relu
- capa Dropout con rate=0.2
- capa densa de 8 neuronas con relu
- capa Dropout con rate=0.1
- capa densa de salida (clasificación binaria) con sigmoid

In [None]:


# Baseline model (no imbalance handling). Exercise template: the hidden layers,
# optimizer and loss are left for the student to fill in.
model_bare = tf.keras.Sequential([
      ## TODO(student): complete the code (layers described in the markdown above)
      keras.layers.Dense(1, activation='sigmoid'),
  ])

model_bare.compile(
      optimizer= ## TODO(student): use the Adam optimizer with learning_rate=1e-3,
      loss= ## TODO(student): complete the code,
      metrics=METRICS)


In [None]:
# Exercise template: the data and epoch arguments are left for the student.
history_bare = model_bare.fit(
    ## TODO(student): training data,
    ## TODO(student): 8 epochs,
    ## TODO(student): validation data,
    verbose=1)

In [None]:
# Print the metrics of the baseline model on the test dataset.
evaluation(test_tf_data,model_bare)

In [None]:
# Confusion matrix of the baseline model on the test dataset (default threshold p=0.5).
plot_cm(test_tf_data,model_bare)

In [None]:
# ROC curve of the baseline model on the test dataset.
plot_roc(test_tf_data,model_bare)

In [None]:
# Training vs. validation curves recorded while fitting the baseline model.
plot_metrics(history_bare)

### Modelo usando bias_init

La corrección por bias ($b_0$) viene dada por:

$$ p_0 = \frac{pos}{pos + neg} = \frac{1}{1+e^{-b_0}} $$
$$ b_0 = -\log_e(1/p_0 - 1) $$
$$ b_0 = \log_e(pos/neg) $$

In [None]:
initial_bias = ## TODO(student): complete the code (b_0 from the formula in the markdown above)
# Wrap the value in a constant initializer so it can be passed as bias_initializer.
initial_bias =tf.keras.initializers.Constant(initial_bias)
# Show the wrapped value as the cell output.
initial_bias.value

#### Crear un modelo de redes neuronales con la siguiente estructura:
- capa de entrada con shape=shaped[-1]
- incluir la capa `normalization`
- capa densa de 16 neuronas con relu
- capa Dropout con rate=0.2
- capa densa de 16 neuronas con relu
- capa Dropout con rate=0.2
- capa densa de 8 neuronas con relu
- capa Dropout con rate=0.1
- capa densa de salida (clasificación binaria) con sigmoid y corrección de bias

In [None]:
# Same template as the baseline, but the output layer starts from initial_bias.
model_bias = tf.keras.Sequential([
      ## TODO(student): complete the code (layers described in the markdown above)
      keras.layers.Dense(1, activation='sigmoid',bias_initializer=initial_bias),
  ])

model_bias.compile(
      optimizer=## TODO(student): use the Adam optimizer with learning_rate=1e-3,
      loss=## TODO(student): complete the code
      metrics=METRICS)


In [None]:
# Exercise template: the data and epoch arguments are left for the student.
history_bias = model_bias.fit(
    # TODO(student): training data
    # TODO(student): 8 epochs
    # TODO(student): validation data
    verbose=1)

In [None]:
# Print the metrics of the bias-initialized model on the test dataset.
evaluation(test_tf_data,model_bias)

In [None]:
# Confusion matrix of the bias-initialized model on the test dataset (default threshold p=0.5).
plot_cm(test_tf_data,model_bias)

In [None]:
# ROC curve of the bias-initialized model on the test dataset.
plot_roc(test_tf_data,model_bias)

In [None]:
# Training vs. validation curves recorded while fitting the bias-initialized model.
plot_metrics(history_bias)

### Generar pesos en las clases

In [None]:
# Weight each class inversely to its sample count; the total / 2.0 factor
# rescales both weights by the same amount.
class_weight = {
    label: (1 / count) * (total / 2.0)
    for label, count in ((0, neg), (1, pos))
}
weight_for_0 = class_weight[0]
weight_for_1 = class_weight[1]

for message, weight in (('Peso para clase 0: {:.2f}', weight_for_0),
                        ('Peso para clase 1: {:.2f}', weight_for_1)):
    print(message.format(weight))

In [None]:
# Start from a copy of the baseline architecture.
# NOTE(review): clone_model builds new layers from model_bare's configuration;
# verify that the statistics learned by `normalization.adapt` are still present
# in the clone — TODO confirm.
model_weight  = tf.keras.models.clone_model(model_bare)
model_weight.compile(
      optimizer=## TODO(student): use the Adam optimizer with learning_rate=1e-3,
      loss= ## TODO(student): complete the code
      metrics=METRICS)

In [None]:
# Exercise template: the data and epoch arguments are left for the student.
history_weight = model_weight.fit(
    # TODO(student): training data
    # TODO(student): 8 epochs
    # TODO(student): validation data
    verbose=1,
    class_weight=class_weight) ## -> weights the classes using the class_weight dict built above

In [None]:
# Print the metrics of the class-weighted model on the test dataset.
evaluation(test_tf_data,model_weight)

In [None]:
# Confusion matrix of the class-weighted model on the test dataset (default threshold p=0.5).
plot_cm(test_tf_data,model_weight)

In [None]:
# ROC curve of the class-weighted model on the test dataset.
plot_roc(test_tf_data,model_weight)

In [None]:
# Training vs. validation curves recorded while fitting the class-weighted model.
plot_metrics(history_weight)

### Entrenar sobre oversampled data

In [None]:
## Dividir los datos entre pos y neg:
train_data_neg= train_data[train_data['Class']==0]
train_data_pos= train_data[train_data['Class']==1]
validation_data_neg= validation_data[validation_data['Class']==0]
validation_data_pos= validation_data[validation_data['Class']==1]

In [None]:
# Per-class training datasets repeat (repeat=True); the per-class validation
# datasets are finite and keep their order.
train_tf_data_neg=df_to_dataset(train_data_neg,repeat=True)
train_tf_data_pos=df_to_dataset(train_data_pos,repeat=True)
val_tf_data_neg=df_to_dataset(validation_data_neg,shuffle=False)
val_tf_data_pos=df_to_dataset(validation_data_pos,shuffle=False)
# Pull a single batch; its first dimension is used below as the batch size.
for ds,lb in train_tf_data_neg.take(1):
        print(ds.shape)
shaped =ds.shape
# Number of batches needed to cover twice the negative training rows.
# np.ceil returns a float, while `steps_per_epoch` is documented as an
# integer, so the value is converted explicitly.
resampled_steps_per_epoch = int(np.ceil(2.0*len(train_data_neg)/shaped[0]))
resampled_steps_per_epoch

### Preguntas:
- ¿Qué implican los pesos (weights) en `tf.data.Dataset.sample_from_datasets`?
- Cambie los valores de weights a [0.9, 0.1], [0.1, 0.9], [0.3, 0.7] y [0.7, 0.3], y analice sus resultados.

In [None]:
weights =[0.5, 0.5] ## sampling weights in dataset order: 50% pos, 50% neg
# NOTE(review): df_to_dataset batches before returning, so every element
# sampled here is a whole batch taken from a single class rather than an
# individual row — confirm this is the intended mixing granularity.
resampled_train = tf.data.Dataset.sample_from_datasets([train_tf_data_pos, train_tf_data_neg], weights=weights)
# NOTE(review): the validation datasets were built without repeat=True; check
# how sample_from_datasets behaves once the smaller one runs out — TODO confirm.
resampled_val = tf.data.Dataset.sample_from_datasets([val_tf_data_pos, val_tf_data_neg], weights=weights)

In [None]:
# Start from a copy of the baseline architecture.
# NOTE(review): clone_model builds new layers from model_bare's configuration;
# verify that the statistics learned by `normalization.adapt` are still present
# in the clone — TODO confirm.
model_resampling = tf.keras.models.clone_model(model_bare)


model_resampling.compile(
      optimizer=## TODO(student): use the Adam optimizer with learning_rate=1e-3,
      loss=## TODO(student): complete the code
      metrics=METRICS)

history_resample = model_resampling.fit(
    resampled_train, ## resampled training data
    epochs=18,
    validation_data=resampled_val, ## resampled validation data
    steps_per_epoch=resampled_steps_per_epoch, ## the repeating training data has no natural end, so an epoch is a fixed number of steps
    verbose=1)

In [None]:
# Print the metrics of the oversampling-trained model on the (not resampled) test dataset.
evaluation(test_tf_data,model_resampling)

In [None]:
# Confusion matrix of the oversampling-trained model on the test dataset (default threshold p=0.5).
plot_cm(test_tf_data,model_resampling)

In [None]:
# ROC curve of the oversampling-trained model on the test dataset.
plot_roc(test_tf_data,model_resampling)

In [None]:
# Training vs. validation curves recorded while fitting on the resampled data.
plot_metrics(history_resample)

### Synthetic Minority Oversampling Technique (SMOTE)

In [None]:
# SMOTE resampler with a fixed random_state.
sm = SMOTE(random_state=123)
def _split_data(df):
    """Separate a frame into its feature columns and its 'Class' column.

    The input frame is left untouched; both returned objects are new.

    Returns:
        (X, y): `X` holds every column except 'Class', `y` is the 'Class' column.
    """
    target = df['Class'].copy()
    features = df.drop(columns='Class')
    return features, target

# Separate features and labels for each split.
Xtr, ytr = _split_data(train_data)
Xv, yv = _split_data(validation_data)
Xts, yts = _split_data(test_data)
# Resample each split independently with the same SMOTE instance.
# NOTE(review): the validation and test splits are resampled too, so metrics
# computed on X_val / X_test include synthetic samples — confirm this is intended.
X_train, y_train = sm.fit_resample(Xtr,ytr)
X_val, y_val = sm.fit_resample(Xv,yv)
X_test, y_test = sm.fit_resample(Xts,yts)

In [None]:
## Ahora contamos con balanced data!!
# Histogram of the resampled training labels.
hist = y_train.hist(bins=3)

## Parte 3. Realizar el modelo de deep learning para clasificación usando la base de datos anteriormente balanceada con SMOTE
- Genere el pipeline usando `from_tensor_slices()`.
- Entrene una red neuronal usando el modelo model_bare usado anteriormente (no use bias_init ni weights en las clases; recuerde que ya tenemos datos balanceados).
- Use en el fit los callbacks relacionados con EarlyStopping y otro callback de la lista: https://www.tensorflow.org/api_docs/python/tf/keras/callbacks
- Guarde el modelo completo y súbalo al repositorio junto a este notebook completamente terminado.