# Intro Deep Learning

La idea de este Notebook es mostrar una introducción a modelos de redes neuronales densas, cómo se componen, cómo se entrenan y similitudes con modelos más sencillos como la regresión lineal y logística. Cubriremos conceptos que definen la estructura de estos modelos y los parámetros organizan su aprendizaje.

In [None]:
!git clone https://github.com/JuanCruzC97/ml-stuff.git

In [None]:
cd ml-stuff/intro-deep-learning

In [None]:
import numpy as np
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go

from utils.datasets import make_regression_dataset

In [59]:
seed_value= 45

import os
os.environ['PYTHONHASHSEED']=str(seed_value)

# Regresión

En esta primera parte cubrimos un problema muy simple de regresión (predicción de una variable continua). Usaremos una única variable explicativa. Comenzamos generando el dataset que utilizaremos en esta etapa. Se trata de un set de datos sencillo, con una variable explicativa `X` y una variable respuesta `y` continua.

## Dataset



In [60]:
train = make_regression_dataset(n_samples=100, noise=1.25, random_state=42)
test = make_regression_dataset(n_samples=30, noise=1.25, random_state=65)

In [61]:
train.head()

Unnamed: 0,X,y
0,-1.003679,-2.201476
1,3.605714,5.68287
2,1.855952,5.638039
3,0.789268,0.695674
4,-2.751851,-1.445551


In [62]:
train.describe()

Unnamed: 0,X,y
count,100.0,100.0
mean,-0.238554,1.697763
std,2.379915,3.485939
min,-3.955823,-4.706355
25%,-2.454394,-1.157034
50%,-0.28686,1.241799
75%,1.841625,4.993439
max,3.895095,8.287708


In [63]:
px.scatter(data_frame=train,
           x="X",
           y="y",
           #color="y",
           #color_continuous_scale="viridis",
           height=500,
           width=800,
           template="plotly_white")

# Linear Model

In [8]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

In [9]:
linear_model_sk = LinearRegression()

linear_model_sk.fit(X=train[["X"]],
                    y=train["y"])

LinearRegression()

In [10]:
print(linear_model_sk.coef_)
print(linear_model_sk.intercept_)

[1.02640991]
1.9426174913286312


In [11]:
train["y_preds1"] = linear_model_sk.predict(train[["X"]])
test["y_preds1"] = linear_model_sk.predict(test[["X"]])

print(f'Error Absoluto Promedio {round(mean_absolute_error(train["y"], train["y_preds1"]), 2)}')
print(f'Error Absoluto Promedio {round(mean_absolute_error(test["y"], test["y_preds1"]), 2)}')

Error Absoluto Promedio 2.02
Error Absoluto Promedio 1.84


In [14]:
color = "y_preds1"

plot = px.scatter(data_frame=train,
                  x = "X",
                  y = ["y", "y_preds1"],
                  #color=color,
                  #color_continuous_scale="viridis",
                  #trendline='ols', 
                  height=500,
                  width=800,
                  template ="plotly_white")

#plot.add_traces()

plot.show()

In [15]:
color = "y_preds"

plot = px.scatter(data_frame=train,
                  x = "X",
                  y = train["y"] - train["y_preds1"],
                  #color=color,
                  #color_continuous_scale="viridis", 
                  height=500,
                  width=800,
                  template ="plotly_white")

#plot.add_traces()

plot.show()

## Featuring Engineering

In [55]:
train["sin_X1"] = np.sin(train["X1"])

In [56]:
linear_model_sk = LinearRegression()

linear_model_sk.fit(X=train[["X1", "sin_X1"]],
                    y=train["y"])

LinearRegression()

# Dense Neural Net

In [18]:
import tensorflow as tf
from tensorflow import keras

from utils.datasets import get_fit_data, get_training_preds

In [118]:
LOSS = "mae"
BATCH_SIZE = 20
EPOCHS = 20
LR = 0.1

In [None]:
#keras.backend.clear_session()
#tf.random.set_seed(42)

initial_weights = keras.initializers.RandomNormal(0, 0.05, 42)

inputs = keras.Input(shape=(1,))
outputs = keras.layers.Dense(1, kernel_initializer=initial_weights)(inputs)

model1 = keras.Model(inputs, outputs)

model1.compile(optimizer=keras.optimizers.SGD(learning_rate=LR), loss=LOSS)

fit_history = model1.fit(x=train[["X"]], y=train["y"], batch_size=BATCH_SIZE, epochs=EPOCHS, shuffle=True)

In [34]:
[linear_model_sk.coef_[0], linear_model_sk.intercept_]

[1.026409914187006, 1.9426174913286312]

In [125]:
print(f'Error Absoluto Promedio {round(mean_absolute_error(train["y"], model1.predict(train[["X"]])), 2)}')
print(f'Error Absoluto Promedio {round(mean_absolute_error(test["y"], model1.predict(test[["X"]])), 2)}')

Error Absoluto Promedio 1.99
Error Absoluto Promedio 1.74


In [181]:
initial_weights = keras.initializers.RandomNormal(-0.5, 0.25, 123)

inputs = keras.Input(shape=(1,))
outputs = keras.layers.Dense(1, kernel_initializer=initial_weights)(inputs)

model1 = keras.Model(inputs, outputs)

model1.compile(optimizer=keras.optimizers.SGD(learning_rate=LR), loss=LOSS)

df_weights, df_preds = get_training_preds(train, model1, BATCH_SIZE, EPOCHS)

In [184]:
px.scatter(data_frame=df_preds,
           x="X",
           y=["y", "y_pred"],
           animation_frame="epoch",
           height=500,
           width=800,
           template="plotly_white")

In [203]:
def get_fit_data(history, loss_name='loss'):
    
    df = pd.DataFrame(history.history)
    
    df = (df
          .assign(epoch=np.arange(1, df.shape[0]+1))
          .set_index("epoch")
          .rename(columns={'loss':loss_name}))
    
    return df

data = get_fit_data(fit_history, 'mae')

In [205]:
px.line(data, 
        x=data.index, 
        y="mae",
        height=500,
        width=800,
        template="plotly_white")

In [126]:
color = "y_preds"

plot = px.scatter(data_frame=train,
                  x = "X1",
                  y = ["y", "y_preds"],
                  #color=color,
                  #color_continuous_scale="viridis",
                  trendline='ols',
                  height=500,
                  width=800,
                  template ="plotly_white")

#plot.add_traces()

plot.show()

ValueError: Value of 'x' is not the name of a column in 'data_frame'. Expected one of ['X', 'y'] but received: X1

# Old

In [None]:
px.histogram(train, "X1", nbins=20, height=600, width=600, template="plotly_white")

In [None]:
px.scatter_3d(data_frame=train,
              x="X1",
              y="X2",
              z="y",
              color="y",
              color_continuous_scale="viridis",
              height=700,
              width=600,
              template="plotly_white")



px.scatter(data_frame=train,
           x = "X2",
           y = "y",
           color="y",
           color_continuous_scale="viridis",
           height=500,
           width=800,
           template ="plotly_white").show()

px.scatter(data_frame=train,
           x = "X1",
           y = "X2",
           color = "y",
           color_continuous_scale="viridis",
           height=500,
           width=800,
           template ="plotly_white").show()