In [1]:
#imports
import itertools
import os
import sys

import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
# Labels printed as the header row above the data matrix.
fields = ["SalePrice", "OverallQual", "1stFlrSF", "TotRmsAbvGrd", "YearBuilt", "LotFrontage"]

dataset = np.load("proyecto_training_data.npy")
# The dataset contains NaNs: keep only the rows in which every column is a number.
dataset = dataset[(~np.isnan(dataset)).all(axis=1)]

# Stacking the labels on top of the numbers converts the whole matrix to strings,
# which is acceptable here because the result is only printed.
print(np.vstack([fields, dataset]))

[['SalePrice' 'OverallQual' '1stFlrSF' 'TotRmsAbvGrd' 'YearBuilt'
  'LotFrontage']
 ['208500.0' '7.0' '856.0' '8.0' '2003.0' '65.0']
 ['181500.0' '6.0' '1262.0' '6.0' '1976.0' '80.0']
 ...
 ['266500.0' '7.0' '1188.0' '9.0' '1941.0' '66.0']
 ['142125.0' '5.0' '1078.0' '5.0' '1950.0' '68.0']
 ['147500.0' '5.0' '1256.0' '6.0' '1965.0' '75.0']]


In [14]:
class LinearModel:
    """Single-feature linear regression (``y = m*x + b``) fitted by mini-batch gradient descent.

    The loss of every gradient step is written as a TensorBoard scalar named
    ``MSE`` through a summary writer whose directory name encodes the
    hyper-parameters of the run.

    Attributes:
        logdir: directory the summary writer logs to.
        writer: summary writer created for ``logdir``.
        m: slope ``tf.Variable``; ``None`` until the first training call.
        b: intercept ``tf.Variable``; ``None`` until the first training call.
        batch_size: number of samples per gradient step.
        lr: learning rate.
        epochs: number of passes over the training data.
    """

    def __init__(self, batch_size = 16, lr=0.01, epochs = 1000):
        """Store the hyper-parameters and create the summary writer for this run.

        Args:
            batch_size: samples per gradient step; must be at least 1.
            lr: learning rate.
            epochs: number of passes over the training data.

        Raises:
            ValueError: if ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")

        # os.path.join nests the run directory under "logs" on every platform;
        # a hard-coded backslash is only a path separator on Windows.
        self.logdir = os.path.join('logs', f'lm_bs={batch_size}_lr={lr}_epochs={epochs}')
        self.writer = tf.summary.create_file_writer(self.logdir)
        self.m = None
        self.b = None

        self.batch_size = batch_size
        self.lr = lr
        self.epochs = epochs

    def h(self, m, b, x):
        """Evaluate the hypothesis ``m*x + b`` after casting every operand to float64."""
        with tf.name_scope("casting_variables"):
            y = tf.cast(m, tf.float64)*tf.cast(x, tf.float64) + tf.cast(b, tf.float64)
        return y

    def get_params(self):
        """Return the current ``(slope, intercept)`` pair (both ``None`` before training)."""
        return (self.m, self.b)

    def error(self, y, y_pred):
        """Return half of the mean squared difference between ``y`` and ``y_pred``."""
        with tf.name_scope("error_calculation"):
            return 1/2*tf.reduce_mean(tf.math.square(y - y_pred), name="MSE_Calc")

    def __call__(self, x):
        """Predict with the current parameters.

        Raises:
            ValueError: if the model has not been trained yet.
        """
        if self.m is None or self.b is None:
            raise ValueError("the model has no parameters yet; call train() first")
        return self.h(self.m, self.b, x)

    @tf.function
    def actual_training(self, x, y):
        """Run mini-batch gradient descent over ``x``/``y`` and return the parameters.

        The slope and intercept variables are created (initialised to 0.0) on
        the first call and reused afterwards. Batches are consecutive slices of
        ``batch_size`` samples; samples left over after the last full batch are
        not used. A ``batch_size`` larger than the data is clamped so that a
        single full-batch step is taken per epoch instead of none.

        NOTE(review): the epoch/batch loops iterate over Python ranges, so in
        graph mode tracing presumably unrolls them; large ``epochs`` values may
        make tracing slow -- confirm.

        Args:
            x: 1-D sequence of feature values.
            y: 1-D sequence of target values, same length as ``x``.

        Returns:
            The ``(slope, intercept)`` pair after the last update.

        Raises:
            ValueError: if ``x`` and ``y`` differ in length or are empty.
        """
        with tf.name_scope("main_graph"):
            lr = self.lr
            epochs = self.epochs
            with tf.name_scope("var_creation"):
                if self.m is None:
                    self.m = tf.Variable(initial_value=0.0, name="slope")
                if self.b is None:
                    self.b = tf.Variable(initial_value=0.0, name="intercept")

            n_samples = len(y)
            if len(x) != n_samples:
                raise ValueError("x and y must have the same number of samples")
            if n_samples == 0:
                raise ValueError("cannot train on an empty dataset")

            # Clamp so an oversized batch means "use everything" rather than
            # producing zero iterations and silently skipping training.
            batch_size = min(self.batch_size, n_samples)
            iterations = n_samples // batch_size
            step = 0

            for epoch in range(epochs):
                for i in range(iterations):
                    batch_start = i*batch_size
                    batch_end = batch_start + batch_size
                    x_mb = x[batch_start:batch_end]
                    y_mb = y[batch_start:batch_end]

                    with tf.name_scope("gradient_calc"):
                        # Only the forward pass is recorded. Trainable variables
                        # are watched automatically, so no explicit watch() calls.
                        with tf.GradientTape() as grad_tape:
                            y_pred = self.h(self.m, self.b, x_mb)
                            error = self.error(y_mb, y_pred)

                        # Gradient of the cost with respect to the parameters,
                        # computed outside the tape context so the tape does not
                        # record the gradient computation itself.
                        grad_m, grad_b = grad_tape.gradient(error, [self.m, self.b])

                    with tf.name_scope("parameter_updating"):
                        # Step against the gradient direction.
                        self.m.assign_sub(lr*grad_m)
                        self.b.assign_sub(lr*grad_b)

                    tf.summary.scalar('MSE', error, step=step)
                    step += 1
            final_params = (self.m, self.b)
        return final_params

    def train(self, x, y):
        """Train the model while logging to this run's summary writer.

        Graph tracing is switched on around the training call and exported
        under the name ``model_graph``.

        Args:
            x: 1-D sequence of feature values.
            y: 1-D sequence of target values, same length as ``x``.

        Returns:
            The ``(slope, intercept)`` pair returned by ``actual_training``.
        """
        with self.writer.as_default():
            tf.summary.trace_on(graph=True, profiler=False)
            try:
                final_params = self.actual_training(x, y)
            except Exception:
                # trace_export never runs on failure, so stop tracing explicitly
                # instead of leaving it enabled for the next run.
                tf.summary.trace_off()
                raise
            # No profiler_outdir: the profiler is disabled in trace_on above,
            # so an output directory for it would not be used.
            tf.summary.trace_export(name="model_graph", step=0)
        self.writer.flush()

        return final_params

In [22]:
# Graph mode: actual_training runs as a traced tf.function.
tf.config.run_functions_eagerly(False)

# One epoch with a batch spanning every row, i.e. a single full-batch gradient step.
# Column 1 (OverallQual) is the feature and column 0 (SalePrice) is the target.
model1 = LinearModel(batch_size=dataset.shape[0], lr=0.0001, epochs=1)
model1.train(dataset[:, 1], dataset[:, 0])

(<tf.Tensor: shape=(), dtype=float32, numpy=120.16186>,
 <tf.Tensor: shape=(), dtype=float32, numpy=18.077047>)

In [16]:
# Prediction of the trained model for a feature value of 1.
print(model1(x=1))

tf.Tensor(138.23890495300293, shape=(), dtype=float64)


In [17]:
# Display the fitted (slope, intercept) variables of the baseline model.
model1.get_params()

(<tf.Variable 'main_graph/var_creation/slope:0' shape=() dtype=float32, numpy=120.16186>,
 <tf.Variable 'main_graph/var_creation/intercept:0' shape=() dtype=float32, numpy=18.077047>)

# Experimentos

Hipótesis:

- Un LR alto causará una reducción del error más rápida, pero causará una convergencia más tardía.
- Con un número alto de epochs, el error alcanzará la convergencia antes de que termine de entrenar todas las epochs.
- Un batch_size bajo podrá suplir la misma información que un batch_size alto, pero tardará más en entrenarse.

In [19]:
# Hyper-parameter sweep: train one model for every (lr, epochs, batch_size)
# combination. Each model logs to its own directory (see LinearModel.logdir),
# so the runs can be compared side by side in TensorBoard.
# NOTE(review): eager execution is switched on for the sweep and stays on
# afterwards -- confirm that later cells expect this.
tf.config.run_functions_eagerly(True)
lrs = [0.1, 0.01, 0.001]
epochs = [100, 1000]
batch_sizes = [400, 600]

for lr, epoch, batch_size in itertools.product(lrs, epochs, batch_sizes):
    print(f"training model with params epochs: {epoch}, lr: {lr}, batch_size: {batch_size}")
    model = LinearModel(epochs = epoch, lr = lr, batch_size = batch_size)
    try:
        model.train(dataset[:,1], dataset[:,0])
    finally:
        # Close this run's summary writer so the sweep does not keep one open
        # event file per trained model.
        model.writer.close()
    print("Done")
    print()

training model with params epochs: 100, lr: 0.1, batch_size: 400
Done

training model with params epochs: 100, lr: 0.1, batch_size: 600
Done

training model with params epochs: 1000, lr: 0.1, batch_size: 400
Done

training model with params epochs: 1000, lr: 0.1, batch_size: 600
Done

training model with params epochs: 100, lr: 0.01, batch_size: 400
Done

training model with params epochs: 100, lr: 0.01, batch_size: 600
Done

training model with params epochs: 1000, lr: 0.01, batch_size: 400
Done

training model with params epochs: 1000, lr: 0.01, batch_size: 600
Done

training model with params epochs: 100, lr: 0.001, batch_size: 400
Done

training model with params epochs: 100, lr: 0.001, batch_size: 600
Done

training model with params epochs: 1000, lr: 0.001, batch_size: 400
Done

training model with params epochs: 1000, lr: 0.001, batch_size: 600
Done



# Gráficas de TensorBoard

![alt text](ALL_MSE.PNG "MSEs")

Parece que existen básicamente dos tipos de modelo: aquellos que superaron alguna especie de barrera que les permitió reducir mucho más el error y aquellos que no lo lograron.

Vemos que los modelos con más iteraciones totales sí lograron converger a un error más bajo en general, así que veamos estos modelos:

![alt text](Interesting_models.PNG "Modelos interesantes")

Parece ser que la "barrera" a superar es el learning rate, siendo un lr de 0.01 capaz de reducir de mejor manera el error del modelo. Veamos estos modelos más a fondo.

![alt text](best_models.PNG "Mejores modelos")

Podemos ver que un batch size más pequeño tiene el potencial de obtener un error más pequeño, pero la varianza entre iteraciones es bastante grande y levanta sospechas sobre su confiabilidad.

Por otra parte, un batch size más grande no solo logra reducir la variabilidad, sino que también parece converger en menos iteraciones, por lo que resulta un modelo más rápido de entrenar.

Podemos concluir entonces que, de entre los hiperparámetros probados, el mejor modelo tiene bastantes epochs (1000), un batch size más grande (600) y un lr intermedio (0.01).

### Grafo

![alt text](graph.PNG "Grafo")