# Practica 3. Regresión lineal con BGD
### Aprendizaje Máquina e Inteligencia Artificial
Francisco Pineda Hernández

In [52]:
# Librerías generales de uso
import pandas as pd
import numpy as np

## Batch Gradient Descent (BGD)

$$ w_i  = w_i - 2\alpha \sum_{i = 0}^{m - 1} (w_i x_{j, i} - y_i)x_{j, i} $$

In [125]:
def bgd(X, Y, W_inicial, alpha):
    W = []
    for i in range(X.shape[1]):
        aux2 = 0 
        for j in range(len(X)):
            #print(f"x={X[j, i]}, y={float(Y[j])}, i={i}, j={j}, w={W_inicial[i]}")
            aux2 += (W_inicial[i] * X[j, i] - float(Y[j])) * X[j, i]
        aux = (W_inicial[i] - 2 * alpha * aux2)
        W.append(aux)
        aux = 0
    return W

## Programa 1

In [126]:
# Lectura dataset precio de viviendas programa 1
casas = pd.read_csv('casas.csv')
casas

Unnamed: 0,Terreno (m2),Precio (MDP)
0,440.0,1.01
1,616.0,1.42
2,381.0,0.88
3,963.0,2.21
4,431.0,0.99
5,255.0,0.59
6,594.0,1.37
7,625.0,1.44
8,708.0,1.63
9,468.0,1.08


In [223]:
from sklearn.model_selection import train_test_split # Importamos el método para dividir el dataset

def aplicar_bgd(dataset, iteraciones, W, alpha, error_ideal=0.01):
    if len(W) != len(dataset.iloc[0]) - 1:
        print("Las dimensiones del vector W y del dataset no coinciden")
        return
    
    error_actual = float('inf') 
    W_arreglo = []
    W_arreglo.append(W)
    errores_arreglo = []
    indice_correcto = 0
    
    # Dividir el dataset
    columna_objetivo = list(dataset.columns)[-1]
    dataset_train, dataset_test = train_test_split(dataset, train_size=0.7, test_size=0.3, shuffle=True, random_state=0)
    X_train = dataset_train.drop(columns=[columna_objetivo]).values
    Y_train = dataset_train[columna_objetivo].values.reshape(-1, 1)
    
    for i in range(iteraciones):
        if error_actual <= error_ideal: # Identificar si ya se alcanzo un error deseado
            print("\n ========= ERROR ALCANZADO :) ========= ")
            break

        print(f"\nITERACION {i}: W={W}, Alpha={alpha}")
        W = bgd(X_train, Y_train, W, alpha)
        suma_errores_iteracion = 0
        print(f"W nuevo = {W}")
        W_arreglo.append(W)

        # TEST's
        for j in range(len(dataset_test)):
            evaluacion = np.dot(dataset_test.iloc[j,:-1], W)
            error_test = abs(evaluacion - dataset_test[columna_objetivo].iloc[j])
            print(f"Test {i}: evaluacion = {evaluacion} | Y = {dataset_test[columna_objetivo].iloc[j]} ERROR=({error_test})")
            suma_errores_iteracion += error_test

        print(f"Suma total de errores = {suma_errores_iteracion}")
        if suma_errores_iteracion < error_actual:
            error_actual = suma_errores_iteracion
            errores_arreglo.append(suma_errores_iteracion)
            indice_correcto += 1
        else:
            alpha /= 2
            W = W_arreglo[indice_correcto]
            print(W_arreglo)
            print(f" ================================= Nuevo alpha = {alpha}, Nuevo W = {W}")

In [208]:
multivariable = pd.read_csv('Dataset_multivariable.csv')
multivariable

Unnamed: 0,x1,x2,x3,x4,x5,y
0,1.0,0.4,-12.0,12.0,10.2,11.6
1,1.0,0.6,-15.0,16.0,11.9,14.5
2,1.0,0.8,-18.0,20.0,13.6,17.4
3,1.0,1.0,-21.0,24.0,15.3,20.3
4,1.0,1.2,-24.0,28.0,17.0,23.2
5,1.0,1.4,-27.0,32.0,18.7,26.1
6,1.0,1.6,-30.0,36.0,20.4,29.0
7,1.0,1.8,-33.0,40.0,22.1,31.9
8,1.0,2.0,-36.0,44.0,23.8,34.8
9,1.0,2.2,-39.0,48.0,25.5,37.7


In [225]:
aplicar_bgd(multivariable, 100, [100] * 5, 0.01, 0.01)


ITERACION 0: W=[100, 100, 100, 100, 100], Alpha=0.01
W nuevo = [89.422, 76.8988, -10095.614, -14183.536000000004, -4601.5506000000005]
Test 0: evaluacion = -164379.81512000013 | Y = 17.4 ERROR=(164397.21512000012)
Test 0: evaluacion = -369907.1646800001 | Y = 34.8 ERROR=(369941.9646800001)
Test 0: evaluacion = -232888.93164000014 | Y = 23.2 ERROR=(232912.13164000015)
Suma total de errores = 767251.3114400004

ITERACION 1: W=[89.422, 76.8988, -10095.614, -14183.536000000004, -4601.5506000000005], Alpha=0.01
W nuevo = [80.32491999999999, 60.32137888, 1009261.8737199999, 2028362.1120000007, 214890.33916100007]
Test 1: evaluacion = 25323165.70765271 | Y = 17.4 ERROR=(25323148.307652712)
Test 1: evaluacion = 58029096.513789594 | Y = 34.8 ERROR=(58029061.7137896)
Test 1: evaluacion = 36225142.64303169 | Y = 23.2 ERROR=(36225119.44303168)
Suma total de errores = 119577329.464474
[[100, 100, 100, 100, 100], [89.422, 76.8988, -10095.614, -14183.536000000004, -4601.5506000000005], [80.324919999

  aux2 += (W_inicial[i] * X[j, i] - float(Y[j])) * X[j, i]


[[100, 100, 100, 100, 100], [89.422, 76.8988, -10095.614, -14183.536000000004, -4601.5506000000005], [80.32491999999999, 60.32137888, 1009261.8737199999, 2028362.1120000007, 214890.33916100007], [84.87346, 68.61008944, 499583.1298599999, 1007089.2880000004, 105144.39428050003], [87.14773, 72.75444472, 244743.75792999996, 496452.87600000016, 50271.42184025001], [88.284865, 74.82662235999999, 117324.07196499998, 241134.6700000001, 22834.935620125005], [88.8534325, 75.86271117999999, 53614.22898249999, 113475.56700000004, 9116.692510062503], [89.13771625, 76.38075558999999, 21759.307491249994, 49646.01550000002, 2257.5709550312513], [89.279858125, 76.639777795, 5831.846745624998, 17731.239750000008, -1171.9898224843746], [89.3509290625, 76.76928889749999, -2131.8836271875007, 1773.851875000002, -2886.7702112421875], [89.27993585883789, 76.64006352886989, -450.7899188016115, -220.82160937500043, -1810.8125009055962], [89.20902030399229, 76.51112326370932, -95.92154379702765, 28.51257617187