# Modelo de regresión lineal 

### Carlos Alberto Mentado Reyes A01276065

In [262]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split

 Primero definiré funciones necesarias, como costo, coeficiente de determinación etc 

In [263]:
# Función de covarianza y correlación

def covariance(x, y):
    y_mean = y.mean()
    x_mean = x.mean()
    cov = 0
    for i in range(len(x)):
        cov += (x[i] - x_mean) * (y[i] - y_mean)

    return cov / (len(x) - 1)

def correlation(x, y):
    return covariance(x, y) / (x.std() * y.std())

def standardize(x):
    return (x - x.mean()) / x.std()


In [264]:
# Función predict y función costo 

def predict(x, w, b):
    return w * x + b

def cost(y, y_pred):
    cost = 0
    for i in range(0, len(y)):
        cost += (y.iloc[i] - y_pred[i]) ** 2
    return cost / len(y)

In [265]:
# Función descenso de gradiente w y b 

def b_gradient_descent(x, w, b, lr,y):
    n = len(x)
    partial_b_sum = 0
    for i in range(0, n):
        partial_b_sum += (y.iloc[i] - b - (w*x.iloc[i])) * (-1)

    partial_b = partial_b_sum * (2/n)

    return b - lr*(partial_b)



def w_gradient_descent(x, w, b, lr, y):
    n = len(x)
    partial_w_sum = 0
    for i in range(0, n):
        partial_w_sum += (y.iloc[i]-b-(w*x.iloc[i])) * ((-1)*x.iloc[i])
    partial_w = partial_w_sum * (2/n)

    return w - lr * partial_w


In [266]:
#Función general

def trainModel(x, y, lr, max_iter):
    tol = 1e-6
    if len(x) != len(y):
        print("size of arrays for features and targets do not match")
        return

    np.random.seed(42)
    w = 0.1
    b = 0.1
    model_cost = float("inf")
    iter_cost = float("inf")

    for i in range(0, max_iter):
        predictions=[]

        for j in range(0, len(x)):
            predictions.append(predict(x.iloc[j], w, b))

        print(f"current cost: {iter_cost}")
        iter_cost = cost(y, predictions)
        if abs(model_cost - iter_cost) < tol: 
            break
        model_cost = iter_cost
        w = w_gradient_descent(x, w, b, lr, y)
        b = b_gradient_descent(x, w, b, lr, y)

    return w, b

def testModel(w, b, x, y):
    predictions = []
    for i in range(0, len(x)):
        predictions.append(predict(x.iloc[i], w, b))
    
    results = cost(y, predictions)

    return results


In [267]:
#Funcion para entrenar y testear 

def linearModel(x_train, x_test, y_train, y_test, lr=0.1, max_iter=100):
    print("Beggining training")
    print(f"Max iterations: {max_iter}")
    print(f"Learning rate: {lr}")
    w, b = trainModel(x_train, y_train, lr, max_iter)

    results = testModel(w, b, x_test, y_test)

    print("Final results:")
    print(f"y = {np.mean(w)}x + {np.mean(b)}")
    print(f"Final cost: {np.mean(results)}")


In [268]:
#Pequeña prueba

#Bases de datos a utilizar
url = "https://raw.githubusercontent.com/allisonhorst/palmerpenguins/c19a904462482430170bfe2c718775ddb7dbb885/inst/extdata/penguins_raw.csv"

penguins = pd.read_csv(url).dropna()
penguins_target = penguins["Body Mass (g)"]
penguins_feature = penguins["Culmen Length (mm)"]
penguins_feature = standardize(penguins_feature)

X_train, X_test, y_train, y_test = train_test_split(
    penguins_feature, penguins_target, test_size=0.33, random_state=42
)

linearModel(
    X_train,
    X_test,
    y_train,
    y_test,
    0.1,
    100
)



Beggining training
Max iterations: 100
Learning rate: 0.1
current cost: inf
current cost: 16407849.959740626
current cost: 10570977.863947988
current cost: 6925621.522596398
current cost: 4636039.335415845
current cost: 3189948.007610361
current cost: 2271620.175635188
current cost: 1685378.3122367265
current cost: 1309258.4062342688
current cost: 1066808.1423447442
current cost: 909833.3480442289
current cost: 807785.3926811385
current cost: 741197.0236247206
current cost: 697599.2018673297
current cost: 668966.6905937948
current cost: 650110.9350925758
current cost: 637663.2822971906
current cost: 629428.1984422355
current cost: 623969.7001825086
current cost: 620345.598801783
current cost: 617935.9332965757
current cost: 616331.7306444171
current cost: 615262.5930609788
current cost: 614549.3883826123
current cost: 614073.2391103422
current cost: 613755.1347091094
current cost: 613542.4924726266
current cost: 613400.2778879764
current cost: 613305.1254155939
current cost: 613241.438

### El siguiente paso será pasar todo el modelo a un .py y permitir que se corra el modelo desde otro .py