# Modelo de regresión lineal 

### Carlos Alberto Mentado Reyes A01276065

In [153]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split

 Primero definiré funciones necesarias, como costo, coeficiente de determinación etc 

In [154]:
# Función de covarianza y correlación

def covariance(x, y):
    y_mean = y.mean()
    x_mean = x.mean()
    cov = 0
    for i in range(len(x)):
        cov += (x[i] - x_mean) * (y[i] - y_mean)

    return cov / (len(x) - 1)

def correlation(x, y):
    return covariance(x, y) / (x.std() * y.std())


In [155]:
# Función predict y función costo 

def predict(x, w, b):
    return w * x + b

def cost(y, y_pred):
    cost = 0
    for i in range(0, len(y)):
        cost += (y.iloc[i] - y_pred[i]) ** 2
    return cost / len(y)

In [156]:
# Función descenso de gradiente w y b 

def b_gradient_descent(x, w, b, lr,y):
    n = len(x)
    partial_b_sum = 0
    for i in range(0, n):
        partial_b_sum += (y.iloc[i] - b - (w*x.iloc[i])) * (-1)

    partial_b = partial_b_sum * (2/n)

    return b - lr*(partial_b)



def w_gradient_descent(x, w, b, lr, y):
    n = len(x)
    partial_w_sum = 0
    for i in range(0, n):
        partial_w_sum += (y.iloc[i]-b-(w*x.iloc[i])) * ((-1)*x.iloc[i])
    partial_w = partial_w_sum * (2/n)

    return w - lr * partial_w


In [157]:
#Función general

def trainModel(x, y, lr, max_iter):
    if len(x) != len(y):
        print("size of arrays for features and targets do not match")
        return

    np.random.seed(42)
    w = 1
    b = 1
    model_cost = float("inf")
    iter_cost = float("inf")

    for i in range(0, max_iter):
        predictions=[]

        for j in range(0, len(x)):
            predictions.append(predict(x.iloc[j], w, b))
        
        iter_cost = cost(y, predictions)
        if (iter_cost>model_cost):
            break
        model_cost = iter_cost
        w = w_gradient_descent(x, w, b, lr, y)
        b = b_gradient_descent(x, w, b, lr, y)

    return w, b

def testModel(w, b, x, y):
    predictions = []
    for i in range(0, len(x)):
        predictions.append(predict(x.iloc[i], w, b))
    
    results = cost(y, predictions)

    return results


In [158]:
#Funcion para entrenar y testear 

def linearModel(x_train, x_test, y_train, y_test, lr=0.1, max_iter=100):
    print("Beggining training")
    print(f"Max iterations: {max_iter}")
    print(f"Learning rate: {lr}")
    w, b = trainModel(x_train, y_train, lr, max_iter)

    results = testModel(w, b, x_test, y_test)

    print("Final results:")
    print(f"y = {np.mean(w)}x + {np.mean(b)}")
    print(f"Final cost: {np.mean(results)}")


In [159]:
#Pequeña prueba

#Bases de datos a utilizar
url = "https://raw.githubusercontent.com/allisonhorst/palmerpenguins/c19a904462482430170bfe2c718775ddb7dbb885/inst/extdata/penguins_raw.csv"

penguins = pd.read_csv(url)
penguins_target = penguins["Body Mass (g)"].dropna()
penguins_feature = penguins["Culmen Length (mm)"].dropna()

X_train, X_test, y_train, y_test = train_test_split(
    penguins_feature, penguins_target, test_size=0.33, random_state=42
)

linearModel(
    X_train,
    X_test,
    y_train,
    y_test,
    0.001,
    100
)



Beggining training
Max iterations: 100
Learning rate: 0.001
Final results:
y = 373.13530427947603x + -23.37601529894488
Final cost: 150789081.0495064
