In [48]:
# PARTE 1
import pandas as pd
import numpy as np
import math
e = np.e 

def normalizacion(X):
    """Standardize every column of X to zero mean and unit standard deviation.

    Parameters
    ----------
    X : array-like of shape (n, m)
        Feature matrix, one column per feature.

    Returns
    -------
    numpy.ndarray
        New float array of shape (n, m) holding the standardized columns.
        The input is left untouched.
    """
    # Work on a float copy: writing the result back into X would silently
    # change the caller's data and truncate the values for integer arrays.
    Xn = np.array(X, dtype=float)
    medias = Xn.mean(axis=0)
    desviaciones = Xn.std(axis=0)  # population standard deviation (ddof=0)
    # A constant column has std 0; dividing by 1 instead maps it to all zeros
    # rather than producing NaN/inf.
    desviaciones = np.where(desviaciones == 0, 1.0, desviaciones)
    return (Xn - medias) / desviaciones

def sigmoide(z):
    """Logistic function 1 / (1 + exp(-z)), applied element-wise to scalars or arrays."""
    denominador = 1 + np.exp(-z)
    return 1 / denominador
    
def gradiente_logistica_explicito(X,y,theta):
    """Loop-based gradient of the mean logistic loss, built one sample at a time.

    Returns the length-m vector (1/n) * sum_i (sigmoide(x_i . theta) - y_i) * x_i,
    where n, m = X.shape.
    """
    n_filas, n_cols = X.shape
    acumulado = np.zeros(n_cols)
    for fila in range(n_filas):
        # Linear score of this sample, summed left to right starting from 0.
        puntaje = sum(X[fila, col] * theta[col] for col in range(n_cols))
        error = sigmoide(puntaje) - y[fila]
        for col in range(n_cols):
            acumulado[col] += error * X[fila, col]
    return acumulado / n_filas
    
def gradiente_logistica(X,y,theta):
    """Vectorized logistic-loss gradient, X.T @ (sigmoide(X @ theta) - y).

    The result is the plain sum over samples; it is not divided by the number
    of rows.
    """
    residuos = sigmoide(X @ theta) - y
    return X.T @ residuos
    
def descenso_gradiente_logistica_L2(X,y,alpha=0.01,N=1000,lam=1):
    """Batch gradient descent for L2-regularized logistic regression.

    Starts from theta = 0 and performs N updates with step size alpha. The
    penalty (lam/n)*theta[j] is added to the gradient for j >= 1 only, so
    theta[0] is not regularized. Returns the final theta (length m).
    """
    n, m = X.shape
    theta = np.zeros(m)
    for _ in range(N):
        # Average the summed gradient over the n samples ...
        grad = gradiente_logistica(X,y,theta) / n
        # ... and add the ridge term to every component except the first.
        grad[1:] = grad[1:] + (lam/n)*theta[1:]
        theta = theta - alpha * grad
    return theta

def entrenar(Xs,y):
    """Standardize the features, prepend an intercept column and fit the model.

    Parameters
    ----------
    Xs : numpy.ndarray
        Either a 1-D array holding a single feature or a 2-D array of shape
        (n, m) with one column per feature. It is not modified.
    y : numpy.ndarray
        Labels passed through to descenso_gradiente_logistica_L2.

    Returns
    -------
    numpy.ndarray
        Fitted parameters; theta[0] belongs to the column of ones and
        theta[1:] to the standardized features, in column order.
    """
    if Xs.ndim == 1:
        # Single feature: the expression below already builds a new array.
        Xs = (Xs - Xs.mean())/Xs.std()
        X = np.ones((len(Xs), 2))
        X[:,1] = Xs
    else:
        n, m = Xs.shape
        # normalizacion may write its result back into the array it receives,
        # so hand it a private float copy: the caller's data stays intact and
        # integer inputs are not truncated.
        Xs = normalizacion(np.array(Xs, dtype=float))
        X = np.ones((n, m + 1))
        X[:, 1:] = Xs
        print(f"X:\n{X}")
    theta_final = descenso_gradiente_logistica_L2(X, y)
    return theta_final

def calcular_tp(y_real,y_pred):
    """Count the positions where both the true and the predicted label equal 1."""
    return sum(
        1
        for i in range(len(y_real))
        if y_real[i] == 1 and y_pred[i] == 1
    )
# Note: evaluar_modelo computes the same count in a single line with
# tp = np.sum((y_real==1) & (y_pred==1))

def evaluar_modelo(X, y_real, theta, cutoff=0.5):
    """Classify the rows of X with the fitted model and compute binary metrics.

    Parameters
    ----------
    X : numpy.ndarray
        Design matrix; X @ theta is used as the logit of each row.
    y_real : numpy.ndarray
        True labels, compared against 0 and 1.
    theta : numpy.ndarray
        Model parameters.
    cutoff : float, optional
        Probability threshold. A row is predicted as class 1 when its
        probability is strictly greater than cutoff. Defaults to 0.5; lower it
        to label more rows as class 1 (fewer false negatives, more false
        positives).

    Returns
    -------
    dict
        "matriz" ([[tn, fp], [fn, tp]]), "accuracy", "precision", "recall"
        and "f1". A ratio whose denominator is zero is reported as 0.
    """
    probs = sigmoide(X @ theta)
    y_pred = (probs > cutoff).astype(int)
    tp = np.sum((y_real==1) & (y_pred==1))
    tn = np.sum((y_real==0) & (y_pred==0))
    fp = np.sum((y_real==0) & (y_pred==1))
    fn = np.sum((y_real==1) & (y_pred==0))
    accuracy = (tp + tn)/len(y_real)
    precision = tp/(tp + fp) if (tp+fp)>0 else 0
    recall = tp/(tp + fn) if (tp+fn)>0 else 0
    f1 = 2*(precision*recall)/(precision + recall) if (precision + recall)>0 else 0
    return {"matriz": [[tn,fp],[fn,tp]],
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "f1": f1}

def imprimir_metricas(metrics, nombres_clases=['-', '+']): 
    """Print the confusion matrix and the scalar metrics of a model.

    Parameters
    ----------
    metrics : dict
        Must contain "matriz" (2x2 nested list) and the keys "accuracy",
        "precision", "recall" and "f1" with numeric values.
    nombres_clases : list of str, optional
        Row labels for the confusion matrix. The default list is only read,
        never modified.
    """
    df_matriz = pd.DataFrame(
        metrics['matriz'],
        index=nombres_clases,
        columns=['Pred 0', 'Pred 1']
    )
    print("Matriz de confusión:")
    # The colon belongs before the line break; placed after it, it was glued
    # to the DataFrame header and shifted that row out of alignment.
    print(f"matriz:\n{df_matriz}")
    print("Métricas del modelo:")
    for metrica in ['accuracy', 'precision', 'recall', 'f1']:
        valor = metrics[metrica]
        print(f"{metrica}: {valor:.4f}")

def odds_ratio(theta1, nombre_variable="TotLnsLses_Assets"):
    """Print the odds ratio exp(theta1) of a coefficient and its interpretation.

    Parameters
    ----------
    theta1 : float
        Logistic-regression coefficient of the variable of interest.
    nombre_variable : str, optional
        Variable name shown in the interpretation line. Defaults to
        "TotLnsLses_Assets".
    """
    # np.exp keeps the function independent of a module-level `e`, which can
    # be rebound or deleted between notebook cells.
    odds = np.exp(theta1)
    print(f"Odds Ratio: {odds:.4f}")
    print(f"Interpretación: Por cada unidad extra en {nombre_variable}, los odds de ser débil se multiplican por {odds:.2f}.")
 
if __name__=='__main__':
    # a. Fit the model on the two expense/loan ratios.
    print(f"\n\na:\n\n")
    df = pd.read_csv("Banks.csv")
    print(df)
    X = df[['TotExp/Assets','TotLns&Lses/Assets']].values
    y = (df['Financial Condition']==1).astype(int).values  # WEAK=1 (arbitrary choice), strong=0
    print(f"X:\n{X}")
    print(f"y:\n{y}") 
    # entrenar standardizes the features before fitting, so keep the training
    # mean/std (population std, as in normalizacion) to put any later input on
    # the same scale as theta.
    medias = X.mean(axis=0)
    desviaciones = X.std(axis=0)
    # Train on a copy so the raw X stays available below.
    theta = entrenar(X.copy(), y)
    print(f"\nTheta final:\n{theta}")
    # theta = [intercept, TotExp/Assets, TotLns&Lses/Assets]. The message
    # printed by odds_ratio is about TotLns&Lses/Assets, i.e. theta[2];
    # dividing by the feature's std converts the standardized coefficient into
    # one per raw unit, as the message states.
    odds_ratio(theta[2] / desviaciones[1])
    # b. Score a new bank.
    print(f"\n\nb:\n\n")
    # Same column order as X: TotExp/Assets first, TotLns&Lses/Assets second.
    X_b = np.array([0.11, 0.6])
    print(f"X_b:\n{X_b}")
    X_b1 = np.ones(len(X_b)+ 1)
    # Standardize with the training statistics before applying theta.
    X_b1[1:] = (X_b - medias) / desviaciones
    print(f"X_b1:\n{X_b1}")
    z_b1 = X_b1 @ theta
    h_theta = sigmoide(z_b1)
    odds_b = np.exp(z_b1)
    # Strictly greater than 0.5, the same rule evaluar_modelo applies.
    clasificacion = 1 if h_theta > 0.5 else 0
    print(f"\nNuevo banco (TotLnsLses/Assets=0.6, TotExp/Assets=0.11):")
    print(f"Logit: {z_b1:.4f}")
    print(f"Odds: {odds_b:.4f}")
    print(f"Probabilidad (weak): {h_theta:.4f}")
    print(f"Clasificación: {'weak' if clasificacion==1 else 'strong'}")
    # c. Equivalent thresholds on the odds and logit scales.
    print(f"\n\nc\n\n")
    cutoff_prob = 0.5
    threshold_odds = cutoff_prob/(1-cutoff_prob)
    threshold_logit = np.log(threshold_odds)
    print(f"\nUmbral basado en odds: {threshold_odds:.4f}")
    print(f"Umbral basado en logit: {threshold_logit:.4f}")
    # e. Evaluate on the training data: intercept column plus the standardized
    # features, i.e. the same design matrix the model was fitted on.
    X_input = np.ones((X.shape[0], X.shape[1] + 1))
    X_input[:, 1:] = (X - medias) / desviaciones
    metrics = evaluar_modelo(X_input, y, theta)
    imprimir_metricas(metrics) 
    print("\nSi clasificar un banco débil como fuerte es más costoso, disminuir el cutoff para que sea más fácil clasificar como débil.")


























a:


    Obs  Financial Condition  TotCap/Assets  TotExp/Assets  TotLns&Lses/Assets
0     1                    1            9.7           0.12                0.65
1     2                    1            1.0           0.11                0.62
2     3                    1            6.9           0.09                1.02
3     4                    1            5.8           0.10                0.67
4     5                    1            4.3           0.11                0.69
5     6                    1            9.1           0.13                0.74
6     7                    1           11.9           0.10                0.79
7     8                    1            8.1           0.13                0.63
8     9                    1            9.3           0.16                0.72
9    10                    1            1.1           0.16                0.57
10   11                    0           11.1           0.08                0.43
11   12                    0           20.5  

In [46]:
# PARTE 2
import pandas as pd
import numpy as np
import math
e = np.e 

def gradiente_logistica(X,y,theta):
    """Traced logistic gradient: prints the inputs and every intermediate value.

    Returns X.T @ (sigmoide(X @ theta) - y), the sum over samples without
    dividing by the number of rows.
    """
    # Echo the inputs, one labelled block per value.
    print(f"X\n{X}", f"X.T\n{X.T}", f"theta\n{theta}", f"y\n{y}", sep="\n")
    z = X @ theta
    print(f"z:\n{z}") 
    h_theta = sigmoide(z) 
    print(f"h_theta, p:\n{h_theta}")
    error = h_theta - y
    return X.T @ error
    
def descenso_gradiente_logistica_L2(X,y,alpha=1,N=2,lam=1):
    """Traced gradient descent with an L2 term, starting from theta = [0, 1, 0].

    Runs N iterations with step size alpha and prints the iteration number,
    the raw gradient and the updated theta at each step. Returns the final
    theta.
    """
    n, m = X.shape
    theta = np.array([0,1,0])
    for i in range(N):
        print(f"iteracion\n\n i{i+1}")
        grad = gradiente_logistica(X,y,theta)
        print(f"grad:\n{grad}")
        # The bias-only update (grad[0] = grad[0]/n) is disabled here, so the
        # (lam/n)*theta term is added to every component, theta[0] included.
        grad = grad/n + (lam/n)*theta
        theta = theta - alpha * grad
        print(f"theta = theta - alpha * grad:\n{theta}")
    return theta
    
# Toy data set: six samples; the first column of X is the constant 1.
X = np.array([
    [1, 0, 1],
    [1, 1, 0],
    [1, 1, 2],
    [1, 2, 1],
    [1, 0, 2],
    [1, 2, 2],
])
y = np.array([0, 0, 1, 1, 1, 0])

# Run the traced descent with its defaults (alpha=1, N=2, lam=1).
descenso_gradiente_logistica_L2(X,y)


iteracion

 i1
X
[[1 0 1]
 [1 1 0]
 [1 1 2]
 [1 2 1]
 [1 0 2]
 [1 2 2]]
X.T
[[1 1 1 1 1 1]
 [0 1 1 2 0 2]
 [1 0 2 1 2 2]]
theta
[0 1 0]
y
[0 0 1 1 1 0]
z:
[0 1 1 2 0 2]
h_theta, p:
[0.5        0.73105858 0.73105858 0.88079708 0.5        0.88079708]
grad:
[1.22371131 1.98530547 0.60450839]
theta = theta - alpha * grad:
[-0.20395189  0.50244909 -0.1007514 ]
iteracion

 i2
X
[[1 0 1]
 [1 1 0]
 [1 1 2]
 [1 2 1]
 [1 0 2]
 [1 2 2]]
X.T
[[1 1 1 1 1 1]
 [0 1 1 2 0 2]
 [1 0 2 1 2 2]]
theta
[-0.20395189  0.50244909 -0.1007514 ]
y
[0 0 1 1 1 0]
z:
[-0.30470328  0.2984972   0.09699441  0.70019489 -0.40545468  0.59944349]
h_theta, p:
[0.42440813 0.5740751  0.52422961 0.66823098 0.4000025  0.64552898]
grad:
[ 0.2364753   0.72582463 -0.76783871]
theta = theta - alpha * grad:
[-0.20937246  0.2977368   0.04401362]


array([-0.20937246,  0.2977368 ,  0.04401362])