In [119]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plot
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [121]:
def sigmoid_function(x: float) -> float:
    return 1 / (1 + np.exp(-x))

def cost_function(y_true: np.array, y_pred: np.array) -> float:
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

def gradient_descent(X: np.ndarray, y: np.ndarray,num_iters: int, learning_rate: float, ):
    num_samples, num_features = X.shape

    weights = np.zeros(num_features)
    bias = 0

    for i in range(num_iters):
        y_pred = sigmoid_function( np.dot(X,weights) + bias)

        dw = ( 1 / num_samples ) * np.dot(X.T, (y_pred - y))
        db = ( 1 / num_samples ) * np.sum(y_pred - y)

        weights = weights - learning_rate * dw
        bias = bias - learning_rate * db

        cost = cost_function(y,y_pred)

    return weights, bias

def predict(X: np.ndarray, weights: np.ndarray, bias: float):
    y_pred = sigmoid_function(np.dot(X,weights) + bias)
    y_pred_bin = [1 if x >= 0.5 else 0 for x in y_pred]
    return y_pred,y_pred_bin

In [122]:
np.random.seed(0)
X = np.random.randn(400,2)
y = np.random.randint(0,2,400)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

learning_rate = 0.001
n_iters = 1000

weights, bias = gradient_descent(X_train,y_train, n_iters, learning_rate)

In [123]:
y_pred, y_pred_bin = predict(X_test, weights,bias)
y_pred_bin

[0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0]

In [124]:
clf = LogisticRegression()

clf.fit(X_train, y_train)

y_pred_sklearn = clf.predict_proba(X_test)[:,1]
y_pred_sklearn_bin = clf.predict(X_test)
y_pred_sklearn_bin

array([0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1,
       1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
       0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0])

In [125]:

# Métricas para tu implementación
accuracy_custom = accuracy_score(y_test, y_pred_bin)
precision_custom = precision_score(y_test, y_pred_bin)
recall_custom = recall_score(y_test, y_pred_bin)
f1_custom = f1_score(y_test, y_pred_bin)

# Métricas para la implementación de Scikit-Learn
accuracy_sklearn = accuracy_score(y_test, y_pred_sklearn_bin)
precision_sklearn = precision_score(y_test, y_pred_sklearn_bin)
recall_sklearn = recall_score(y_test, y_pred_sklearn_bin)
f1_sklearn = f1_score(y_test, y_pred_sklearn_bin)

print("Métricas para nuestra implementacion :")
print(f"Accuracy: {accuracy_custom}")
print(f"Precisión: {precision_custom}")
print(f"Recall: {recall_custom}")
print(f"F1-score: {f1_custom}")

print("\nMétricas para la implementación de Scikit-Learn:")
print(f"Accuracy: {accuracy_sklearn}")
print(f"Precisión: {precision_sklearn}")
print(f"Recall: {recall_sklearn}")
print(f"F1-score: {f1_sklearn}")

Métricas para nuestra implementacion :
Accuracy: 0.575
Precisión: 0.5892857142857143
Recall: 0.75
F1-score: 0.6599999999999999

Métricas para la implementación de Scikit-Learn:
Accuracy: 0.5625
Precisión: 0.5818181818181818
Recall: 0.7272727272727273
F1-score: 0.6464646464646464


In [113]:
def sigmoid_function(x: np.ndarray) -> np.ndarray:
    clipped_x = np.clip(x, -500, 500)  # Limita los valores de x para evitar desbordamiento
    return 1 / (1 + np.exp(-clipped_x))

def cost_function(y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = 1e-15) -> float:

    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)  # Limita las probabilidades predichas para evitar divisiones por cero
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

In [98]:
df = pd.read_csv("/home/gsu/ESCOM/regresiones/data/regresionLogistica.csv")
df.drop('User ID',axis=1, inplace=True)
df.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0


In [99]:
df["Gender"].unique()

array(['Male', 'Female'], dtype=object)

In [100]:
df["Gender"].value_counts()

Gender
Female    204
Male      196
Name: count, dtype: int64

In [101]:
diccionary_gender = {"Male": 0, "Female": 1}

def encoding_gender(gender: str, diccionary_gender: dict) -> int:
    return diccionary_gender.get(gender)

df["Gender"] = df["Gender"].apply(lambda x: encoding_gender(x, diccionary_gender))

In [102]:
df.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,0,19,19000,0
1,0,35,20000,0
2,1,26,43000,0
3,1,27,57000,0
4,0,19,76000,0


In [112]:
df.shape

(400, 4)

In [103]:
y = df["Purchased"]
X = df.drop("Purchased", axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [115]:
learning_rate = 0.001
n_iters = 1000

weights, bias = gradient_descent(X_train,y_train, n_iters,learning_rate)
y_pred, y_pred_bin = predict(X_test, weights,bias)


In [109]:
clf = LogisticRegression()

clf.fit(X_train, y_train)

y_pred_sklearn = clf.predict_proba(X_test)[:,1]
y_pred_sklearn_bin = clf.predict(X_test)
len(y_pred_sklearn_bin)

80

In [118]:
# Métricas para tu implementación
accuracy_custom = accuracy_score(y_test, y_pred_bin)
precision_custom = precision_score(y_test, y_pred_bin)
recall_custom = recall_score(y_test, y_pred_bin)
f1_custom = f1_score(y_test, y_pred_bin)

# Métricas para la implementación de Scikit-Learn
accuracy_sklearn = accuracy_score(y_test, y_pred_sklearn_bin)
precision_sklearn = precision_score(y_test, y_pred_sklearn_bin)
recall_sklearn = recall_score(y_test, y_pred_sklearn_bin)
f1_sklearn = f1_score(y_test, y_pred_sklearn_bin)

print("Métricas para nuestra implementacion :")
print(f"Accuracy: {accuracy_custom}")
print(f"Precisión: {precision_custom}")
print(f"Recall: {recall_custom}")
print(f"F1-score: {f1_custom}")

print("\nMétricas para la implementación de Scikit-Learn:")
print(f"Accuracy: {accuracy_sklearn}")
print(f"Precisión: {precision_sklearn}")
print(f"Recall: {recall_sklearn}")
print(f"F1-score: {f1_sklearn}")

Métricas para nuestra implementacion :
Accuracy: 0.35
Precisión: 0.35
Recall: 1.0
F1-score: 0.5185185185185185

Métricas para la implementación de Scikit-Learn:
Accuracy: 0.65
Precisión: 0.0
Recall: 0.0
F1-score: 0.0


  _warn_prf(average, modifier, msg_start, len(result))
