In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [3]:
def normalization(z):
  """
  Funcion sigmoid para normalizar dato de entrada.
  """
  return 1 / (1 + np.exp(-z))

In [4]:
def costo(x, y, theta):
  """
  Funcion de costo para regresion logistica.
  """
  h = normalization(np.dot(theta.T, x))
  if y == 1:
    return -np.log(h)
  elif y == 0:
    return -np.log(1 - h)
  else:
    raise ValueError("La etiqueta y debe ser 0 o 1.")

In [5]:
def gradiente_descendiente(x, y, theta, alpha, num_iters):
  """
  Funcion de gradiente descendiente para optimizar los parametros theta.
  """
  m = x.shape[1]  # número de ejemplos

  for _ in range(num_iters):
    Z = np.dot(theta.T, x)
    H = normalization(Z)
    gradiente = (1/m) * np.dot(x, (H - y).T)
    theta = theta - alpha * gradiente
  return theta


In [6]:
df_train = pd.read_csv('proteins_training_set.csv')
X = df_train.drop(columns=['start_position', 'end_position', 'target']).values.T
Y = df_train['target'].values.reshape(1, -1)
X = np.vstack([np.ones((1, X.shape[1])), X])

In [7]:
n = X.shape[0]
theta = np.zeros((n, 1))
alpha = 0.01
epochs = 1000

def normalization(z):
    return 1 / (1 + np.exp(-z))

def gradiente_descendente(X, Y, theta, alpha, epochs):
  m = X.shape[1]
  for _ in range(epochs):
    Z = np.dot(theta.T, X)
    H = normalization(Z)
    gradiente = (1/m) * np.dot(X, (H - Y).T)
    theta = theta - alpha * gradiente
  return theta

# Entrenar
theta = gradiente_descendente(X, Y, theta, alpha, epochs)

In [8]:
def predecir (x, theta):
  """
  Funcion de prediccion para clasificacion binaria.
  """
  probs = normalization(np.dot(theta.T, X))
  return (probs >= 0.5).astype(int)

In [9]:
df_val = pd.read_csv('proteins_validation_set.csv')
X_val = df_val.drop(columns=['start_position', 'end_position', 'target']).values.T
Y_val = df_val['target'].values.reshape(1, -1)
X_val = np.vstack([np.ones((1, X_val.shape[1])), X_val])

In [10]:
Y_pred_val = predecir(X_val, theta)
accuracy_val = np.mean(Y_pred_val == Y_val)
print(f"Precisión en validación: {accuracy_val * 100:.2f}%")

ValueError: operands could not be broadcast together with shapes (1,9204) (1,2303) 