# 1. Definimos el path del CSV

In [1]:
# Location of the CSV dataset file.
path = "botnet_reduced_l.csv"

Definimos las funciones centralizadas

In [2]:
import numpy as np

def readFile(filename):
    """Load a comma-separated numeric file and split it into features and labels.

    Every column except the last is treated as a feature; the last column is
    the label and is cast to ``int``.

    Args:
        filename: Path or file-like object accepted by ``np.loadtxt``.

    Returns:
        Tuple ``(X, y)``: ``X`` is a 2-D array with all columns but the last,
        ``y`` is a 1-D int array built from the last column.
    """
    # ndmin=2 keeps the result two-dimensional even when the file holds a
    # single row; without it loadtxt returns a 1-D array and the column
    # slicing below raises IndexError.
    data = np.loadtxt(filename, delimiter=',', ndmin=2)
    X = data[:, :-1]  # all rows, every column except the last
    y = data[:, -1].astype(int)  # last column, converted to int
    return X, y

def normalize(X):
    """Standardize each column of X to zero mean and unit standard deviation.

    Statistics are computed per column (``axis=0``) using the population
    standard deviation (NumPy's default ``ddof=0``).

    Args:
        X: 2-D numeric array, one sample per row.

    Returns:
        Array of the same shape as ``X`` with each column centered and scaled.
        Columns whose standard deviation is exactly zero are only centered,
        so they come out as zeros instead of NaN.
    """
    means = X.mean(axis=0)
    stdevs = X.std(axis=0)
    # A constant column has zero standard deviation; dividing by it would
    # produce 0/0 = NaN and poison every later dot product. Divide by 1 there.
    safe_stdevs = np.where(stdevs == 0, 1.0, stdevs)
    normalized_X = (X - means) / safe_stdevs
    return normalized_X


def train(X, y, iterations, learning_rate, lambda_reg):
    """Fit a logistic regression with batch gradient descent and an L2 penalty.

    Weights start at zero and are updated ``iterations`` times using the
    gradient computed over the whole of ``X``.

    Args:
        X: 2-D array of shape ``(m, n)``, one sample per row.
        y: 1-D array of length ``m`` with the target labels.
        iterations: Number of gradient-descent steps.
        learning_rate: Step size applied to both gradients.
        lambda_reg: Regularization strength; contributes ``lambda_reg * w``
            to the weight gradient. The bias is not regularized.

    Returns:
        Tuple ``(w, b)``: weight vector of length ``n`` and the float bias.
    """
    m, n = X.shape
    w = np.zeros(n)
    b = 0.0  # float from the start so the return type does not depend on iterations

    for _ in range(iterations):
        linear_model = np.dot(X, w) + b

        # Numerically stable sigmoid: only non-positive values are
        # exponentiated, so np.exp may underflow to 0 but never overflows
        # (the naive 1 / (1 + exp(-z)) overflows for large negative z).
        exp_neg_abs = np.exp(-np.abs(linear_model))
        pred = np.where(
            linear_model >= 0,
            1.0 / (1.0 + exp_neg_abs),
            exp_neg_abs / (1.0 + exp_neg_abs),
        )
        error = pred - y

        gradient_w = np.dot(X.T, error) / m + lambda_reg * w
        gradient_b = np.sum(error) / m

        w -= learning_rate * gradient_w
        b -= learning_rate * gradient_b

    return w, b


def accuracy(w, b, X, y):
    """Return the fraction of samples the logistic model classifies correctly.

    A sample is predicted positive when its sigmoid probability is at least
    0.5, which is the same as its linear score ``X @ w + b`` being >= 0.

    Args:
        w: Weight vector, one entry per column of ``X``.
        b: Scalar bias.
        X: 2-D array, one sample per row.
        y: 1-D array of true labels, compared element-wise with the
            boolean predictions.

    Returns:
        ``correct / total`` as a float in ``[0, 1]``.
    """
    linear_model = np.dot(X, w) + b
    # sigmoid(z) >= 0.5 exactly when z >= 0, so threshold the score directly.
    # This skips np.exp altogether and so cannot overflow on large negative
    # scores. A score of exactly 0 is still classified as positive.
    predictions = linear_model >= 0
    correct = (predictions == y).sum()
    total = y.size
    return correct / total


Cargamos, normalizamos y dividimos los datos

In [3]:
import numpy as np
from sklearn.model_selection import train_test_split  # Para dividir los datos

# Uses the helper functions defined in the previous cell.

# Step 1: read the data from the CSV configured in `path` (first cell),
# instead of repeating the file name here.
X, y = readFile(path)

# Step 2: standardize the features.
# NOTE(review): the mean/std are computed over the full dataset before the
# split, so test-set statistics leak into the training features. Consider
# fitting the statistics on the training split only.
X_norm = normalize(X)

# Step 3: hold out 20% of the samples for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.2, random_state=42)


Entrenamos y evaluamos el modelo

In [4]:
# Gradient-descent hyperparameters: number of steps, step size (a common
# starting value) and L2 regularization strength (to limit overfitting).
iterations, learning_rate, lambda_reg = 1000, 0.01, 0.1

# Fit the model on the training split.
w, b = train(
    X_train,
    y_train,
    iterations=iterations,
    learning_rate=learning_rate,
    lambda_reg=lambda_reg,
)

# Evaluate the fitted model on both splits.
train_accuracy = accuracy(w, b, X_train, y_train)
test_accuracy = accuracy(w, b, X_test, y_test)

print(f"Accuracy on training set: {train_accuracy * 100:.2f}%")
print(f"Accuracy on test set: {test_accuracy * 100:.2f}%")

Accuracy on training set: 92.23%
Accuracy on test set: 92.24%
