In [2]:
from numpy.random import rand
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error

# Predict the target value for a single row given the model coefficients
def predict_row(row, coefficients):
    """Return the linear-model output for one input row.

    ``coefficients`` is indexed position-by-position against ``row``; its
    last element is used as the bias (intercept).
    """
    # The bias seeds the running total; weighted inputs are then added in
    # row order.
    total = coefficients[-1]
    for position, value in enumerate(row):
        total += coefficients[position] * value
    return total

# Predict the target value for every row of a dataset X
def predict_dataset(X, coefficients):
    """Return a list holding the ``predict_row`` output for each row of ``X``, in order."""
    return [predict_row(sample, coefficients) for sample in X]

# Seed NumPy's global generator so the random coefficients (and therefore the
# reported MSE) are the same on every run of this cell
from numpy.random import seed
seed(1)

# Build a synthetic regression dataset
X, y = make_regression(n_samples=1000, n_features=10, n_informative=2, noise=0.2, random_state=1)

# Number of coefficients: one per feature plus the bias
n_coeff = X.shape[1] + 1

# Draw random coefficients
coefficients = rand(n_coeff)

# Predict every row with those coefficients
yhat = predict_dataset(X, coefficients)

# Mean squared error of the predictions against the true targets
score = mean_squared_error(y, yhat)

print(f'MSE: {score:.6f}')

MSE: 7179.559763


In [4]:
from numpy.random import randn, rand
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Predict the target value for one row
def predict_row(row, coefficients):
    """Compute bias + sum(coefficients[i] * row[i]) for a single row.

    The bias (intercept) is the last element of ``coefficients``.
    """
    accumulated = coefficients[-1]  # start from the bias term
    for index, feature in enumerate(row):
        accumulated += coefficients[index] * feature
    return accumulated

# Predict the target value for the whole dataset
def predict_dataset(X, coefficients):
    """Apply ``predict_row`` to each row of ``X`` and collect the results in a list."""
    return [predict_row(sample, coefficients) for sample in X]

# Objective function: mean squared error obtained with a given set of coefficients
def objective(X, y, coefficients):
    """Score ``coefficients`` by the mean squared error of their predictions on ``X`` against ``y``."""
    predictions = predict_dataset(X, coefficients)
    return mean_squared_error(y, predictions)

# Hill climbing local search (minimises the objective)
def hillclimbing(X, y, objective, solution, n_iter, step_size):
    """Run ``n_iter`` hill-climbing steps starting from ``solution``.

    Each step perturbs the current point with ``randn`` noise scaled by
    ``step_size`` and keeps the perturbed point when its objective value is
    less than or equal to the current one. Progress is printed after every
    step.

    Returns a two-element list ``[solution, solution_eval]``.
    """
    best = solution
    best_eval = objective(X, y, best)  # score of the starting point
    for step in range(n_iter):
        # Propose a neighbour by adding scaled Gaussian noise to the current point.
        neighbour = best + randn(len(best)) * step_size
        neighbour_eval = objective(X, y, neighbour)
        # Accept the neighbour when it is no worse (lower or equal error).
        if best_eval >= neighbour_eval:
            best = neighbour
            best_eval = neighbour_eval
        # Progress report
        print('>%d %.5f' % (step, best_eval))
    return [best, best_eval]

# Seed NumPy's global generator so the starting solution and every search
# step are repeatable across runs of this cell
from numpy.random import seed
seed(1)

# Build the synthetic regression dataset
X, y = make_regression(n_samples=1000, n_features=10, n_informative=2, noise=0.2, random_state=1)

# Split into train and test sets; a fixed random_state keeps the split
# identical across runs so train/test scores are comparable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)

# Hill climbing search parameters
n_iter = 2000
step_size = 0.15

# Number of coefficients (features + bias)
n_coef = X.shape[1] + 1

# Random starting solution
solution = rand(n_coef)

# Run the local search on the training set
coefficients, score = hillclimbing(X_train, y_train, objective, solution, n_iter, step_size)

print('Done!')
print('Coefficients: %s' % coefficients)
print('Train MSE: %f' % score)

# Predict on the held-out test set
yhat = predict_dataset(X_test, coefficients)

# Error on the test set
score = mean_squared_error(y_test, yhat)
print('Test MSE: %f' % score)

>0 7312.03437
>1 7312.03437
>2 7312.03437
>3 7310.31841
>4 7310.31841
>5 7310.31841
>6 7309.12819
>7 7309.12819
>8 7309.12819
>9 7309.12819
>10 7309.12819
>11 7279.58949
>12 7247.13737
>13 7247.13737
>14 7236.16612
>15 7236.16612
>16 7201.98702
>17 7187.06403
>18 7153.57531
>19 7150.88513
>20 7150.88513
>21 7150.88513
>22 7150.88513
>23 7127.47584
>24 7100.83112
>25 7100.83112
>26 7100.83112
>27 7094.17383
>28 7057.32661
>29 7040.61373
>30 7040.61373
>31 7040.61373
>32 7025.86453
>33 7021.89868
>34 7020.58152
>35 7020.58152
>36 7020.58152
>37 6987.12632
>38 6987.12632
>39 6987.12632
>40 6987.12632
>41 6978.31223
>42 6941.47138
>43 6913.07871
>44 6913.07871
>45 6906.50281
>46 6904.72153
>47 6865.59016
>48 6855.98655
>49 6847.84202
>50 6840.13032
>51 6840.13032
>52 6832.80108
>53 6832.80108
>54 6830.57058
>55 6830.57058
>56 6830.57058
>57 6830.57058
>58 6805.14458
>59 6804.68916
>60 6804.68916
>61 6782.07421
>62 6782.07421
>63 6782.07421
>64 6775.59032
>65 6775.59032
>66 6755.86171
>67 6

In [6]:
from math import exp
from numpy.random import rand
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score

# Predict the probability for one row with logistic regression
def predict_row(row, coefficients):
    """Return the logistic (sigmoid) of the linear activation for one row.

    ``coefficients`` is indexed position-by-position against ``row``; its
    last element is used as the bias (intercept).

    The sigmoid is evaluated in a numerically stable form so that large
    negative activations return a value near 0 instead of raising
    ``OverflowError`` from ``exp``.
    """
    result = coefficients[-1]  # bias (intercept)
    for i in range(len(row)):
        result += coefficients[i] * row[i]
    # Only ever exponentiate a non-positive number: exp() of a large positive
    # value overflows, while exp() of a large negative value underflows to 0.
    if result >= 0:
        return 1.0 / (1.0 + exp(-result))
    scaled = exp(result)
    return scaled / (1.0 + scaled)

# Predict the probability for every row of the dataset
def predict_dataset(X, coefficients):
    """Return a list with the ``predict_row`` probability for each row of ``X``, in order."""
    return [predict_row(sample, coefficients) for sample in X]

# Seed NumPy's global generator so the random coefficients (and therefore the
# reported accuracy) are the same on every run of this cell
from numpy.random import seed
seed(1)

# Build a synthetic binary classification dataset
X, y = make_classification(n_samples=1000, n_features=5, n_informative=2,
                           n_redundant=1, random_state=1)

# Number of coefficients (features + bias)
n_coeff = X.shape[1] + 1

# Random initial coefficients
coefficients = rand(n_coeff)

# Predicted probabilities for the whole dataset
yhat = predict_dataset(X, coefficients)

# Turn probabilities into 0/1 labels by rounding
yhat_labels = [round(prob) for prob in yhat]

# Classification accuracy
score = accuracy_score(y, yhat_labels)

print('Accuracy: %f' % score)

Accuracy: 0.638000


In [8]:
from math import exp
from numpy.random import randn, rand
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Predict the probability for one sample using logistic regression
def predict_row(row, coefficients):
    """Return the logistic (sigmoid) of the linear activation for one row.

    ``coefficients`` is indexed position-by-position against ``row``; its
    last element is used as the bias (intercept).

    The sigmoid is evaluated in a numerically stable form so that large
    negative activations return a value near 0 instead of raising
    ``OverflowError`` from ``exp``. This matters when the search proposes
    coefficients of large magnitude.
    """
    result = coefficients[-1]  # bias (intercept)
    for i in range(len(row)):
        result += coefficients[i] * row[i]
    # Only ever exponentiate a non-positive number: exp() of a large positive
    # value overflows, while exp() of a large negative value underflows to 0.
    if result >= 0:
        return 1.0 / (1.0 + exp(-result))
    scaled = exp(result)
    return scaled / (1.0 + scaled)

# Predict the probability for the whole dataset
def predict_dataset(X, coefficients):
    """Apply ``predict_row`` to each row of ``X`` and collect the probabilities in a list."""
    return [predict_row(sample, coefficients) for sample in X]

# Objective function: classification accuracy (higher is better)
def objective(X, y, coefficients):
    """Score ``coefficients`` by the accuracy of their rounded predictions on ``X`` against ``y``."""
    probabilities = predict_dataset(X, coefficients)
    # Rounding turns each probability into a 0 or 1 label.
    labels = [round(p) for p in probabilities]
    return accuracy_score(y, labels)

# Hill climbing local search (maximises the objective)
def hillclimbing(X, y, objective, solution, n_iter, step_size):
    """Run ``n_iter`` hill-climbing steps starting from ``solution``.

    Each step perturbs the current point with ``randn`` noise scaled by
    ``step_size`` and keeps the perturbed point when its objective value is
    greater than or equal to the current one. Progress is printed after
    every step.

    Returns a two-element list ``[solution, solution_eval]``.
    """
    best = solution
    best_eval = objective(X, y, best)  # score of the starting point
    for step in range(n_iter):
        neighbour = best + randn(len(best)) * step_size  # proposed move
        neighbour_eval = objective(X, y, neighbour)
        # Accept the neighbour when it improves on or ties the current score.
        if best_eval <= neighbour_eval:
            best = neighbour
            best_eval = neighbour_eval
        print('>%d %.5f' % (step, best_eval))
    return [best, best_eval]

# Seed NumPy's global generator so the starting solution and every search
# step are repeatable across runs of this cell
from numpy.random import seed
seed(1)

# Build a synthetic binary classification dataset
X, y = make_classification(n_samples=1000, n_features=5, n_informative=2,
                           n_redundant=1, random_state=1)

# Split into train and test sets; a fixed random_state keeps the split
# identical across runs so train/test scores are comparable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)

# Hill climbing parameters
n_iter = 2000
step_size = 0.1

# Number of coefficients (features + bias)
n_coef = X.shape[1] + 1

# Random starting solution
solution = rand(n_coef)

# Run hill climbing on the training set
coefficients, score = hillclimbing(X_train, y_train, objective, solution, n_iter, step_size)

print('Done!')
print('Coefficients: %s' % coefficients)
print('Train Accuracy: %f' % score)

# Predict on the held-out test set and round probabilities to 0/1 labels
yhat = predict_dataset(X_test, coefficients)
yhat_labels = [round(prob) for prob in yhat]

# Accuracy on the test set
test_score = accuracy_score(y_test, yhat_labels)
print('Test Accuracy: %f' % test_score)

>0 0.42239
>1 0.47015
>2 0.48955
>3 0.48955
>4 0.48955
>5 0.48955
>6 0.48955
>7 0.48955
>8 0.49552
>9 0.49552
>10 0.49552
>11 0.49552
>12 0.49552
>13 0.49552
>14 0.49552
>15 0.49552
>16 0.49552
>17 0.50149
>18 0.51493
>19 0.52836
>20 0.53881
>21 0.55970
>22 0.55970
>23 0.58955
>24 0.58955
>25 0.58955
>26 0.63284
>27 0.63284
>28 0.63284
>29 0.66269
>30 0.66269
>31 0.68209
>32 0.68209
>33 0.71343
>34 0.74328
>35 0.74328
>36 0.74776
>37 0.76567
>38 0.76716
>39 0.78657
>40 0.79552
>41 0.81194
>42 0.81194
>43 0.82239
>44 0.82239
>45 0.83134
>46 0.83134
>47 0.83134
>48 0.83134
>49 0.83134
>50 0.83134
>51 0.83731
>52 0.83731
>53 0.83731
>54 0.83731
>55 0.83731
>56 0.83731
>57 0.84478
>58 0.84478
>59 0.84478
>60 0.84478
>61 0.84627
>62 0.85075
>63 0.85075
>64 0.85075
>65 0.85821
>66 0.85821
>67 0.85821
>68 0.85821
>69 0.85821
>70 0.85821
>71 0.85821
>72 0.85821
>73 0.85821
>74 0.85821
>75 0.85821
>76 0.85821
>77 0.85821
>78 0.85821
>79 0.85821
>80 0.85821
>81 0.85821
>82 0.85821
>83 0.85821
>8