In [1]:
import math
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df = pd.read_csv('wine_data.csv')

In [5]:
df.head()

Unnamed: 0,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,Diluted wines,Proline,Unnamed: 13
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


#### Implementation of sigmoid

In [6]:
def reg_cost(X, y, theta):
    x_hat = X @ theta
    h = 1 / (1 + np.exp(-x_hat))
    return -(y * np.log(h)) + ((1 - y) * np.log(1 - h))

def reg_cost_prime(X, y, theta):
    h = X @ theta
    return ((X.T @ (h-y)) / len(X))

def gradient_descent(
    X, y, theta_0, 
    cost_function, cost_function_prime,
    learning_rate, threshold, max_iter=1000
):
    theta = theta_0
    iteration = 0
    costs = []
    thetas = []

    while np.linalg.norm(cost_function_prime(X, y, theta)) > threshold and iteration < max_iter:
        iteration += 1
        theta -= learning_rate * cost_function_prime(X, y, theta)
        costs.append(cost_function(X, y, theta))
        thetas.append(theta.copy())
    return theta, costs, thetas

##### ajuste de la data, hacer el shuffle

In [7]:
df = df.sample(frac=1)
df.head()

Unnamed: 0,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,Diluted wines,Proline,Unnamed: 13
50,1,13.05,1.73,2.04,12.4,92,2.72,3.27,0.17,2.91,7.2,1.12,2.91,1150
78,2,12.33,0.99,1.95,14.8,136,1.9,1.85,0.35,2.76,3.4,1.06,2.31,750
95,2,12.47,1.52,2.2,19.0,162,2.5,2.27,0.32,3.28,2.6,1.16,2.63,937
108,2,12.22,1.29,1.94,19.0,92,2.36,2.04,0.39,2.08,2.7,0.86,3.02,312
117,2,12.42,1.61,2.19,22.5,108,2.0,2.09,0.34,1.61,2.06,1.06,2.96,345


In [8]:
df['ones'] = np.ones(len(df))
X = np.array(df.loc[:, ['ones', 'Malic acid', 'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins', 'Hue']].copy())
y = np.array(df.loc[:, ['Alcohol']].copy())

#### Split de data
- train 70%
- test 15%
- cross validation 15%



In [9]:
X_train = X[:124]
X_cv = X[125:150]
X_test = X[151:178]
y_train = y[:124]
y_cv = y[125:150]
y_test = y[151:178]


In [10]:
m, n = X.shape
theta_0 = np.random.rand(n, 1)

##### Implementacion de diferentes modelos por grado polinomial

###### Grado polinomial 1

In [23]:
r_theta_tr, costs_tr, thetas_tr = gradient_descent(X_train, y_train, theta_0, reg_cost, reg_cost_prime, 0.000000001, 0.01)
r_theta_tr

array([[ 0.19246474],
       [-0.01178021],
       [ 0.37472702],
       [ 0.30930173],
       [ 0.73776523],
       [ 0.08107034]])

In [24]:
print("Costo de Train: ",  costs_tr[-1][-1][0])

Costo de Train:  4.49103195487084


In [28]:
print("Costo de Test: ", reg_cost(X_test, y_test, r_theta_tr)[-1][0],"\nCosto de CV: " ,reg_cost(X_cv, y_cv, r_theta_tr)[-1][0])

Costo de Test:  2.17078661443846 
Costo de CV:  2.2467044071876545


###### Grado polinomial 2

In [14]:
r_theta_tr2, costs_tr2, thetas_tr2 = gradient_descent(X_train ** 2, y_train, theta_0, reg_cost, reg_cost_prime, 0.0000001, 0.01)
r_theta_tr2

array([[ 0.19245355],
       [-0.00730656],
       [ 0.37518801],
       [ 0.31041455],
       [ 0.73776435],
       [ 0.14403815]])

In [15]:
print("Costo de Train: ",  costs_tr2[-1][-1][0])

Costo de Train:  15.414462097879499


In [30]:
print("Costo de test",reg_cost(X_test ** 2, y_test, r_theta_tr)[-1][0],"\nCV cost" ,reg_cost(X_cv ** 2, y_cv, r_theta_tr)[-1][0])

Costo de test 1.9227141352464185 
CV cost 2.0791010216193304


###### Grado polinomial 3

In [17]:
r_theta_tr3, costs_tr3, thetas_tr3 = gradient_descent(X_train ** 3, y_train, theta_0, reg_cost, reg_cost_prime, 0.00000001, 0.01)
r_theta_tr3

array([[ 0.19246496],
       [-0.01177699],
       [ 0.37472819],
       [ 0.30930333],
       [ 0.73776524],
       [ 0.08107137]])

In [18]:
print("Train cost: " ,costs_tr3[-1][-1][0])

Train cost:  15.03661706098865


In [19]:
print("test cost: " , reg_cost(X_test ** 3, y_test, r_theta_tr)[-1][0],"\ncv cost:",reg_cost(X_cv ** 3, y_cv, r_theta_tr)[-1][0])


test cost:  30.338848186066684 
cv cost: 26.02742838449142


In [29]:
TC= [costs_tr[-1][-1][0], costs_tr2[-1][-1][0], costs_tr3[-1][-1][0]]
TestCost = TestCost = [reg_cost(X_test, y_test, r_theta_tr)[-1][0], reg_cost(X_test, y_test, r_theta_tr)[-1][0] ** 2, reg_cost(X_test, y_test, r_theta_tr)[-1][0] ** 3]


In [31]:
TestCost

[2.17078661443846, 4.712314525425192, 10.229429294816931]

In [32]:
TC

[4.49103195487084, 15.414462097879499, 15.03661706098865]

Como podemos ver en los modelos, el modelo con el costo más bajo es el modelo 1 con un costo de entrenamiento de **4.49**. un costo de test de **2.17** y uno ce cross validation de **2.72**

**nota**

Llegar a un resultado con sentido tuve que jugar mucho con los learning rates y con el treshold