In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_friedman1, make_classification

In [2]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [3]:
from sklearn.linear_model import Ridge, Lasso

In [4]:
def getData():
    X, y = make_friedman1(n_samples = 10000, n_features = 5, noise = 5.0, random_state = 0)
    return X, y

In [5]:
X,y = getData()
print(X.shape,y.shape)

(10000, 5) (10000,)


In [6]:
Xtrain, Xtest, ytrain, ytest = X[:5000], X[5000:], y[:5000], y[5000:]
print(Xtrain.shape,Xtest.shape,ytrain.shape,ytest.shape)

(5000, 5) (5000, 5) (5000,) (5000,)


In [7]:
def regLinearEquacaoNormal(X, y):
   
    X = np.c_[np.ones((X.shape[0], 1)), X]
    
    transposta_X = X.T
    XTX = transposta_X.dot(X)
    inversa = np.linalg.inv( XTX )
    
    theta_best = inversa.dot(transposta_X).dot(y)
    return theta_best

In [8]:
class regLinear():
    
    def __init__(self, learning_rate, num_steps):
        self.learning_rate = learning_rate
        self.num_steps = num_steps
        
    def fit(self, X, y):
        y = y.reshape(-1,1)
        m = X.shape[0] 
        k = X.shape[1] 
        theta = np.random.randn(k+1,1) 
        X_b = np.c_[np.ones((m, 1)), X] 
        for step in range(self.num_steps):
            gradients = 2/m * X_b.T.dot(X_b.dot(theta) - y)
            theta = theta - self.learning_rate * gradients
        self.theta_final = theta
        print("modelo treinado.")
        
    def predict(self, X):
        m = X.shape[0]
        X_b = np.c_[np.ones((m, 1)), X]
        preds = X_b.dot(self.theta_final)
        return preds.reshape(-1,)

In [9]:
learning_rate =(0.075,0.10,0.25,0.75)

num_steps = 200

for i in learning_rate:
    rg = regLinear(learning_rate = i, num_steps = 200)
    rg.fit(Xtrain,ytrain)
    rg_theta_final = rg.theta_final
    
    print("learning_rate:",i,"valor de theta final", rg_theta_final)
    print("*****"*20)
    



modelo treinado.
learning_rate: 0.075 valor de theta final [[1.9998418 ]
 [5.38100458]
 [6.26202079]
 [0.1797095 ]
 [9.26947918]
 [3.8876161 ]]
****************************************************************************************************
modelo treinado.
learning_rate: 0.1 valor de theta final [[1.38158121]
 [5.63301508]
 [6.50487428]
 [0.038903  ]
 [9.81411172]
 [4.18382474]]
****************************************************************************************************
modelo treinado.
learning_rate: 0.25 valor de theta final [[-0.20757347]
 [ 6.31923751]
 [ 7.22308399]
 [ 0.43389104]
 [10.49709979]
 [ 4.78159486]]
****************************************************************************************************
modelo treinado.
learning_rate: 0.75 valor de theta final [[-5.41743211e+77]
 [-2.80606707e+77]
 [-2.76090209e+77]
 [-2.79518341e+77]
 [-2.82800369e+77]
 [-2.78549888e+77]]
*****************************************************************************************

In [33]:
learning_rate =(0.075,0.10,0.25,0.75)



for i in learning_rate:
    rg = regLinear(learning_rate = i, num_steps = 20)
    rg.fit(Xtrain,ytrain)
    rg_theta_final = rg.theta_final
    
    print("learning_rate:",i,"valor de theta final", rg_theta_final)
    print("*****"*20)

modelo treinado.
learning_rate: 0.075 valor de theta final [[5.2840565 ]
 [3.59590411]
 [4.15529099]
 [2.15093167]
 [4.98253393]
 [3.75402645]]
****************************************************************************************************
modelo treinado.
learning_rate: 0.1 valor de theta final [[6.24376807]
 [3.56338791]
 [3.43171602]
 [1.54465845]
 [5.89159311]
 [2.32966946]]
****************************************************************************************************
modelo treinado.
learning_rate: 0.25 valor de theta final [[4.45745016]
 [4.39293885]
 [4.50205336]
 [1.01737696]
 [7.3046809 ]
 [3.00187627]]
****************************************************************************************************
modelo treinado.
learning_rate: 0.75 valor de theta final [[-2.74342073e+08]
 [-1.42100947e+08]
 [-1.39813764e+08]
 [-1.41549797e+08]
 [-1.43211827e+08]
 [-1.41059363e+08]]
***********************************************************************************************

In [34]:
learning_rate =(0.075,0.10,0.25,0.75)



for i in learning_rate:
    rg = regLinear(learning_rate = i, num_steps = 500)
    rg.fit(Xtrain,ytrain)
    rg_theta_final = rg.theta_final
    
    print("learning_rate:",i,"valor de theta final", rg_theta_final)
    print("*****"*20)

modelo treinado.
learning_rate: 0.075 valor de theta final [[-4.88400582e-03]
 [ 6.23400316e+00]
 [ 7.13770127e+00]
 [ 3.68248295e-01]
 [ 1.04240545e+01]
 [ 4.69791367e+00]]
****************************************************************************************************
modelo treinado.
learning_rate: 0.1 valor de theta final [[-0.24843176]
 [ 6.33590386]
 [ 7.23979904]
 [ 0.44906687]
 [10.51092632]
 [ 4.79843594]]
****************************************************************************************************
modelo treinado.
learning_rate: 0.25 valor de theta final [[-0.39807386]
 [ 6.39789116]
 [ 7.3014313 ]
 [ 0.50284868]
 [10.56229129]
 [ 4.8598197 ]]
****************************************************************************************************
modelo treinado.
learning_rate: 0.75 valor de theta final [[-9.53347116e+192]
 [-4.93805163e+192]
 [-4.85857136e+192]
 [-4.91889882e+192]
 [-4.97665519e+192]
 [-4.90185622e+192]]
***********************************************

In [10]:
from sklearn.linear_model import LinearRegression

In [11]:
lm = LinearRegression()
lm.fit(Xtrain, ytrain)
print(lm.intercept_, lm.coef_)

-0.3988073374988339 [ 6.39819248  7.30173225  0.50311872 10.56253883  4.86012208]


In [12]:
rg =regLinear(learning_rate = 0.25, num_steps = 200)
rg.fit(Xtrain, ytrain)
rg.theta_final

modelo treinado.


array([[-0.23562173],
       [ 6.33038027],
       [ 7.23480347],
       [ 0.44457891],
       [10.50640207],
       [ 4.79313477]])

In [13]:
regLinearEquacaoNormal(Xtrain,ytrain)

array([-0.39880734,  6.39819248,  7.30173225,  0.50311872, 10.56253883,
        4.86012208])

In [14]:
from sklearn.preprocessing import PolynomialFeatures

In [15]:
poly_features = PolynomialFeatures(degree = 2, include_bias = False)

In [16]:
X_poly = poly_features.fit_transform(Xtrain )
print(Xtrain.shape,X_poly.shape)

(5000, 5) (5000, 20)


In [17]:
poly_fit = LinearRegression() 
poly_fit.fit(X_poly, ytrain)

poly_fit.intercept_, poly_fit.coef_

(0.02408829894702791,
 array([ 17.82766045,  18.51764259, -21.21737427,   8.24556191,
          5.62616108, -11.70132627,  -0.42505923,   0.46198251,
          0.9659422 ,  -0.31197023, -12.15637379,   0.88698983,
          1.05847985,   0.34741528,  20.62798431,   1.51193294,
         -0.96145085,   0.31591321,   0.38243331,  -0.39489664]))

In [18]:
regLinearEquacaoNormal(X_poly,ytrain)

array([  0.0240883 ,  17.82766045,  18.51764259, -21.21737427,
         8.24556191,   5.62616108, -11.70132627,  -0.42505923,
         0.46198251,   0.9659422 ,  -0.31197023, -12.15637379,
         0.88698983,   1.05847985,   0.34741528,  20.62798431,
         1.51193294,  -0.96145085,   0.31591321,   0.38243331,
        -0.39489664])

In [19]:
def polyFitReg(X, y, grau, base_model, base_model_name):
    
    polybig_features = PolynomialFeatures(degree = grau, include_bias = False)
    std_scaler = StandardScaler()
    basemodel = base_model
    
   
    polynomial_regression = Pipeline([
            ("poly_features", polybig_features),
            ("std_scaler", std_scaler),
            (base_model_name, base_model),
        ])
    
    polynomial_regression.fit(X, y)
    return polynomial_regression

In [25]:
grau = 2 

for alpha in [0, 0.001, 0.01, 0.1, 1, 10, 100]:
    
    model_name = 'Ridge_ alpha: '+str(alpha)
    polyfit = polyFitReg(Xtrain, 
                         ytrain, 
                         grau, 
                         base_model = Ridge(alpha = alpha), 
                         base_model_name = model_name)
    
    print("alpha",alpha)
    print(poly_fit.intercept_, poly_fit.coef_)
    print("*********"*10)
    print("*********"*10)

alpha 0
0.02408829894702791 [ 17.82766045  18.51764259 -21.21737427   8.24556191   5.62616108
 -11.70132627  -0.42505923   0.46198251   0.9659422   -0.31197023
 -12.15637379   0.88698983   1.05847985   0.34741528  20.62798431
   1.51193294  -0.96145085   0.31591321   0.38243331  -0.39489664]
******************************************************************************************
******************************************************************************************
alpha 0.001
0.02408829894702791 [ 17.82766045  18.51764259 -21.21737427   8.24556191   5.62616108
 -11.70132627  -0.42505923   0.46198251   0.9659422   -0.31197023
 -12.15637379   0.88698983   1.05847985   0.34741528  20.62798431
   1.51193294  -0.96145085   0.31591321   0.38243331  -0.39489664]
******************************************************************************************
******************************************************************************************
alpha 0.01
0.02408829894702791 [ 17.82766045  

In [27]:
grau = 20 

for alpha in [0, 0.001, 0.01, 0.1, 1, 10, 100]:
    
    model_name = 'Ridge_ alpha: '+str(alpha)
    polyfit = polyFitReg(Xtrain, 
                         ytrain, 
                         grau, 
                         base_model = Ridge(alpha = alpha), 
                         base_model_name = model_name)
    
    print("alpha",alpha)
    print(poly_fit.intercept_, poly_fit.coef_)
    print("*********"*10)
    print("*********"*10)



alpha 0
0.02408829894702791 [ 17.82766045  18.51764259 -21.21737427   8.24556191   5.62616108
 -11.70132627  -0.42505923   0.46198251   0.9659422   -0.31197023
 -12.15637379   0.88698983   1.05847985   0.34741528  20.62798431
   1.51193294  -0.96145085   0.31591321   0.38243331  -0.39489664]
******************************************************************************************
******************************************************************************************
alpha 0.001
0.02408829894702791 [ 17.82766045  18.51764259 -21.21737427   8.24556191   5.62616108
 -11.70132627  -0.42505923   0.46198251   0.9659422   -0.31197023
 -12.15637379   0.88698983   1.05847985   0.34741528  20.62798431
   1.51193294  -0.96145085   0.31591321   0.38243331  -0.39489664]
******************************************************************************************
******************************************************************************************
alpha 0.01
0.02408829894702791 [ 17.82766045  

In [35]:
learning_rate =(0.075,0.10,0.25,0.75)



for i in learning_rate:
    rg = regLinear(learning_rate = i, num_steps = 200)
    rg.fit(X_poly,ytrain)
    rg_theta_final = rg.theta_final
    print("grau:2")
    print("learning_rate:",i,"valor de theta final", rg_theta_final)
    print("*****"*20)

modelo treinado.
grau:2
learning_rate: 0.075 valor de theta final [[ 3.89562846]
 [ 4.4440892 ]
 [ 1.66810902]
 [-0.94353614]
 [ 3.68364953]
 [ 2.36755206]
 [-1.0114779 ]
 [ 1.93975694]
 [ 0.48921814]
 [ 1.48333416]
 [ 1.34670137]
 [ 1.78499096]
 [-0.11739008]
 [ 2.40574318]
 [ 2.61699987]
 [ 1.56036745]
 [ 1.4793079 ]
 [-1.14516272]
 [ 2.85202103]
 [ 2.52221687]
 [-0.24285364]]
****************************************************************************************************
modelo treinado.
grau:2
learning_rate: 0.1 valor de theta final [[ 3.97049828]
 [ 2.44877912]
 [ 4.65697393]
 [-1.95804616]
 [ 2.98412611]
 [ 2.36303158]
 [ 0.37636542]
 [ 0.86138895]
 [-0.07280626]
 [ 3.50795266]
 [ 2.11230622]
 [-0.40255832]
 [-0.30348219]
 [ 2.80255967]
 [ 2.10226615]
 [ 3.40542089]
 [ 0.97660138]
 [-1.57405141]
 [ 2.68334603]
 [ 2.29344368]
 [-0.02365263]]
****************************************************************************************************
modelo treinado.
grau:2
learning_rat

In [36]:
lm = LinearRegression()
lm.fit(X_poly, ytrain)
print(lm.intercept_, lm.coef_)

0.02408829894702791 [ 17.82766045  18.51764259 -21.21737427   8.24556191   5.62616108
 -11.70132627  -0.42505923   0.46198251   0.9659422   -0.31197023
 -12.15637379   0.88698983   1.05847985   0.34741528  20.62798431
   1.51193294  -0.96145085   0.31591321   0.38243331  -0.39489664]


In [37]:
def logLossCost(ytrue, ypred_probs):
    return (ytrue * np.log(ypred_probs) + (1 - ytrue) * np.log(1 - ypred_probs)).mean() * -1

In [38]:
def sigmoid(t):
    return 1 / (1 + np.exp(-t))

In [62]:
class regLogistica():


    def __init__(self,learning_rate, num_steps, limiar):
        self.learning_rate = learning_rate
        self.num_steps = num_steps
        self.limiar = limiar
        
    def fit(X, y):
        y = y.reshape(-1,1)
        X_b = np.c_[np.ones(X.shape[0]), X]
        theta = np.random.randn(X_b.shape[1],1)
        for step in range(num_steps):
            yscores = sigmoid(X_b.dot(theta))
            gradient = X_b.T.dot(yscores - y)
            theta = theta - learning_rate * gradient
            logloss_step = logLossCost(ytrue = y, ypred_probs = yscores)
  
        theta_final = theta
        print("Log Loss:", logloss_step)
        print('\n-----------------------------------------------------------\n')
    
    def predict_proba(self,X):
        probs = sigmoid(X_b.dot(theta_final))
        print(probs)
        
    def predict(self,X):
        ypred = np.where(probs > limiar, 1, 0)
        print(ypred)
        

    

In [40]:
def getData2():
    X, y = make_classification(n_classes=2, n_features=5, n_samples=10000, random_state = 0)
    return X, y

In [56]:
X_,y_ = getData()
print(X1.shape,y1.shape)

(10000, 5) (10000,)


In [64]:
learning_rate = 0.25
num_steps = 20
limiar = 0.5 
regLogistica(learning_rate ,num_steps, limiar )

<__main__.regLogistica at 0x2a74ab277c0>

In [65]:
regLogistica.fit(X_,y_)

Log Loss: nan

-----------------------------------------------------------



  return (ytrue * np.log(ypred_probs) + (1 - ytrue) * np.log(1 - ypred_probs)).mean() * -1


In [None]:
# opa tudo bem eu to mandano a atividade mais sei que deve ta errado mais nao sei onde se encotra o erro 
# resolvir manda logo do que chama no discord pq achei mais pratico  