In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score

# Use o dataset do link (https://stats.idre.ucla.edu/stat/data/binary.csv) a seguir para aplicar sobre ele o gradiente descendente estocástico.

## O label (target) do dataset é o atributo admit.

## Use 70% do dados para treino e 30% para teste sem validação cruzada, mas com estratificação. 

In [2]:
data = pd.read_csv("datasets/binary.csv")
data.head()

Unnamed: 0,admit,gre,gpa,rank
0,0,380,3.61,3
1,1,660,3.67,3
2,1,800,4.0,1
3,1,640,3.19,4
4,0,520,2.93,4


In [3]:
X = data.iloc[:,1:4].values
y = data.iloc[:,0].values

X.shape, y.shape

((400, 3), (400,))

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0, stratify=y)

X_train.shape, y_train.shape, X_test.shape, y_test.shape

((280, 3), (280,), (120, 3), (120,))

# Obtenha e compare os coeficientes (coef_ e intercept_) usando modelos com diferentes penalidades / regularizações:

### Sem Regularização

### Com Reguralização L1

### Com Regularização L2

### Com Regularização Elastic Net

## Para os experimentos com Regularização, crie também modelos variando a força da regularização (alpha). Use alpha com os seguintes valores: 0.0001, 0.01, 1, 10, 100.

In [5]:
# Sem regularização

model_SGD = SGDClassifier(penalty='none', random_state=0)
model_SGD.fit(X_train, y_train)
y_pred_sgd = model_SGD.predict(X_test)
print("Accuracy of SGD Classifier without regularization is {}".format(accuracy_score(y_test, y_pred_sgd)))
print("Intercept: {}\nCoef: {}".format(model_SGD.intercept_, model_SGD.coef_))

Accuracy of SGD Classifier without regularization is 0.6833333333333333
Intercept: [-134.32732549]
Coef: [[-1451.67173823   -78.14081045 -1115.02957907]]




In [6]:
alphas = [0.0001, 0.01, 1, 10, 100]

In [7]:
# With L1 regularization
reg = 'l1'
for alpha in alphas:
    model_SGD_l1 = SGDClassifier(alpha=alpha, penalty=reg, random_state=0)
    model_SGD_l1.fit(X_train, y_train)
    y_pred_sgd = model_SGD_l1.predict(X_test)
    print("Accuracy of SGD Classifier with {} regularization and alpha {} is {}".format(reg, alpha, 
                                                                    accuracy_score(y_test, y_pred_sgd)))
    print("Intercept: {}\nCoef: {}\n".format(model_SGD_l1.intercept_, model_SGD_l1.coef_))

Accuracy of SGD Classifier with l1 regularization and alpha 0.0001 is 0.6833333333333333
Intercept: [-134.32732549]
Coef: [[-1450.79597776   -77.26504997 -1114.15381859]]

Accuracy of SGD Classifier with l1 regularization and alpha 0.01 is 0.6833333333333333
Intercept: [-3.95623582]
Coef: [[-21.30973229   0.         -37.27943954]]

Accuracy of SGD Classifier with l1 regularization and alpha 1 is 0.6833333333333333
Intercept: [-0.36191896]
Coef: [[-0.27501739  0.          0.        ]]

Accuracy of SGD Classifier with l1 regularization and alpha 10 is 0.6833333333333333
Intercept: [-0.37744003]
Coef: [[-111.42702983    0.            0.        ]]

Accuracy of SGD Classifier with l1 regularization and alpha 100 is 0.6833333333333333
Intercept: [-0.29760467]
Coef: [[-82.19093654   0.           0.        ]]





In [8]:
# With L2 regularization
reg = 'l2'
for alpha in alphas:
    model_SGD_l2 = SGDClassifier(alpha=alpha, penalty=reg, random_state=0)
    model_SGD_l2.fit(X_train, y_train)
    y_pred_sgd = model_SGD_l2.predict(X_test)
    print("Accuracy of SGD Classifier with {} regularization and alpha {} is {}".format(reg, alpha, 
                                                                        accuracy_score(y_test, y_pred_sgd)))
    print("Intercept: {}\nCoef: {}\n".format(model_SGD_l2.intercept_, model_SGD_l2.coef_))

Accuracy of SGD Classifier with l2 regularization and alpha 0.0001 is 0.6833333333333333
Intercept: [-129.33342876]
Coef: [[-1167.15298041   -18.38265944  -696.12338474]]

Accuracy of SGD Classifier with l2 regularization and alpha 0.01 is 0.6833333333333333
Intercept: [-4.68050101]
Coef: [[-19.57189586  -0.30825736 -11.67323789]]

Accuracy of SGD Classifier with l2 regularization and alpha 1 is 0.6833333333333333
Intercept: [-0.87391493]
Coef: [[-0.2        -0.00513571 -0.12285714]]

Accuracy of SGD Classifier with l2 regularization and alpha 10 is 0.6833333333333333
Intercept: [-0.54714406]
Coef: [[-0.02001175 -0.00039881 -0.01265029]]

Accuracy of SGD Classifier with l2 regularization and alpha 100 is 0.6833333333333333
Intercept: [-0.31482603]
Coef: [[ -7.14780126e-04  -7.24072268e-05  -1.35808224e-03]]





In [9]:
# With Elastic net regularization
reg = 'elasticnet'
for alpha in alphas:
    model_SGD_en = SGDClassifier(alpha=alpha, penalty=reg, random_state=0)
    model_SGD_en.fit(X_train, y_train)
    y_pred_sgd = model_SGD_en.predict(X_test)
    print("Accuracy of SGD Classifier with {} regularization and alpha {} is {}".format(reg, alpha, 
                                                                        accuracy_score(y_test, y_pred_sgd)))
    print("Intercept: {}\nCoef: {}\n".format(model_SGD_en.intercept_, model_SGD_en.coef_))

Accuracy of SGD Classifier with elasticnet regularization and alpha 0.0001 is 0.6833333333333333
Intercept: [-129.33342876]
Coef: [[-1208.65708869   -21.0145582   -737.9810785 ]]

Accuracy of SGD Classifier with elasticnet regularization and alpha 0.01 is 0.6833333333333333
Intercept: [-4.74961164]
Coef: [[ -7.28863392  -0.20039619 -13.81413965]]

Accuracy of SGD Classifier with elasticnet regularization and alpha 1 is 0.6833333333333333
Intercept: [-0.84642287]
Coef: [[ 0.  0.  0.]]

Accuracy of SGD Classifier with elasticnet regularization and alpha 10 is 0.6833333333333333
Intercept: [-0.5403354]
Coef: [[-0.01875404  0.          0.        ]]

Accuracy of SGD Classifier with elasticnet regularization and alpha 100 is 0.6833333333333333
Intercept: [-0.31226781]
Coef: [[-0.00127551  0.          0.        ]]



