In [1]:
import pandas as pd
import numpy as np
from aux_fun import *

In [2]:
# Render floats in pandas DataFrames with two decimal places.
pd.set_option('display.float_format', '{:.2f}'.format)

# Echo the value of every top-level expression in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [3]:
# Load the training data set and preview the first rows.
url_treino = 'https://raw.githubusercontent.com/Cayan-Portela/ceub/main/dados/bank_customer_treino.csv'
dados = pd.read_csv(url_treino)
dados.head()

Unnamed: 0,customer_id,credit_score,country,gender,age,tenure,balance,products_number,credit_card,active_member,estimated_salary,churn
0,15765192,564,France,Male,26,7,84006.88,2,0,0,183490.99,0
1,15631882,688,Germany,Male,45,9,103399.87,1,0,0,129870.93,0
2,15777586,784,Spain,Female,42,2,109052.04,2,1,0,6409.55,0
3,15577107,657,Spain,Female,22,6,0.0,3,0,1,168412.07,1
4,15722731,653,France,Male,46,0,119556.1,1,1,0,78250.13,1


In [4]:
# Encode gender as a binary indicator: 1 = "Male", 0 = anything else.
# The column is overwritten in place, so an already-encoded 1 is accepted too;
# otherwise re-running this cell would silently turn every row into 0.
dados['gender'] = np.where(dados.gender.isin(["Male", 1]), 1, 0)

In [5]:
# Model specification: response column (col_y) and covariates (col_x).
col_y = 'churn'
col_x = ['credit_score', 'gender', 'age', 'credit_card']

# Same covariates, with the continuous ones replaced by their standardized
# ("_std") columns; the binary indicators are kept unchanged.
col_x_std = [
    f'{coluna}_std' if coluna in ('credit_score', 'age') else coluna
    for coluna in col_x
]

In [36]:
# Z-score the two continuous covariates using the mean and standard deviation
# of the training data (np.std called with its default arguments).
media_score, std_score = np.mean(dados.credit_score), np.std(dados.credit_score)
media_age, std_age = np.mean(dados.age), np.std(dados.age)

# Store the standardized versions as new columns next to the originals.
dados['credit_score_std'] = dados.credit_score.sub(media_score).div(std_score)
dados['age_std'] = dados.age.sub(media_age).div(std_age)
dados.head()

Unnamed: 0,customer_id,credit_score,country,gender,age,tenure,balance,products_number,credit_card,active_member,estimated_salary,churn,credit_score_std,age_std
0,15765192,564,France,1,26,7,84006.88,2,0,0,183490.99,0,-0.88,-1.23
1,15631882,688,Germany,1,45,9,103399.87,1,0,0,129870.93,0,0.4,0.56
2,15777586,784,Spain,0,42,2,109052.04,2,1,0,6409.55,0,1.39,0.27
3,15577107,657,Spain,0,22,6,0.0,3,0,1,168412.07,1,0.08,-1.61
4,15722731,653,France,1,46,0,119556.1,1,1,0,78250.13,1,0.03,0.65


In [14]:
# Design matrices built by matriz_x: one from the raw covariates and one from
# the standardized covariates.
# NOTE(review): X_mat has 5 columns for 4 covariates, so matriz_x presumably
# prepends an intercept column — confirm in aux_fun.
X_mat = matriz_x(dados=dados, colunas=col_x)
X_mat_std = matriz_x(dados=dados, colunas=col_x_std)

# Response vector.
y = dados[col_y]

Newton-Raphson

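$\beta^{(k+1)} = \beta^{(k)} + (X^\top W X)^{-1} X^\top (y - p)$, where $p$ is the vector of fitted probabilities at $\beta^{(k)}$ and $W = \mathrm{diag}\big(p_i (1 - p_i)\big)$.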

In [15]:
# Newton-Raphson iterations for the logistic-regression coefficients,
# fitted on the raw (unstandardized) design matrix.
betas = beta_inicial(X_mat)

# Fixed number of iterations; there is no convergence test.
# p and W are computed only inside the loop: computing them beforehand was
# redundant work (W is a dense n x n matrix) because the first iteration
# recomputes both from the same betas.
for i in range(20):
    # Fitted probabilities at the current coefficients.
    p = calcula_p(X=X_mat, B=betas)
    # Diagonal matrix built from p and handed to beta_update.
    # NOTE(review): Newton-Raphson weights are p * (1 - p); presumably
    # beta_update derives them from this matrix — confirm in aux_fun.
    W = np.diag(p)
    # beta_update returns the increment that is added to the coefficients.
    beta_k1 = beta_update(X=X_mat, W=W, y=y, p=p)
    betas = betas + beta_k1

In [43]:
# Sanity check: (rows, columns) of the raw design matrix.
X_mat.shape

(6000, 5)

In [25]:
# Gradient descent on the standardized design matrix.
betas_std = beta_inicial(X_mat_std)
lr = 0.01                # learning rate (step size)
m = X_mat_std.shape[0]   # row count of the matrix actually used in the loop

for i in range(100000):
    # Fitted probabilities at the current coefficients.
    p = calcula_p(X=X_mat_std, B=betas_std)
    # Gradient of the average negative log-likelihood: -(1/m) * X^T (y - p).
    grad_beta = -(1/m) * X_mat_std.T @ (y-p)
    # Move against the gradient.
    betas_std = betas_std - lr * grad_beta

In [47]:
# newton-raphson
print(f"Newton-Raphson: {betas}")

# gradiente descendente
print(f"Gradiente: {betas_std}")

Newton-Raphson: [-3.25658521e+00 -6.58125817e-04 -4.89180017e-01  6.40564764e-02
 -4.90816166e-02]
Gradiente: [-1.18023098 -0.06374409 -0.48918016  0.680995   -0.04908161]


In [50]:
# Fitted probabilities for the first five rows under each parameterization;
# each model is scored on the design matrix it was fitted with.
print(
    f"Predito (NR):    {calcula_p(X_mat[:5,:], betas)}",
    f"Preditos (Grad): {calcula_p(X_mat_std[:5,:], betas_std)}",
    sep="\n",
)

Predito (NR):    [0.0793304  0.21148217 0.24392764 0.09281557 0.21788999]
Preditos (Grad): [0.07933037 0.2114822  0.24392769 0.09281553 0.21789004]


In [8]:
from sklearn.linear_model import LogisticRegression

In [9]:
# Reference fit with scikit-learn on the same raw covariates.
# LogisticRegression() with default arguments applies L2 regularization and
# fits its own intercept, so its coefficients can differ slightly from an
# unpenalized maximum-likelihood fit.
sk_logistica = LogisticRegression()
sk_logistica.fit(dados[col_x], y)

In [10]:
# Side-by-side comparison: hand-rolled Newton-Raphson coefficients versus the
# scikit-learn (intercept, coefficients) pair.
print(
    f"Nossos Betas: {betas}",
    f"Sklearn Betas: {sk_logistica.intercept_, sk_logistica.coef_}",
    sep="\n",
)

Nossos Betas: [-3.25658521e+00 -6.58125817e-04 -4.89180017e-01  6.40564764e-02
 -4.90816166e-02]
Sklearn Betas: (array([-3.27061466]), array([[-0.00063749, -0.49520859,  0.0641967 , -0.052929  ]]))


Achar a probabilidade de churn de cada cliente na base de teste

In [11]:
# Load the test set and encode gender as 1 = "Male", 0 = otherwise.
# NOTE(review): the file name is spelled "custoter" here but "customer" in the
# training URL — confirm it matches the file name in the repository.
url_teste = 'https://raw.githubusercontent.com/Cayan-Portela/ceub/main/dados/bank_custoter_teste.csv'
dados_teste = pd.read_csv(url_teste)
dados_teste['gender'] = np.where(dados_teste['gender'] == "Male", 1, 0)

In [12]:
# Score the test set with the Newton-Raphson coefficients (raw covariates).
X_mat_teste = matriz_x(dados=dados_teste, colunas=col_x)
p_teste = calcula_p(B=betas, X=X_mat_teste)

In [14]:
# Ten largest estimated probabilities in "p_teste", in descending order:
# sort ascending, then walk backwards over the last ten entries.
np.sort(p_teste)[:-11:-1]

array([0.86478835, 0.83914253, 0.83676744, 0.83495899, 0.82278887,
       0.80055285, 0.79392211, 0.78976356, 0.78309564, 0.77431609])

In [24]:
# Confusion matrix on the test set: predicted class (rows, cut-off 0.5)
# against the observed churn label (columns).
pd.crosstab(index=np.where(p_teste >= 0.5, 1, 0), columns=dados_teste['churn'])

churn,0,1
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1
0,3089,753
1,116,42


In [28]:
# Relative frequency of each churn value in the test set.
dados_teste.churn.value_counts(normalize=True)

0   0.80
1   0.20
Name: churn, dtype: float64

In [29]:
# Reference value of sqrt(10) computed by NumPy.
np.sqrt(10)

3.1622776601683795

Calcule $\sqrt{10}$ usando newton-raphson

$\sqrt{10} = x$

$x^2 = 10$

$x^2 - 10 = 0$

In [2]:
def x_update(x, a=10):
    """Return the Newton-Raphson step f(x) / f'(x) for f(x) = x**2 - a.

    Subtracting the returned step from ``x`` gives the next approximation of
    sqrt(a); that update is algebraically the same as ``(x + a/x) / 2``.

    Parameters
    ----------
    x : number
        Current approximation. Must be non-zero.
    a : number, default 10
        Value whose square root is sought. The default reproduces the
        original hard-coded behavior.

    Returns
    -------
    float
        The step to subtract from ``x``.

    Raises
    ------
    ZeroDivisionError
        If ``x`` is a plain Python number equal to 0.
    """
    return (x**2 - a) / (2*x)

In [3]:
# Newton-Raphson for sqrt(10): start at 3 and apply 20 updates, each one
# subtracting the step returned by x_update.
x_ = 3
for passo in range(20):
    x_ -= x_update(x_)

Show that a root of the equation $x^4 − 2x^3 + 2x − 2 = 0$ lies between $x = 1$ and $x = 2$.

In [1]:
def x_update_dois(x):
    """Return the Newton-Raphson step f(x) / f'(x) for f(x) = x**4 - 2*x**3 + 2*x - 2.

    The caller subtracts the returned value from ``x`` to get the next iterate.
    The derivative 4*x**3 - 6*x**2 + 2 is zero at x = 1, so the step is
    undefined there.
    """
    f_x = x**4 - 2*x**3 + 2*x - 2
    df_x = 4*x**3 - 6*x**2 + 2
    return f_x / df_x

In [2]:
# Newton-Raphson for the quartic: start at 1.5 (inside the interval [1, 2])
# and apply 20 updates, each one subtracting the step from x_update_dois.
x_ = 1.5
for passo in range(20):
    x_ -= x_update_dois(x_)

In [4]:
# Display the current value of x_.
x_

1.7166727492822866

In [3]:
def update_x_new(x):
    """Return (2*x**3 + 5) / (3*x**2 - 2).

    This is algebraically x - f(x)/f'(x) for f(x) = x**3 - 2*x - 5, i.e. the
    next Newton-Raphson iterate itself rather than a step to subtract.
    """
    numerador = 2*x**3 + 5
    denominador = 3*x**2 - 2
    return numerador / denominador

In [5]:
# Newton-Raphson with update_x_new. That function returns the next iterate
# directly, so its result is assigned rather than subtracted from x_.
# The loop previously re-ran x_update_dois, which left update_x_new unused
# and x_ unchanged at the already converged quartic root.
# The starting value is whatever x_ holds from the cells above.
for i in range(20):
    x_ = update_x_new(x_)

Gradiente

In [43]:
# Fixed-step iteration towards sqrt(10): starting from 0, repeatedly move x_
# against (x_**2 - 10), scaled by the step size alpha.
alpha = 0.01
x_ = 0
for passo in range(550):
    x_ -= alpha * (x_**2 - 10)

In [None]:
# Gradient descent on the standardized design matrix (repeat of the earlier
# gradient-descent cell, kept next to the step-by-step version that follows).
betas_std = beta_inicial(X_mat_std)
lr = 0.01                # learning rate (step size)
m = X_mat_std.shape[0]   # row count of the matrix actually used in the loop

for i in range(100000):
    # Fitted probabilities at the current coefficients.
    p = calcula_p(X=X_mat_std, B=betas_std)
    # Gradient of the average negative log-likelihood: -(1/m) * X^T (y - p).
    grad_beta = -(1/m) * X_mat_std.T @ (y-p)
    # Move against the gradient.
    betas_std = betas_std - lr * grad_beta

In [64]:
# Four gradient-descent steps written out by hand on the RAW (unstandardized)
# design matrix, with a fixed step size of 0.01. Bare names are displayed as
# cell output, so intermediate coefficients and probabilities can be inspected.
# `m` is not defined here; it must already exist from an earlier cell.

# Step 0: initial coefficients and the probabilities they imply.
betas_ = beta_inicial(X_mat)
betas_
p1 = calcula_p(X=X_mat, B=betas_)
p1
# Step 1: gradient at betas_, then move against it.
grad_beta = -(1/m) * X_mat.T @ (y-p1)
betas_k2 = betas_ - 0.01 * grad_beta

betas_k2
p2 = calcula_p(X=X_mat, B=betas_k2)
p2
# Step 2.
grad_beta = -(1/m) * X_mat.T @ (y-p2)
betas_k3 = betas_k2 - 0.01 * grad_beta
betas_k3

# Step 3.
p3 = calcula_p(X=X_mat, B=betas_k3)
grad_beta = -(1/m) * X_mat.T @ (y-p3)
betas_k4 = betas_k3 - 0.01 * grad_beta
betas_k4

# Step 4.
p4 = calcula_p(X=X_mat, B=betas_k4)
grad_beta = -(1/m) * X_mat.T @ (y-p4)
betas_k5 = betas_k4 - 0.01 * grad_beta
betas_k5

# Probabilities implied by the coefficients after the fourth step.
calcula_p(X=X_mat, B=betas_k5)

array([0., 0., 0., 0., 0.])

array([0.5, 0.5, 0.5, ..., 0.5, 0.5, 0.5])

array([-2.93000000e-03, -1.91188333e+00, -1.79416667e-03, -1.01899167e-01,
       -2.09833333e-03])

array([0., 0., 0., ..., 0., 0., 0.])

array([-0.00086   , -0.57545   , -0.00085583, -0.0083525 , -0.000655  ])

array([1.21000000e-03, 7.60983333e-01, 8.25000000e-05, 8.51941667e-02,
       7.88333333e-04])

array([-0.00672   , -4.39921667, -0.00444417, -0.21215083, -0.00485167])

array([0., 0., 0., ..., 0., 0., 0.])

In [77]:
# Same gradient-descent update as above, run in a loop on the RAW
# (unstandardized) design matrix, printing the probabilities and coefficients
# at the start of every iteration.
# In the saved output the probabilities jump between 0 and 1 and end up as
# nan, with warnings pointing at np.exp(X @ B) inside calcula_p.
betas_ = beta_inicial(X_mat)
lr = 0.01                # learning rate (step size)
m = X_mat.shape[0]       # number of rows of the design matrix

for i in range(10):
    p =  calcula_p(X=X_mat, B=betas_)
    print(p, betas_)
    # Gradient expression -(1/m) * X^T (y - p), then a step against it.
    grad_beta = -(1/m) * X_mat.T @ (y-p)
    betas_ = betas_ - lr * grad_beta

[0.5 0.5 0.5 ... 0.5 0.5 0.5] [0. 0. 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.] [-2.93000000e-03 -1.91188333e+00 -1.79416667e-03 -1.01899167e-01
 -2.09833333e-03]
[8.97463596e-142 7.84630527e-173 8.20007516e-197 ... 4.40088557e-116
 4.82604370e-192 2.66341720e-134] [-0.00086    -0.57545    -0.00085583 -0.0083525  -0.000655  ]
[1. 1. 1. ... 1. 1. 1.] [1.21000000e-03 7.60983333e-01 8.25000000e-05 8.51941667e-02
 7.88333333e-04]
[0. 0. 0. ... 0. 0. 0.] [-0.00672    -4.39921667 -0.00444417 -0.21215083 -0.00485167]
[0. 0. 0. ... 0. 0. 0.] [-0.00465    -3.06278333 -0.00350583 -0.11860417 -0.00340833]
[0. 0. 0. ... 0. 0. 0.] [-0.00258   -1.72635   -0.0025675 -0.0250575 -0.001965 ]
[1.84257307e-095 6.80105301e-116 3.07123248e-132 ... 1.33109755e-077
 3.35407546e-129 3.58511614e-090] [-0.00051    -0.38991667 -0.00162917  0.06848917 -0.00052167]
[ 1.  1. nan ...  1. nan  1.] [ 1.56000000e-03  9.46516667e-01 -6.90833333e-04  1.62035833e-01
  9.21666667e-04]
[nan nan nan ... nan nan nan] [nan nan nan nan n

  return np.exp(X @ B) / (1 + np.exp(X @ B))
  return np.exp(X @ B) / (1 + np.exp(X @ B))


In [84]:
# The exp(z) / (1 + exp(z)) expression evaluated at a large argument.
# np.exp(1000) overflows float64 to inf, so this evaluates inf / inf.
# NOTE(review): the saved output (0.99995...) equals exp(10) / (1 + exp(10)),
# so it appears to come from an earlier version of this cell — re-run to refresh.
np.exp(1000)/(1+np.exp(1000))

0.9999546021312976

In [90]:
# inf / inf is undefined and yields nan, which is how an overflowing
# exp(z) / (1 + exp(z)) turns into nan.
# np.inf is the canonical spelling; the np.Inf alias was removed in NumPy 2.0.
np.inf/np.inf

nan

In [75]:
# Display the current value of p (left by whichever cell assigned it last).
p

array([0., 0., 0., ..., 0., 0., 0.])