In [1021]:
import numpy as np
import pandas as pd # Для работы с данными
import scipy.stats # При работе со статистикой
import matplotlib.pyplot as plt  # Библиотека для визуализации результатов 
from math import log
data = pd.read_csv('https://raw.githubusercontent.com/a-milenkin/datasets_for_t-tests/main/athletes.csv') # датасет - https://www.kaggle.com/rio2016/olympic-games
data


Unnamed: 0,id,name,nationality,sex,dob,height,weight,sport,gold,silver,bronze
0,736041664,A Jesus Garcia,ESP,male,10/17/69,1.72,64.0,athletics,0,0,0
1,532037425,A Lam Shin,KOR,female,9/23/86,1.68,56.0,fencing,0,0,0
2,435962603,Aaron Brown,CAN,male,5/27/92,1.98,79.0,athletics,0,0,1
3,521041435,Aaron Cook,MDA,male,1/2/91,1.83,80.0,taekwondo,0,0,0
4,33922579,Aaron Gate,NZL,male,11/26/90,1.81,71.0,cycling,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
11533,265605954,Zurian Hechavarria,CUB,female,8/10/95,1.64,58.0,athletics,0,0,0
11534,214461847,Zuzana Hejnova,CZE,female,12/19/86,1.73,63.0,athletics,0,0,0
11535,88361042,di Xiao,CHN,male,5/14/91,1.85,100.0,wrestling,0,0,0
11536,900065925,le Quoc Toan Tran,VIE,male,4/5/89,1.60,56.0,weightlifting,0,0,0


In [None]:
# Задача подобрать зависимость как рост зависит от пола

In [1022]:
data[ pd.isnull(data['height'])].head()

Unnamed: 0,id,name,nationality,sex,dob,height,weight,sport,gold,silver,bronze
12,258556239,Abbas Qali,IOA,male,10/11/92,,,aquatics,0,0,0
47,469953606,Abdoullah Bamoussa,ITA,male,6/8/86,,,athletics,0,0,0
50,325809293,Abdul Omar,GHA,male,10/3/93,,,boxing,0,0,0
52,262868423,Abdulaziz Alshatti,IOA,male,10/30/90,,,fencing,0,0,0
56,897549624,Abdullah Hel Baki,BAN,male,8/1/89,,,shooting,0,0,0


In [1023]:
data = data[pd.isnull(data['height']) == 0 ]

In [1024]:
X = data ['height']
#X = X.astype(np.int)
X = X.reset_index(drop=True)
X

0        1.72
1        1.68
2        1.98
3        1.83
4        1.81
         ... 
11203    1.64
11204    1.73
11205    1.85
11206    1.60
11207    1.85
Name: height, Length: 11208, dtype: float64

In [1025]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

In [1026]:
le.fit( data['sex'])

LabelEncoder()

In [1027]:
Y = pd.Series (data = le.transform(data['sex']))
Y

0        1
1        0
2        1
3        1
4        1
        ..
11203    0
11204    0
11205    1
11206    1
11207    1
Length: 11208, dtype: int32

In [None]:
# обычный градиентный спуск

In [1028]:
EPOCHS = 100
LEARNING_RATE = 0.0001

In [1029]:
def cost_function(X, Y, theta0, theta1):
    total_cost = 0
    for i in range(len(X)):
        total_cost += (Y[i]*log(theta0 + theta1*X[i]) + (1 - Y[i])*log(1 - theta0 - theta1*X[i]))
        #total_cost += (Y[i]*log(theta0 + theta1*X[i]))
    return -total_cost/len(X) 

In [1030]:
def der_theta0(X, Y, theta0, theta1):
    total_cost = 0
    for i in range(len(X)):
        total_cost += (Y[i] / (theta0 + theta1*X[i]) + (1 - Y[i]) / (1 - theta0 - theta1*X[i]))
        #total_cost += X[i]
    return -total_cost/len(X)    

In [1031]:
def der_theta1(X, Y, theta0, theta1):
    total_cost = 0
    for i in range(len(X)):
        total_cost += (Y[i] / (theta0 + theta1*X[i]) * X[i] + (1 - Y[i]) / (1 - theta0 - theta1*X[i]) * X[i])
        #total_cost += X[i]
    return -total_cost/len(X) 

In [1032]:
theta0 = 0.20
theta1 = 0.20
for _ in range(EPOCHS):
    dt0 = der_theta0(X, Y, theta0, theta1)
    dt1 = der_theta1(X, Y, theta0, theta1)
    
    theta0 = theta0 - LEARNING_RATE * dt0
    theta1 -= LEARNING_RATE * dt1
    
    #print("t0:", theta0, "t1:", theta1, "cost:", cost_function(X, Y, theta0, theta1))

In [1033]:
print("t0:", theta0, "t1:", theta1, "cost:", cost_function(X, Y, theta0, theta1))

t0: 0.2198708014139396 t1: 0.23488970588041594 cost: 0.6755116388416587


In [None]:
# Nesterov Accelerated Gradient

In [1034]:
EPOCHS = 100
LEARNING_RATE = 0.0001
gamma = 0.9
eta = LEARNING_RATE * 0.1

In [1035]:
theta0 = 0.20
theta1 = 0.20
vtheta0 = 0
vtheta1 = 0
for _ in range(EPOCHS):
    dt0 = der_theta0(X, Y, theta0 - gamma*vtheta0 , theta1)
    dt1 = der_theta1(X, Y, theta0, theta1 - gamma*vtheta1)
    
    vtheta0 = gamma*vtheta0 + eta*dt0
    vtheta1 = gamma*vtheta1 + eta*dt1
    
    theta0 = theta0 - vtheta0
    theta1 = theta1 - vtheta1
    
    
    #print("t0:", theta0, "t1:", theta1, "cost:", cost_function(X, Y, theta0, theta1))

In [1036]:
print("t0:", theta0, "t1:", theta1, "cost:", cost_function(X, Y, theta0, theta1))

t0: 0.21799429416496002 t1: 0.23161665808246268 cost: 0.6733921271594489


In [None]:
#RMSProp

In [1037]:
EPOCHS = 100
LEARNING_RATE = 0.0001
gamma = 0.9
eta = LEARNING_RATE
epcilend = 0.00000001

In [1038]:
theta0 = 0.20
theta1 = 0.20
E0 = 0
E1 = 0
for _ in range(EPOCHS):
    dt0 = der_theta0(X, Y, theta0, theta1)
    dt1 = der_theta1(X, Y, theta0, theta1)
    
    E0 = gamma*E0 + (1 - gamma)*(dt0**2)
    E1 = gamma*E1 + (1 - gamma)*(dt1**2)
    
    theta0 = theta0 - LEARNING_RATE*dt0/((E0 + epcilend)**0.5)
    theta1 = theta1 - LEARNING_RATE*dt1/((E1 + epcilend)**0.5)

In [1039]:
print("t0:", theta0, "t1:", theta1, "cost:", cost_function(X, Y, theta0, theta1))

t0: 0.2109208347730115 t1: 0.21091946291378938 cost: 0.6662225480938885


In [None]:
# dataset Iris

In [1040]:
from sklearn import datasets

In [1041]:
iris = datasets.load_iris()
type(iris)
iris

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [1042]:
X = iris.data[50:] # забираем данные из датасета
Y = iris.target[:100]
#len(iris.data)

In [1043]:
X

array([[7. , 3.2, 4.7, 1.4],
       [6.4, 3.2, 4.5, 1.5],
       [6.9, 3.1, 4.9, 1.5],
       [5.5, 2.3, 4. , 1.3],
       [6.5, 2.8, 4.6, 1.5],
       [5.7, 2.8, 4.5, 1.3],
       [6.3, 3.3, 4.7, 1.6],
       [4.9, 2.4, 3.3, 1. ],
       [6.6, 2.9, 4.6, 1.3],
       [5.2, 2.7, 3.9, 1.4],
       [5. , 2. , 3.5, 1. ],
       [5.9, 3. , 4.2, 1.5],
       [6. , 2.2, 4. , 1. ],
       [6.1, 2.9, 4.7, 1.4],
       [5.6, 2.9, 3.6, 1.3],
       [6.7, 3.1, 4.4, 1.4],
       [5.6, 3. , 4.5, 1.5],
       [5.8, 2.7, 4.1, 1. ],
       [6.2, 2.2, 4.5, 1.5],
       [5.6, 2.5, 3.9, 1.1],
       [5.9, 3.2, 4.8, 1.8],
       [6.1, 2.8, 4. , 1.3],
       [6.3, 2.5, 4.9, 1.5],
       [6.1, 2.8, 4.7, 1.2],
       [6.4, 2.9, 4.3, 1.3],
       [6.6, 3. , 4.4, 1.4],
       [6.8, 2.8, 4.8, 1.4],
       [6.7, 3. , 5. , 1.7],
       [6. , 2.9, 4.5, 1.5],
       [5.7, 2.6, 3.5, 1. ],
       [5.5, 2.4, 3.8, 1.1],
       [5.5, 2.4, 3.7, 1. ],
       [5.8, 2.7, 3.9, 1.2],
       [6. , 2.7, 5.1, 1.6],
       [5.4, 3

In [1044]:
Y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [1045]:
def cost_function(X, Y, theta0, theta1, theta2, theta3, theta4):
    total_cost = 0
    for i in range(len(X)):
        total_cost += (Y[i]*log(theta0 + theta1*X[i,0] + theta2*X[i,1] + theta3*X[i,2] + theta4*X[i,3]) + (1 - Y[i])*log(1 - theta0 - theta1*X[i,0] - theta2*X[i,1] - theta3*X[i,2] - theta4*X[i,3]))
       # total_cost += (Y[i]*log(theta0 + theta1*X[i,0] + theta2*X[i,1] + theta3*X[i,2] + theta4*X[i,3]))
    return -total_cost/len(X) 

In [1046]:
def der_theta0(X, Y, theta0, theta1, theta2, theta3, theta4):
    total_cost = 0
    for i in range(len(X)):
        total_cost += (Y[i] / (theta0 + theta1*X[i,0] + theta2*X[i,1] + theta3*X[i,2] + theta4*X[i,3]) + (1 - Y[i]) / (1 - theta0 - theta1*X[i,0] - theta2*X[i,1] - theta3*X[i,2] - theta4*X[i,3]))
        #total_cost += X[i]
    return -total_cost/len(X)   

In [1047]:
def der_theta1(X, Y, theta0, theta1, theta2, theta3, theta4):
    total_cost = 0
    for i in range(len(X)):
        total_cost += (Y[i] / (theta0 + theta1*X[i,0] + theta2*X[i,1] + theta1*X[i,2] + theta1*X[i,3]) * X[i,0] + (1 - Y[i]) / (1 - theta0 - theta1*X[i,0] - theta2*X[i,1] - theta1*X[i,2] - theta1*X[i,3]) * X[i,0])
        #total_cost += X[i]
    return -total_cost/len(X) 

In [1048]:
def der_theta2(X, Y, theta0, theta1, theta2, theta3, theta4):
    total_cost = 0
    for i in range(len(X)):
        total_cost += (Y[i] / (theta0 + theta1*X[i,0] + theta2*X[i,1] + theta1*X[i,2] + theta1*X[i,3]) * X[i,1] + (1 - Y[i]) / (1 - theta0 - theta1*X[i,0] - theta2*X[i,1] - theta1*X[i,2] - theta1*X[i,3]) * X[i,1])
        #total_cost += X[i]
    return -total_cost/len(X) 

In [1049]:
def der_theta3(X, Y, theta0, theta1, theta2, theta3, theta4):
    total_cost = 0
    for i in range(len(X)):
        total_cost += (Y[i] / (theta0 + theta1*X[i,0] + theta2*X[i,1] + theta1*X[i,2] + theta1*X[i,3]) * X[i,2] + (1 - Y[i]) / (1 - theta0 - theta1*X[i,0] - theta2*X[i,1] - theta1*X[i,2] - theta1*X[i,3]) * X[i,2])
        #total_cost += X[i]
    return -total_cost/len(X) 

In [1050]:
def der_theta4(X, Y, theta0, theta1, theta2, theta3, theta4):
    total_cost = 0
    for i in range(len(X)):
        total_cost += (Y[i] / (theta0 + theta1*X[i,0] + theta2*X[i,1] + theta1*X[i,2] + theta1*X[i,3]) * X[i,3] + (1 - Y[i]) / (1 - theta0 - theta1*X[i,0] - theta2*X[i,1] - theta1*X[i,2] - theta1*X[i,3]) * X[i,3])
        #total_cost += X[i]
    return -total_cost/len(X) 

In [None]:
# обычный градиентный спуск

In [1051]:
EPOCHS = 20
LEARNING_RATE = 0.0001

In [1052]:
theta0 = 0.01
theta1 = 0.01
theta2 = 0.01
theta3 = 0.01
theta4 = 0.01

for _ in range(EPOCHS):
    dt0 = der_theta0(X, Y, theta0, theta1, theta2, theta3, theta4)
    dt1 = der_theta1(X, Y, theta0, theta1, theta2, theta3, theta4)
    dt2 = der_theta2(X, Y, theta0, theta1, theta2, theta3, theta4)
    dt3 = der_theta3(X, Y, theta0, theta1, theta2, theta3, theta4)
    dt4 = der_theta4(X, Y, theta0, theta1, theta2, theta3, theta4)
    
    theta0 = theta0 - LEARNING_RATE * dt0
    theta1 -= LEARNING_RATE * dt1
    theta2 -= LEARNING_RATE * dt2
    theta3 -= LEARNING_RATE * dt3
    theta4 -= LEARNING_RATE * dt4

In [1053]:
print("t0:", theta0, "t1:", theta1, "t2:", theta2, "t3:", theta3, "t4:", theta4, "cost:", cost_function(X, Y, theta0, theta1, theta2, theta3, theta4))

t0: 0.014456690126836202 t1: 0.03733384462788538 t2: 0.02249317568098778 t3: 0.03188599542280583 t4: 0.0176385205666284 cost: 0.6169256187890947


In [None]:
# Nesterov Accelerated Gradient

In [1054]:
EPOCHS = 20
LEARNING_RATE = 0.0001
gamma = 0.9
eta = LEARNING_RATE * 0.1

In [1055]:
theta0 = 0.01
theta1 = 0.01
theta2 = 0.01
theta3 = 0.01
theta4 = 0.01

vtheta0 = 0
vtheta1 = 0
vtheta2 = 0
vtheta3 = 0
vtheta4 = 0

for _ in range(EPOCHS):
    dt0 = der_theta0(X, Y, theta0 - gamma*vtheta0, theta1, theta2, theta3, theta4)
    dt1 = der_theta1(X, Y, theta0, theta1 - gamma*vtheta1, theta2, theta3, theta4)
    dt2 = der_theta2(X, Y, theta0, theta1, theta2 - gamma*vtheta2, theta3, theta4)
    dt3 = der_theta3(X, Y, theta0, theta1, theta2, theta3 - gamma*vtheta3, theta4)
    dt4 = der_theta4(X, Y, theta0, theta1, theta2, theta3, theta4 - gamma*vtheta4)
    
    vtheta0 = gamma*vtheta0 + eta*dt0
    vtheta1 = gamma*vtheta1 + eta*dt1
    vtheta2 = gamma*vtheta2 + eta*dt2
    vtheta3 = gamma*vtheta3 + eta*dt3
    vtheta4 = gamma*vtheta4 + eta*dt4
    
    theta0 = theta0 - vtheta0
    theta1 = theta1 - vtheta1
    theta2 = theta2 - vtheta2
    theta3 = theta3 - vtheta3
    theta4 = theta4 - vtheta4
    
    
    #print("t0:", theta0, "t1:", theta1, "cost:", cost_function(X, Y, theta0, theta1))

In [1056]:
print("t0:", theta0, "t1:", theta1, "t2:", theta2, "t3:", theta3, "t4:", theta4, "cost:", cost_function(X, Y, theta0, theta1, theta2, theta3, theta4))

t0: 0.013288023897322764 t1: 0.030071773033887736 t2: 0.01933788508141252 t3: 0.026734766226966678 t4: 0.015931448546542194 cost: 0.6414033864432689


In [None]:
#RMSProp

In [1057]:
EPOCHS = 20
LEARNING_RATE = 0.0001
gamma = 0.9
eta = LEARNING_RATE
epcilend = 0.00000001

In [1058]:
theta0 = 0.01
theta1 = 0.01
theta2 = 0.01
theta3 = 0.01
theta4 = 0.01
E0 = 0
E1 = 0
E2 = 0
E3 = 0
E4 = 0

for _ in range(EPOCHS):
    dt0 = der_theta0(X, Y, theta0, theta1, theta2, theta3, theta4)
    dt1 = der_theta1(X, Y, theta0, theta1, theta2, theta3, theta4)
    dt2 = der_theta2(X, Y, theta0, theta1, theta2, theta3, theta4)
    dt3 = der_theta3(X, Y, theta0, theta1, theta2, theta3, theta4)
    dt4 = der_theta4(X, Y, theta0, theta1, theta2, theta3, theta4)
    
    E0 = gamma*E0 + (1 - gamma)*(dt0**2)
    E1 = gamma*E1 + (1 - gamma)*(dt1**2)
    E2 = gamma*E2 + (1 - gamma)*(dt2**2)
    E3 = gamma*E3 + (1 - gamma)*(dt3**2)
    E4 = gamma*E4 + (1 - gamma)*(dt4**2)
    
    theta0 = theta0 - LEARNING_RATE*dt0/((E0 + epcilend)**0.5)
    theta1 = theta1 - LEARNING_RATE*dt1/((E1 + epcilend)**0.5)
    theta3 = theta2 - LEARNING_RATE*dt2/((E2 + epcilend)**0.5)
    theta4 = theta3 - LEARNING_RATE*dt3/((E3 + epcilend)**0.5)
    theta5 = theta4 - LEARNING_RATE*dt4/((E4 + epcilend)**0.5)

In [1059]:
print("t0:", theta0, "t1:", theta1, "t2:", theta2, "t3:", theta3, "t4:", theta4, "cost:", cost_function(X, Y, theta0, theta1, theta2, theta3, theta4))

t0: 0.012819499348585141 t1: 0.012783753291646386 t2: 0.01 t3: 0.010102495395240744 t4: 0.010204793930050617 cost: 0.89254180536112
