In [13]:
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.preprocessing import OrdinalEncoder
import pandas as pd
import numpy as np

# Decision Tree Regression
<hr>
<br>

**`Standard Deviation`**

<Br>
$Std = \sqrt{\dfrac{\sum{(x-\mu)^2}}{n}}$
<br>
$\mu = \dfrac{\sum{x}}{n}$
<br>
<hr>
<br>
    
**`Standard Deviation Reduction`**
<br>
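$SDR = Std(Label) - Std(Feature, Label)$
<br>
where $Std(Feature, Label) = \sum_{v \in Values(Feature)} \dfrac{n_v}{n} \, Std(Label_v)$ is the size-weighted average of the standard deviation of the label inside each category $v$ of the feature.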
    

In [14]:
# Load the example dataset; the path is relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer="Datasets/DTree.csv")
data

Unnamed: 0,Ob-Havo,Temp,Namlik,Shamol,Oyinchilar
0,Yomg'ir,Issiq,Yuqori,False,25
1,Yomg'ir,Issiq,Yuqori,True,30
2,Bulutli,Issiq,Yuqori,False,46
3,Quyoshli,Yaxshi,Yuqori,False,45
4,Quyoshli,Salqin,Normal,False,52
5,Quyoshli,Salqin,Normal,True,23
6,Bulutli,Salqin,Normal,True,43
7,Yomg'ir,Yaxshi,Yuqori,False,35
8,Yomg'ir,Salqin,Normal,False,38
9,Quyoshli,Yaxshi,Normal,False,46


In [15]:
# Every column except the regression target, as a raw (object-dtype) array.
feature = data.drop(columns='Oyinchilar').to_numpy()
feature

array([["Yomg'ir", 'Issiq', 'Yuqori', False],
       ["Yomg'ir", 'Issiq', 'Yuqori', True],
       ['Bulutli', 'Issiq', 'Yuqori', False],
       ['Quyoshli', 'Yaxshi', 'Yuqori', False],
       ['Quyoshli', 'Salqin', 'Normal', False],
       ['Quyoshli', 'Salqin', 'Normal', True],
       ['Bulutli', 'Salqin', 'Normal', True],
       ["Yomg'ir", 'Yaxshi', 'Yuqori', False],
       ["Yomg'ir", 'Salqin', 'Normal', False],
       ['Quyoshli', 'Yaxshi', 'Normal', False],
       ["Yomg'ir", 'Yaxshi', 'Normal', True],
       ['Bulutli', 'Yaxshi', 'Yuqori', True],
       ['Bulutli', 'Issiq', 'Normal', False],
       ['Quyoshli', 'Yaxshi', 'Yuqori', True]], dtype=object)

In [16]:
# Map each categorical column to numeric codes so a tree estimator can consume it.
# NOTE: `feature` is rebound from the raw array to its encoded form here.
ordinal = OrdinalEncoder()
feature = ordinal.fit(feature).transform(feature)
feature

array([[2., 0., 1., 0.],
       [2., 0., 1., 1.],
       [0., 0., 1., 0.],
       [1., 2., 1., 0.],
       [1., 1., 0., 0.],
       [1., 1., 0., 1.],
       [0., 1., 0., 1.],
       [2., 2., 1., 0.],
       [2., 1., 0., 0.],
       [1., 2., 0., 0.],
       [2., 2., 0., 1.],
       [0., 2., 1., 1.],
       [0., 0., 0., 0.],
       [1., 2., 1., 1.]])

In [32]:
# Regression target: the 'Oyinchilar' column as a NumPy array.
target = data.loc[:, 'Oyinchilar'].to_numpy()
target

array([25, 30, 46, 45, 52, 23, 43, 35, 38, 46, 48, 52, 44, 30])

In [24]:
# Per-category population standard deviation (ddof=0) of the target and the
# number of rows in each "Ob-Havo" category, sorted by ascending Std.
# "count" replaces np.count_nonzero, which would silently skip rows whose
# Oyinchilar value is 0 and therefore under-report the group size.
obhavo = (
    data.groupby("Ob-Havo")
    .agg(
        Std=("Oyinchilar", lambda x: x.std(ddof=0)),
        Soni=("Oyinchilar", "count"),
    )
    .sort_values("Std")
)
obhavo

Unnamed: 0_level_0,Std,Soni
Ob-Havo,Unnamed: 1_level_1,Unnamed: 2_level_1
Bulutli,3.49106,4
Yomg'ir,7.782031,5
Quyoshli,10.870143,5


In [26]:
# Per-category population standard deviation (ddof=0) of the target and the
# number of rows in each "Namlik" category, sorted by ascending Std.
# "count" replaces np.count_nonzero, which would silently skip rows whose
# Oyinchilar value is 0 and therefore under-report the group size.
namlik = (
    data.groupby("Namlik")
    .agg(
        Std=("Oyinchilar", lambda x: x.std(ddof=0)),
        Soni=("Oyinchilar", "count"),
    )
    .sort_values("Std")
)
namlik

Unnamed: 0_level_0,Std,Soni
Namlik,Unnamed: 1_level_1,Unnamed: 2_level_1
Normal,8.734169,7
Yuqori,9.363411,7


In [27]:
# Per-category population standard deviation (ddof=0) of the target and the
# number of rows in each "Temp" category, sorted by ascending Std.
# "count" replaces np.count_nonzero, which would silently skip rows whose
# Oyinchilar value is 0 and therefore under-report the group size.
temp = (
    data.groupby("Temp")
    .agg(
        Std=("Oyinchilar", lambda x: x.std(ddof=0)),
        Soni=("Oyinchilar", "count"),
    )
    .sort_values("Std")
)
temp

Unnamed: 0_level_0,Std,Soni
Temp,Unnamed: 1_level_1,Unnamed: 2_level_1
Yaxshi,7.65216,6
Issiq,8.954747,4
Salqin,10.511898,4


In [31]:
# Per-category population standard deviation (ddof=0) of the target and the
# number of rows in each "Shamol" category, sorted by ascending Std.
# "count" replaces np.count_nonzero, which would silently skip rows whose
# Oyinchilar value is 0 and therefore under-report the group size.
shamol = (
    data.groupby("Shamol")
    .agg(
        Std=("Oyinchilar", lambda x: x.std(ddof=0)),
        Soni=("Oyinchilar", "count"),
    )
    .sort_values("Std")
)
shamol

Unnamed: 0_level_0,Std,Soni
Shamol,Unnamed: 1_level_1,Unnamed: 2_level_1
False,7.873016,8
True,10.593499,6


In [35]:
# Standard deviation reduction for "Ob-Havo": Std of the whole target minus
# the size-weighted average of the per-category Std values.
# The weights are divided by len(target) instead of a hard-coded 14 so the
# result stays correct if the number of rows in the dataset changes.
stdr_obhavo = target.std() - np.dot(obhavo["Soni"] / len(target), obhavo["Std"])
stdr_obhavo

1.6621503366302335

In [37]:
# Standard deviation reduction for "Temp": Std of the whole target minus
# the size-weighted average of the per-category Std values.
# The weights are divided by len(target) instead of a hard-coded 14 so the
# result stays correct if the number of rows in the dataset changes.
stdr_temp = target.std() - np.dot(temp["Soni"] / len(target), temp["Std"])
stdr_temp

0.4796905747633211

In [38]:
# Standard deviation reduction for "Shamol": Std of the whole target minus
# the size-weighted average of the per-category Std values.
# The weights are divided by len(target) instead of a hard-coded 14 so the
# result stays correct if the number of rows in the dataset changes.
stdr_shamol = target.std() - np.dot(shamol["Soni"] / len(target), shamol["Std"])
stdr_shamol

0.28214938055733185

In [39]:
# Standard deviation reduction for "Namlik": Std of the whole target minus
# the size-weighted average of the per-category Std values.
# The weights are divided by len(target) instead of a hard-coded 14 so the
# result stays correct if the number of rows in the dataset changes.
stdr_namlik = target.std() - np.dot(namlik["Soni"] / len(target), namlik["Std"])
stdr_namlik

0.272296195489826

# Decision Tree Classification
<hr>
<br>

**`Entropy`**

$E(S) = -\sum_{i=1}^{n} p_i \log_2 p_i$

<hr>
<br>

**`Information Gain`**
<br>
$Gain(S,A) = E(S) - \sum_{v \in Values(A)} \dfrac{|S_v|}{|S|} E(S_v)$

In [7]:
def entropy_label(label):
    """Return the Shannon entropy (base 2) of a collection of class labels.

    Parameters
    ----------
    label : array-like
        One-dimensional sequence of class labels. Any sortable label type is
        accepted (non-negative ints, negative ints, strings, booleans, ...).

    Returns
    -------
    float
        ``-sum(p_i * log2(p_i))`` over the distinct labels, where ``p_i`` is
        the fraction of entries equal to the i-th label. ``0.0`` for an empty
        input or when every entry carries the same label.
    """
    # Nothing to measure: define the entropy of an empty set as zero instead
    # of dividing by a zero length.
    if len(label) == 0:
        return 0.0

    # np.unique counts occurrences for any label type, whereas np.bincount
    # only accepts non-negative integers.
    _, counts = np.unique(label, return_counts=True)
    presents = counts / len(label)

    # Every count from np.unique is >= 1, so every proportion is > 0 and
    # log2 is always defined here.
    entropy = -np.sum(presents * np.log2(presents))

    # Adding 0.0 turns the IEEE negative zero produced for a pure set
    # (-(1 * log2(1)) == -0.0) into a plain 0.0.
    return float(entropy) + 0.0

In [None]:
def gain()