In [1]:
# DecisionTreeRegressor and DecisionTreeClassifier are the base learners used to build the custom random forests
from sklearn.tree import DecisionTreeRegressor,DecisionTreeClassifier 
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier

# Sklearn metrics 
from sklearn.metrics import  mean_squared_error, mean_absolute_error
from sklearn.metrics import accuracy_score


import numpy as np  # Matritsa va massivlar bilan ishlovchi modul
import pandas as pd # Ma'lumotlar bilan ishlovchi modul
# Sklearn datasets and train test split
from sklearn import datasets
from sklearn.model_selection import train_test_split

from collections import Counter 

# Random Forest Regressor

In [2]:
class CustomRandomForestRegressor:
    '''
    Random forest regressor written from scratch on top of decision trees.

    Every tree is trained on a bootstrap sample (rows drawn with replacement)
    of the training data; the forest prediction is the mean of the per-tree
    predictions.
    '''
    def __init__(self, num_trees=25, min_samples_split=2, max_depth=5, random_state=None):
        """
        Store the hyper-parameters; no training happens here.

        :param num_trees: int, number of decision trees to build in fit()
        :param min_samples_split: forwarded unchanged to every DecisionTreeRegressor
        :param max_depth: forwarded unchanged to every DecisionTreeRegressor
        :param random_state: int or None. When given, bootstrap sampling and
            the per-tree seeds are derived from it so fit() is repeatable.
            When None (default) NumPy's global RNG is used, as before.
        """
        self.num_trees = num_trees
        self.min_samples_split = min_samples_split
        self.max_depth = max_depth
        self.random_state = random_state
        # Fitted decision trees; filled by fit()
        self.decision_trees = []

    @staticmethod
    def _sample(X, y, rng=None):
        '''
        Draw a bootstrap sample: as many rows as X has, chosen with replacement.

        :param X: array-like, features
        :param y: array-like, targets aligned with the rows of X
        :param rng: optional np.random.RandomState; NumPy's global RNG when None
        :return: tuple (X_sample, y_sample) of np.ndarray
        '''
        X = np.asarray(X)
        y = np.asarray(y)
        n_rows = X.shape[0]
        source = np.random if rng is None else rng
        # The same row indices are applied to X and y so pairs stay aligned
        samples = source.choice(a=n_rows, size=n_rows, replace=True)
        return X[samples], y[samples]

    def fit(self, X, y):
        '''
        Train the forest, discarding any previously fitted trees.

        :param X: array-like, features
        :param y: array-like, regression targets
        :return: None
        :raises ValueError: if X and y have a different number of rows
        '''
        # Positional indexing below needs ndarrays (lists / DataFrames are converted)
        X = np.asarray(X)
        y = np.asarray(y)
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                "X and y must have the same number of rows, got %d and %d"
                % (X.shape[0], y.shape[0])
            )

        rng = None if self.random_state is None else np.random.RandomState(self.random_state)
        max_seed = np.iinfo(np.int32).max

        self.decision_trees = []
        # Errors raised while building a tree are deliberately NOT swallowed:
        # retrying a failure that will repeat would loop forever.
        for _ in range(self.num_trees):
            tree = DecisionTreeRegressor(
                min_samples_split=self.min_samples_split,
                max_depth=self.max_depth,
                random_state=None if rng is None else rng.randint(max_seed),
            )
            # Bootstrap sample for this tree
            _X, _y = self._sample(X, y, rng)
            tree.fit(_X, _y)
            self.decision_trees.append(tree)

    def predict(self, X):
        '''
        Predict targets as the mean of the predictions of all trees.

        :param X: array-like, samples to predict
        :return: np.ndarray with one averaged prediction per row of X
        :raises ValueError: if the forest has no fitted trees
        '''
        if not self.decision_trees:
            raise ValueError("This forest has no fitted trees; call fit() before predict().")

        X = np.asarray(X)
        if X.shape[0] == 0:
            # Nothing to predict
            return np.array([])

        # One batched call per tree: rows of per_tree are trees, columns are samples
        per_tree = np.array([tree.predict(X) for tree in self.decision_trees])
        return per_tree.mean(axis=0)

# Random Forest Classifier

In [3]:
class CustomRandomForestClassifier:
    '''
    Random forest classifier written from scratch on top of decision trees.

    Every tree is trained on a bootstrap sample (rows drawn with replacement)
    of the training data; the forest prediction is the majority vote of the
    per-tree predictions.
    '''
    def __init__(self, num_trees=25, min_samples_split=2, max_depth=5, random_state=None):
        """
        Store the hyper-parameters; no training happens here.

        :param num_trees: int, number of decision trees to build in fit()
        :param min_samples_split: forwarded unchanged to every DecisionTreeClassifier
        :param max_depth: forwarded unchanged to every DecisionTreeClassifier
        :param random_state: int or None. When given, bootstrap sampling and
            the per-tree seeds are derived from it so fit() is repeatable.
            When None (default) NumPy's global RNG is used, as before.
        """
        self.num_trees = num_trees
        self.min_samples_split = min_samples_split
        self.max_depth = max_depth
        self.random_state = random_state
        # Fitted decision trees; filled by fit()
        self.decision_trees = []

    @staticmethod
    def _sample(X, y, rng=None):
        '''
        Draw a bootstrap sample: as many rows as X has, chosen with replacement.

        :param X: array-like, features
        :param y: array-like, class labels aligned with the rows of X
        :param rng: optional np.random.RandomState; NumPy's global RNG when None
        :return: tuple (X_sample, y_sample) of np.ndarray
        '''
        X = np.asarray(X)
        y = np.asarray(y)
        n_rows = X.shape[0]
        source = np.random if rng is None else rng
        # The same row indices are applied to X and y so pairs stay aligned
        samples = source.choice(a=n_rows, size=n_rows, replace=True)
        return X[samples], y[samples]

    def fit(self, X, y):
        '''
        Train the forest, discarding any previously fitted trees.

        :param X: array-like, features
        :param y: array-like, class labels
        :return: None
        :raises ValueError: if X and y have a different number of rows
        '''
        # Positional indexing below needs ndarrays (lists / DataFrames are converted)
        X = np.asarray(X)
        y = np.asarray(y)
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                "X and y must have the same number of rows, got %d and %d"
                % (X.shape[0], y.shape[0])
            )

        rng = None if self.random_state is None else np.random.RandomState(self.random_state)
        max_seed = np.iinfo(np.int32).max

        self.decision_trees = []
        # Errors raised while building a tree are deliberately NOT swallowed:
        # retrying a failure that will repeat would loop forever.
        for _ in range(self.num_trees):
            tree = DecisionTreeClassifier(
                min_samples_split=self.min_samples_split,
                max_depth=self.max_depth,
                random_state=None if rng is None else rng.randint(max_seed),
            )
            # Bootstrap sample for this tree
            _X, _y = self._sample(X, y, rng)
            tree.fit(_X, _y)
            self.decision_trees.append(tree)

    def predict(self, X):
        '''
        Predict class labels by majority vote over all trees.

        :param X: array-like, samples to predict
        :return: list with one predicted label per row of X
        :raises ValueError: if the forest has no fitted trees
        '''
        if not self.decision_trees:
            raise ValueError("This forest has no fitted trees; call fit() before predict().")

        X = np.asarray(X)
        if X.shape[0] == 0:
            # Nothing to predict
            return []

        # Rows are trees, columns are samples
        votes = np.array([tree.predict(X) for tree in self.decision_trees])

        # Transpose so each row holds all votes for one sample, then keep the
        # most frequent label (ties go to the label that was voted first)
        return [Counter(sample_votes).most_common(1)[0][0] for sample_votes in votes.T]

# Test Classifier

In [4]:
# Load the Iris dataset and print the description bundled with it.
iris = datasets.load_iris()
print(iris["DESCR"])
# Feature matrix, class targets and class names, unpacked in one step.
X, y, labels = iris["data"], iris["target"], iris["target_names"]
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [5]:
# Hyper-parameters written out (they equal the class defaults) so they can be
# compared at a glance with the scikit-learn forest configured further down.
custom_rf_classifier = CustomRandomForestClassifier(
    num_trees=25,
    min_samples_split=2,
    max_depth=5,
)

In [6]:
# The custom forest draws its bootstrap samples from NumPy's global RNG, so
# seed it right before training to get the same forest on every re-run.
np.random.seed(1)
custom_rf_classifier.fit(X_train_class, y_train_class)

In [7]:
# Predict the held-out samples with the custom forest ...
custom_preds_class = custom_rf_classifier.predict(X=X_test_class)

# ... and report the share of correctly classified samples.
print(
    "Accuracy : ",
    accuracy_score(y_true=y_test_class, y_pred=custom_preds_class),
)

Accuracy :  0.9666666666666667


In [8]:
# scikit-learn baseline with the same tree count and tree limits as the custom
# forest; random_state is fixed so the baseline score is reproducible.
sk_rf_class = RandomForestClassifier(
    n_estimators=25,
    min_samples_split=2,
    max_depth=5,
    random_state=1,
)

In [9]:
# Train the scikit-learn baseline on the same training split.
sk_rf_class.fit(X=X_train_class, y=y_train_class)

In [10]:
# Predict the held-out samples with the scikit-learn baseline and report its
# accuracy under the same label as the custom forest (typo "Accuacy" fixed).
sk_pred_class = sk_rf_class.predict(X_test_class)
print("Accuracy : ", accuracy_score(y_test_class, sk_pred_class))

Accuacy :  0.9666666666666667


# Test Regressor

In [12]:
# Load the diabetes dataset and print the description bundled with it.
data = datasets.load_diabetes()
print(data["DESCR"])
# Feature matrix and regression target, unpacked in one step.
X, y = data["data"], data["target"]
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

  :Number of Instances: 442

  :Number of Attributes: First 10 columns are numeric predictive values

  :Target: Column 11 is a quantitative measure of disease progression one year after baseline

  :Attribute Information:
      - age     age in years
      - sex
      - bmi     body mass index
      - bp      average blood pressure
      - s1      tc, total serum cholesterol
      - s2      ldl, low-density lipoproteins
      - s3      hdl, high-density lipoproteins
      - s4      tch, total cholesterol / HDL
      - s5      ltg, possibly log of serum triglycerides level
      - s6      glu, blood sugar level

Note: Each of these 1

In [13]:
# Hyper-parameters written out (they equal the class defaults) so they can be
# compared at a glance with the scikit-learn forest configured further down.
custom_rf_regressor = CustomRandomForestRegressor(
    num_trees=25,
    min_samples_split=2,
    max_depth=5,
)

In [14]:
# The custom forest draws its bootstrap samples from NumPy's global RNG, so
# seed it right before training to get the same forest on every re-run.
np.random.seed(1)
custom_rf_regressor.fit(X_train_reg, y_train_reg)

In [15]:
# Predict the held-out samples with the custom regression forest.
cutom_pred = custom_rf_regressor.predict(X=X_test_reg)

In [16]:
# Report mean squared error and mean absolute error of the custom forest.
print(
    "MSE : ",
    mean_squared_error(y_true=y_test_reg, y_pred=cutom_pred),
)
print(
    "MAE : ",
    mean_absolute_error(y_true=y_test_reg, y_pred=cutom_pred),
)

MSE :  3784.8688151411125
MAE :  46.99514782685625


In [17]:
# scikit-learn baseline with the same tree count and tree limits as the custom
# forest; random_state is fixed so the baseline errors are reproducible.
sk_rf_reg = RandomForestRegressor(
    n_estimators=25,
    min_samples_split=2,
    max_depth=5,
    random_state=1,
)

In [18]:
# Train the scikit-learn baseline on the same training split.
sk_rf_reg.fit(X=X_train_reg, y=y_train_reg)

In [19]:
# Predict the held-out samples with the scikit-learn baseline ...
sk_pred_reg = sk_rf_reg.predict(X=X_test_reg)
# ... and report the same two error metrics as for the custom forest.
print(
    "MSE : ",
    mean_squared_error(y_true=y_test_reg, y_pred=sk_pred_reg),
)
print(
    "MAE : ",
    mean_absolute_error(y_true=y_test_reg, y_pred=sk_pred_reg),
)

MSE :  3548.1589211357464
MAE :  45.19919062929809
