In [1]:
import pandas as pd
import numpy as np
import random
import multiprocessing

In [2]:
class MyForestReg:
    def __init__(self, n_estimators = 10, max_features = 0.5, max_samples = 0.5, random_state = 42, max_depth = 5,  min_samples_split = 2, max_leafs = 20, bins = 16, oob_score = None):
        self.n_estimators = n_estimators
        self.max_features = max_features
        self.max_samples = max_samples
        self.random_state = random_state
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.max_leafs = max_leafs
        self.bins = bins
        self.oob_score = oob_score
        self.leafs_cnt = 0
        self.pred_sum = 0
        self.trees_ = []
        
    def __repr__(self):
        return f"""MyForestReg class: n_estimators={self.n_estimators}, max_features= {self.max_features}, 
        max_samples={self.max_samples},max_depth={self.max_depth}, min_samples_split={self.min_samples_split},
        max_leafs={self.max_leafs}"""
        
    def _mse(self, y):
        y_mean = np.mean(y)
        return np.mean((y - y_mean) ** 2)
        
    def _calculate_oob(self, y_true, y_pred):
        
        if self.oob_score == "mae":
            return np.mean(np.abs(y_true - y_pred))
        elif self.oob_score == "mse":
            return np.mean((y_true - y_pred) ** 2)
        elif self.oob_score == "rmse":
            return np.sqrt(np.mean((y_true - y_pred) **2))
        elif self.oob_score == "mape":
            return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
        elif self.oob_score == "r2":
            y_mean = np.mean(y_true)
            rss = np.sum((y_true - y_pred) ** 2)
            tss = np.sum((y_true - y_mean) ** 2)
            return 1 - rss/tss
        else:
            raise ValueError("Неправильная метрика")
        
    def _get_best_split(self, X, y):

        N, n_features = X.shape
        I_p = self._mse(y)
        gain = 0
        split_value = 0
        col_index  = None
        for j in range(n_features):
            thresholds = self.global_thresholds_[j]
            for t in thresholds:
                mask_left = X[:, j] <= t
                mask_right = X[:, j] > t
                if mask_left.sum() == 0 or mask_right.sum() == 0:
                    continue
                X_r, y_r = X[mask_right], y[mask_right]
                X_l, y_l = X[mask_left], y[mask_left]
                
                I_r, I_l = self._mse(y_r), self._mse(y_l)
                N_r, N_l = len(y_r), len(y_l)
                
                IG = I_p - N_r / N * I_r - N_l/N * I_l
                if IG > gain:
                    gain = IG
                    split_value = t
                    col_index = j
                        
        return col_index, split_value, gain
    def _build_tree(self, X_train, y_train, feature_names, depth = 0):
        
        stop_reasons = []
        if depth >= self.max_depth:
                stop_reasons.append("max_depth")
        
        if len(np.unique(y_train)) == 1:
            stop_reasons.append("pure_node")
    
        if len(y_train) == 1:
            stop_reasons.append("single_sample")
    
        if len(y_train) < self.min_samples_split:
            stop_reasons.append("min_samples_split")
    
        if self.potential_leafs >= self.max_leafs:
            stop_reasons.append("max_leafs")
            
        if stop_reasons:
            pred = np.mean(y_train)
            self.pred_sum += pred
            self.leafs_cnt += 1                      
            return {
                "type" : "leaf",
                "prediction" : pred,
                "n_samples" : len(y_train),
                "depth" : depth,
                }
        
        best_feature, best_split, ig  = self._get_best_split(X_train, y_train)
        
        if ig <= 0:
            pred = np.mean(y_train)
            self.pred_sum += pred
            self.leafs_cnt += 1
            return {
                "type" : "leaf",
                "prediction" : pred,
                "n_samples" : len(y_train),
                "depth" : depth,
            }
        n_samples_node = len(y_train)
        fn = feature_names[best_feature]
        self.fi[fn] += n_samples_node / self.n_samples * ig
        self.potential_leafs += 1
        
        mask_left = X_train[:, best_feature] <= best_split
        mask_right = X_train[:, best_feature] > best_split
        X_r, y_r = X_train[mask_right], y_train[mask_right]
        X_l, y_l = X_train[mask_left], y_train[mask_left]
        left_subtree = self._build_tree(X_l, y_l, feature_names,depth +1)
        right_subtree = self._build_tree(X_r, y_r, feature_names, depth +1)
        
        return {
            "type" : "node",
            'feature' : best_feature,
            'split' : best_split,
            'feature_name' : fn,
            'depth' : depth,
            "n_samples" : n_samples_node,
            'child_left' : left_subtree,
            'child_right' : right_subtree,        
        }
    def _fit_tree(self, X_train, y_train, feature_names):
        
        if self.max_leafs < 2:
            self.max_leafs = 2    
        self.potential_leafs = 1
        self.global_thresholds_ = []
        for j in range(X_train.shape[1]):
            features = X_train[:, j]
            f = np.sort(np.unique(features))
            native_thresholds = (f[:-1] + f[1:]) / 2 
            if self.bins is None:
                thresholds = native_thresholds
            else:
                if self.bins - 1 > len(native_thresholds):
                    thresholds = native_thresholds
                else:
                    thresholds = np.histogram(X_train[:, j], self.bins)[1][1:-1]
            self.global_thresholds_.append(thresholds)
            
        return self._build_tree(X_train, y_train, feature_names, depth = 0)
    def fit(self, X, y):
        random.seed(self.random_state)
        self.n_samples, self.n_features = X.shape
        features = list(X.columns)
        X_train, y_train = X.to_numpy(), y.to_numpy()
        self.fi = {f : 0 for f in features}
        
        for i in range(self.n_estimators):
            cols_idx = random.sample(range(self.n_features), round(self.n_features * self.max_features))
            rows_idx = random.sample(range(self.n_samples), round(self.n_samples * self.max_samples))
            X_train_sample, y_train_sample = X_train[rows_idx][:, cols_idx], y_train[rows_idx]
            selected_features = [features[k] for k in cols_idx]
            tree = self._fit_tree(X_train_sample, y_train_sample, selected_features)
            self.trees_.append({
                "tree" : tree,
                "rows" : rows_idx,
                "cols" : cols_idx
            })

        if self.oob_score:
            oob_sum = np.zeros(self.n_samples)
            oob_count = np.zeros(self.n_samples)
            all_idx = np.arange(self.n_samples)
            for t in self.trees_:
                tree = t["tree"]
                rows_idx = t['rows']
                cols_idx = t['cols']
                oob_idx = np.setdiff1d(all_idx, rows_idx)
                X_oob = X_train[oob_idx][:, cols_idx]
                oob_preds = np.zeros(X_oob.shape[0])
                for i in range(X_oob.shape[0]):
                    node = tree
                    while node["type"] != "leaf":
                
                        feature_number = node['feature']
                        predicat = node['split']
                        if X_oob[i, feature_number] <= predicat:
                            node = node['child_left']
                        else:
                            node = node['child_right']
                    result = node['prediction']
                    oob_preds[i] = result    
                oob_sum[oob_idx] += oob_preds
                oob_count[oob_idx] += 1
            oob_mask = oob_count > 0
            oob_pred_mean = oob_sum[oob_mask]/oob_count[oob_mask]
            oob_pred_true = y_train[oob_mask]
            self.oob_score_ = self._calculate_oob(oob_pred_true, oob_pred_mean)
            
    def predict(self, X):
        features = list(X.columns)
        X_test = X.to_numpy()
        n_test_samples = X.shape[0]
        preds = np.zeros((n_test_samples, len(self.trees_)))
        
        for t in range(len(self.trees_)):
            for i in range(n_test_samples):
                tree = self.trees_[t]
                node = tree["tree"]
                while node["type"] != "leaf":
                    feature_name = node['feature_name']
                    feature_number = features.index(feature_name)
                    predicat = node['split']
                    if X_test[i, feature_number] <= predicat:
                        node = node['child_left']
                    else:
                        node = node['child_right']
                result = node['prediction']
                preds[i,t] = result
        mean_preds = np.mean(preds, axis = 1)
        return mean_preds

In [3]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
data = load_diabetes(as_frame=True)
X, y = data['data'], data['target']
X,y = pd.DataFrame(X), pd.DataFrame(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 42, test_size = .3)

In [4]:
X_test.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
287,0.045341,-0.044642,-0.006206,-0.015999,0.125019,0.125198,0.019187,0.034309,0.032432,-0.00522
211,0.092564,-0.044642,0.036907,0.021872,-0.02496,-0.016658,0.000779,-0.039493,-0.022517,-0.021788
72,0.063504,0.05068,-0.00405,-0.012556,0.103003,0.04879,0.056003,-0.002592,0.084492,-0.017646
321,0.096197,-0.044642,0.051996,0.079265,0.054845,0.036577,-0.076536,0.141322,0.098648,0.061054
73,0.012648,0.05068,-0.020218,-0.002228,0.038334,0.053174,-0.006584,0.034309,-0.005142,-0.009362


In [5]:
rf = MyForestReg(max_depth = 20, oob_score = 'mae')
rf.fit(X_train, y_train)

In [6]:
rf.predict(X_test)

array([165.23206132, 162.68654252, 153.37955927, 202.90971249,
       133.11847197, 115.1401591 , 197.87304582, 197.87304582,
       164.73678098, 154.42389237, 112.61318474, 157.67002011,
        99.45155345, 202.90971249, 120.42519227, 141.78919896,
       197.87304582, 202.90971249, 177.3601223 , 198.6607657 ,
       186.914134  , 121.36814445,  94.18386114, 198.6607657 ,
       148.88922571, 177.65428896, 195.45428896, 163.19254242,
        99.27186114, 124.73722197, 169.54580619, 152.99401979,
       186.914134  , 189.11729928, 179.16159957, 191.57341392,
       141.42619527, 139.03606224, 172.48796594,  94.6808355 ,
       112.38082174, 138.04144215, 154.22630536, 163.54969043,
       158.80383199, 104.26364136, 115.35282931, 120.85200275,
        94.77719448, 175.60131482, 137.95870797, 119.86116265,
       167.45087866, 114.62401741, 185.69675829, 142.55634899,
       118.14785141, 193.62409903,  94.02166093, 100.62973786,
       186.914134  , 186.914134  , 136.59973361, 137.74

In [7]:
rf.fi

{'age': 485.6933047770563,
 'sex': 178.3761393714827,
 'bmi': 3510.301432951824,
 'bp': 1507.9156299722792,
 's1': 400.5022095658077,
 's2': 101.23669543329737,
 's3': 1168.6673792094166,
 's4': 830.0066263359143,
 's5': 2596.4955855276876,
 's6': 996.8079538880509}

In [8]:
rf.oob_score_

72.19009369809075

In [316]:
class MyTreeClf:
    def __init__(self, max_depth = 5, min_samples_split = 2, max_leafs  = 20, bins = None, criterion = "entropy"):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.max_leafs = max_leafs
        self.leafs_cnt = 0
        self.pred_sum = 0
        self.potential_leafs = 1
        self.bins = bins
        self.criterion = criterion
        self.fi = {}
       
    def _entropy(self, y):
        epsilon = 1e-12
        p0 = np.sum(y == 0) / len(y)
        p1 = np.sum(y == 1) / len(y)
        entropy = - (p0 * np.log2(p0 + epsilon) + p1 * np.log2(p1 + epsilon))
        return entropy

    def _gini(self, y):
        p0 = np.sum(y==0) / len(y)
        p1 = np.sum(y==1) / len(y)
        gini = 1 - (p0 ** 2 + p1**2)
        return gini

    def _calculate_criterion(self, y):
        if self.criterion == "entropy":
            return self._entropy(y)
        elif self.criterion == "gini":
            return self._gini(y)
        else:
            raise ValueError("criterion must be 'entropy' or 'gini'")
            
    def __repr__(self):
        return f"MyTreeClf class: max_depth={self.max_depth}, min_samples_split={self.min_samples_split}, max_leafs={self.max_leafs}"
        
    def _get_best_split(self, X, y):
       
        N, n_features = X.shape
        S_0 = self._calculate_criterion(y)
        max_IG = 0
        best_split = 0
        feature_index = 0
        
        for j in range(n_features):
            thresholds = self.global_thresholds_[j]
            for t in thresholds:
                mask_left = X[:, j] <= t
                mask_right = X[:, j] > t
                if mask_left.sum() == 0 or mask_right.sum() == 0:
                    continue
                X_r, y_r = X[mask_right], y[mask_right]
                X_l, y_l = X[mask_left], y[mask_left]
                S_r = self._calculate_criterion(y_r)
                S_l = self._calculate_criterion(y_l)
                N_r = len(y_r)
                N_l = len(y_l)
                IG = S_0 - N_r / N * S_r - N_l/N * S_l
                if IG > max_IG:
                    max_IG = IG
                    best_split = t
                    feature_index = j
                        
        return feature_index, max_IG, best_split

        
    def _build_tree(self, X_train, y_train, depth = 0):
        
        stop_reasons = []
        if depth >= self.max_depth:
                stop_reasons.append("max_depth")
        
        if len(np.unique(y_train)) == 1:
            stop_reasons.append("pure_node")
    
        if len(y_train) == 1:
            stop_reasons.append("single_sample")
    
        if len(y_train) < self.min_samples_split:
            stop_reasons.append("min_samples_split")
    
        if self.potential_leafs >= self.max_leafs:
            stop_reasons.append("max_leafs")
            
        if stop_reasons:
            pred = np.mean(y_train)
            self.pred_sum += pred
            self.leafs_cnt += 1            
            impurity = self._calculate_criterion(y_train)           
            return {
                "type" : "leaf",
                "prediction" : pred,
                "n_samples" : len(y_train),
                "depth" : depth,
                }
        
        best_feature, ig, best_split = self._get_best_split(X_train, y_train)
        
        if ig <= 0:
            pred = np.mean(y_train)
            self.pred_sum += pred
            self.leafs_cnt += 1
            return {
                "type" : "leaf",
                "prediction" : pred,
                "n_samples" : len(y_train),
                "depth" : depth,
            }
        n_samples_node = len(y_train)
        fn = self.feature_names[best_feature]
        self.fi[fn] += n_samples_node / self.n_samples * ig
        
        self.potential_leafs += 1
        
        mask_left = X_train[:, best_feature] <= best_split
        mask_right = X_train[:, best_feature] > best_split
        X_r, y_r = X_train[mask_right], y_train[mask_right]
        X_l, y_l = X_train[mask_left], y_train[mask_left]
        left_subtree = self._build_tree(X_l, y_l, depth +1)
        right_subtree = self._build_tree(X_r, y_r, depth +1)
        
        return {
            "type" : "node",
            'feature' : best_feature,
            'split' : best_split,
            'feature_name' : fn,
            'depth' : depth,
            "n_samples" : n_samples_node,
            'leaf_left' : left_subtree,
            'leaf_right' : right_subtree,        
        }
        
    def fit(self, X, y, feature_names = None):
        if self.max_leafs < 2:
            self.max_leafs = 2
        self.leafs_count = 0
        self.pred_sum = 0
        
        if feature_names is not None:
            self.feature_names = feature_names
        else:
            self.feature_names = X.columns.to_list()
            
        if isinstance(X, pd.DataFrame):
            X_train = X.to_numpy()
        else:
            X_train = np.asarray(X)

        if isinstance(y, pd.DataFrame):
            y_train = y.to_numpy()
        else:
            y_train = np.asarray(y)
        self.fi = {f : 0 for f in self.feature_names}
      
        self.n_samples = X.shape[0]
        self.global_thresholds_ = []
        for j in range(X_train.shape[1]):
            features = X_train[:, j]
            f = np.sort(np.unique(features))
            native_thresholds = (f[:-1] + f[1:]) / 2 
            if self.bins is None:
                thresholds = native_thresholds
            else:
                if self.bins >= len(native_thresholds):
                    thresholds = native_thresholds
                else:
                    thresholds = np.histogram(X_train[:, j], self.bins)[1][1:-1]
            self.global_thresholds_.append(thresholds)
            
        self.tree_ = self._build_tree(X_train, y_train, depth = 0)
           
            
    def print_tree(self, node = None, path = "1", side = None):
        if node is None:
            node = self.tree_
        if node["type"] == "leaf":
            if side is not None:
                print(' '*node['depth'], f"{path}.{side} - {node['prediction']}")
            else:
                print(f"{path} - {node['prediction']}")
            return
        feature = node["feature_name"]
        split = node["split"]
        depth = node['depth']
        print(' '*depth, f"{path} - {feature} > {split}")
        self.print_tree(node["leaf_left"], path + ".1", side = "left") 
        self.print_tree(node["leaf_right"], path + ".2", side = "right")

    def predict_proba(self, X, feature_names):
       
        X_test = X.to_numpy()
        n_samples = X.shape[0]
        probas = np.zeros(n_samples)
        for i in range(n_samples):
            node = self.tree_
            while node["type"] != "leaf":
                feature_name = node['feature_name']
                feature_number = feature_names.index(feature_name)
                predicat = node['split']
                if X_test[i, feature_number] <= predicat:
                    node = node['leaf_left']
                else:
                    node = node['leaf_right']
            result = node['prediction']
            probas[i] = result
        return probas
        
    def predict(self, X, feature_names):
        probas = self.predict_proba(X, feature_names)
        pred = (probas > 0.5).astype(int)
        return pred
               

In [298]:
class MyForestClf:
    def __init__(self, n_estimators = 10, max_features = 0.5, max_samples = 0.5, random_state = 42, max_depth = 5,  
                 min_samples_split = 2, max_leafs = 20, bins = 16, criterion = "entropy"):
        self.n_estimators = n_estimators
        self.max_features = max_features
        self.max_samples = max_samples
        self.random_state = random_state
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.max_leafs = max_leafs
        self.bins = bins
        self.criterion = criterion
        self.trees = []
        self.leafs_cnt = 0
        
    def __repr__(self):
        return f"""MyForestClf class: n_estimators={self.n_estimators}, max_features= {self.max_features}, 
        max_samples={self.max_samples},max_depth={self.max_depth}, min_samples_split={self.min_samples_split},
        max_leafs={self.max_leafs}, bins={self.bins}, criterion={self.criterion}, random_state={self.random_state}"""

    def fit(self, X, y):
        random.seed(self.random_state)
        self.n_samples, self.n_features = X.shape
        self.feature_names = list(X.columns)
        self.fi = {f : 0 for f in self.feature_names}
        if isinstance(X, pd.DataFrame):
            X_train = X.to_numpy()
        else:
            X_train = np.asarray(X)

        if isinstance(y, pd.DataFrame):
            y_train = y.to_numpy()
        else:
            y_train = np.asarray(y)
            
        for i in range(self.n_estimators):
            
            tree = MyTreeClf( max_depth = self.max_depth, min_samples_split = self.min_samples_split, 
                             max_leafs  = self.max_leafs, bins = self.bins, criterion = self.criterion)

            cols_idx = random.sample(range(self.n_features), round(self.n_features * self.max_features))
            rows_idx = random.sample(range(self.n_samples), round(self.n_samples * self.max_samples))
            feature_names_sample = [self.feature_names[k] for k in cols_idx]
            print(feature_names_sample, cols_idx)
            X_train_sample = X_train[rows_idx][:, cols_idx]
            y_train_sample = y_train[rows_idx]

            tree.fit(X_train_sample,y_train_sample, feature_names_sample)
            self.trees.append({
                "tree" : tree,
                "cols_idx" : cols_idx
            })
            self.leafs_cnt += tree.leafs_cnt
            for f, imp in tree.fi.items():
                self.fi[f] = imp
    def predict(self, X, type_ = "mean"):
        predictions = np.zeros((X.shape[0], self.n_estimators))
        for i in range(self.n_estimators):
            tree = self.trees[i]["tree"]
            tree_features = self.trees[i]["cols_idx"]
            tree_feature_names = [self.feature_names[k] for k in tree_features]
            if type_ == "mean":
                y_pred = tree.predict_proba(X.iloc[:, tree_features], tree_feature_names)
            else:
                y_pred = tree.predict(X.iloc[:, tree_features], tree_feature_names)
            predictions[:, i] = y_pred
        if type_ == "mean":
            pred = np.mean(predictions, axis = 1)
            return (pred > 0.5).astype(int)
        else:
            ones_count = np.sum(predictions, axis = 1)
            return (ones_count >= self.n_estimators / 2).astype(int)
    def predict_proba(self, X):
        predictions = np.zeros((X.shape[0], self.n_estimators))
        for i in range(self.n_estimators):
            
            tree = self.trees[i]["tree"]
            tree_features = self.trees[i]["cols_idx"]
            tree_feature_names = [self.feature_names[k] for k in tree_features]
            y_pred = tree.predict_proba(X.iloc[:, tree_features], tree_feature_names)
            predictions[:, i] = y_pred
        return np.mean(predictions, axis = 1)
            
        
                    

In [318]:
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=1500, n_features=14, n_informative=10, random_state=42)
X = pd.DataFrame(X)
y = pd.Series(y)
X.columns = [f'col_{col}' for col in X.columns]

X_test, y_test = make_classification(n_samples=30, n_features=14, n_informative=10, random_state=42)
X_test = pd.DataFrame(X_test)
y_test = pd.Series(y_test)
X_test.columns = [f'col_{col}' for col in X_test.columns]

In [319]:
rf = MyForestClf(n_estimators = 6, max_depth = 2, max_features = 0.6, max_samples = 0.5)

In [320]:
rf.fit(X, y)

['col_10', 'col_1', 'col_0', 'col_4', 'col_3', 'col_9', 'col_2', 'col_5'] [10, 1, 0, 4, 3, 9, 2, 5]
['col_11', 'col_12', 'col_4', 'col_10', 'col_9', 'col_1', 'col_0', 'col_2'] [11, 12, 4, 10, 9, 1, 0, 2]
['col_1', 'col_9', 'col_8', 'col_3', 'col_2', 'col_7', 'col_10', 'col_6'] [1, 9, 8, 3, 2, 7, 10, 6]
['col_6', 'col_3', 'col_2', 'col_5', 'col_9', 'col_10', 'col_12', 'col_13'] [6, 3, 2, 5, 9, 10, 12, 13]
['col_4', 'col_12', 'col_3', 'col_1', 'col_10', 'col_13', 'col_2', 'col_11'] [4, 12, 3, 1, 10, 13, 2, 11]
['col_3', 'col_6', 'col_7', 'col_0', 'col_2', 'col_4', 'col_1', 'col_10'] [3, 6, 7, 0, 2, 4, 1, 10]


In [321]:
rf.trees[5]["tree"].fi

{'col_3': 0,
 'col_6': 0,
 'col_7': 0,
 'col_0': 0,
 'col_2': 0,
 'col_4': 0,
 'col_1': 0.16781830692354982,
 'col_10': 0.09299362536373647}

In [322]:
rf.fi

{'col_0': 0,
 'col_1': 0.16781830692354982,
 'col_2': 0,
 'col_3': 0,
 'col_4': 0,
 'col_5': 0.051938347795332004,
 'col_6': 0,
 'col_7': 0,
 'col_8': 0.014794689549743188,
 'col_9': 0,
 'col_10': 0.09299362536373647,
 'col_11': 0,
 'col_12': 0,
 'col_13': 0}

In [323]:
pred = rf.predict(X_test, type_ = "mean")
print(pred)

[1 1 1 1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 0 1 1 1 0 1 1 1 1 1 1]


In [324]:
rf.predict_proba(X_test)

array([0.58682179, 0.60621797, 0.58682179, 0.58682179, 0.17125095,
       0.20713492, 0.53580694, 0.68009541, 0.53178064, 0.58682179,
       0.53147352, 0.53147352, 0.38785186, 0.17125095, 0.58682179,
       0.68009541, 0.68009541, 0.58682179, 0.38785186, 0.28737682,
       0.53147352, 0.58682179, 0.68009541, 0.17125095, 0.53147352,
       0.53178064, 0.58682179, 0.68009541, 0.68009541, 0.53147352])

In [325]:
pred = rf.predict(X_test, type_ = "vote")
print(pred)

[1 1 1 1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 0 1 1 1 0 1 1 1 1 1 1]


In [326]:
dt = MyTreeClf()
dt.fit(X,y)

KeyboardInterrupt: 