## Лабораторная работа "Алгоритмы машинного обучения"
## Бабичева Анна
## М8О-304Б-17

In [1]:
import warnings

warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import time

from matplotlib.colors import ListedColormap
from datetime import datetime 
from pandas import Series, DataFrame 
from sklearn.model_selection import cross_val_score, cross_validate, train_test_split
 
import lightgbm as lgb 
import xgboost as xgb 

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

%matplotlib inline

# Задача классификации

### Загрузка данных

In [2]:
X = pd.read_csv('classification/data/X_adult.csv')
Y = pd.read_csv('classification/data/Y_adult.csv')
test = pd.read_csv('classification/data/test_adult.csv')
target = pd.read_csv('classification/data/target_adult.csv')

Так как данные загружаются с одной лишней колонкой, пропишу функцию ее удаления:

In [3]:
def delete_unn(data):
    return data.drop('Unnamed: 0', axis=1)

Предобрабатываю данные, удаляю лишнюю колонку, target делаю списком:

In [4]:
X = delete_unn(X)
Y = delete_unn(Y)
test = delete_unn(test)
target = delete_unn(target)
target = target['income'].values

### Метрики качества

In [5]:
class metrics:
    
    def _tfpns(self, preds, target):
        self.TP = 0
        self.TN = 0
        self.FP = 0
        self.FN = 0
        
        for i in range(len(target)):
            if (target[i] == 1) & (preds[i] == 1):
                self.TP += 1
            elif (target[i] == 0) & (preds[i] == 0):
                self.TN += 1
            elif (target[i] == 0) & (preds[i] == 1):
                self.FP += 1
            elif (target[i] == 1) & (preds[i] == 0):
                self.FN += 1        
    
    def accuracy(preds, target):
        if len(target) != len(preds):
            raise Exception('Несоответствие размерностей')
        return np.equal(target, preds).sum() / float(target.shape[0])
    # точность
    def precision(self, preds, target):
        if len(target) != len(preds):
            raise Exception('Несоответствие размерностей')
        self._tfpns(preds, target)
        return self.TP / (self.TP + self.FP)
    # полнота
    def recall(self, preds, target):
        if len(target) != len(preds):
            raise Exception('Несоответствие размерностей')
        self._tfpns(preds, target)
        return self.TP / (self.TP + self.FN)
    
    def logloss(preds, target):
        if len(target) != len(preds):
            raise Exception('Несоответствие размерностей')
        l = len(target)
        return (-1 / l) * sum([target[i] * math.log(preds[i]) + (1 - target[i]) * math.log(1 - preds[i]) for i in range(l)])
    
    def F1(self, preds, target):
        if len(target) != len(preds):
            raise Exception('Несоответствие размерностей')
        p = self.precision(preds, target)
        r = self.recall(preds, target)
        return (2 * p * r) / (p + r)
    # true positive rate
    def TPR(self, preds, target):
        if len(target) != len(preds):
            raise Exception('Несоответствие размерностей')
        self._tfpns(preds, target)
        return self.TP / (self.TP + self.FN)
    # false positive rate
    def FPR(self, preds, target):
        if len(target) != len(preds):
            raise Exception('Несоответствие размерностей')
        self._tfpns(preds, target)
        return self.FP / (self.FP + self.TN)
    
    def MSE(preds, target):
        if len(target) != len(preds):
            raise Exception('Несоответствие размерностей')
        s = 0
        for i in range(len(target)):
            s += (target[i] - preds[i]) ** 2
        return s / len(preds)
    
    def R2(preds, target):
        if len(target) != len(preds):
            raise Exception('Несоответствие размерностей')
        ESS = sum([(preds[i] - target.mean()) ** 2 for i in range(len(preds))])
        RSS = sum([(preds[i] - target[i]) ** 2 for i in range(len(preds))])
        TSS = ESS + RSS
        return ESS / TSS
                                                

In [6]:
metric = metrics()

### Логистическая регрессия

In [7]:
class Logistic_Regression:
    
    def __init__(self, method='MNK', L=0.9, lmbd=10, epochs=200, eps=0.01):
        self.L = L
        self.lmbd = lmbd
        self.epochs = epochs
        self.eps = eps
        self.weights = None
        self.X = None
        self.Y = None
        self.preds = None
        self.method = method           
    
    def _sigmoid(x):
        return np.exp(-np.logaddexp(0, -x))
    
    def _Gradient(X, Y, L, lmbd, epochs, eps):
        w0 = np.zeros(X.shape[1])
        w = np.ones(X.shape[1])
        i = 0
        e = np.linalg.norm(w - w0)
        while (i <= epochs) & (e > eps): 
            w[0] = w[0] + L * sum(Y[0] - w[0] - sum(w[k + 1] * X[k] for k in range(w.shape[0] - 1)) + lmbd * np.sign(w[0]))
            for j in range(1, w.shape[0]):        
                w[j] = w[j] + L * sum(X[j - 1] * (Y[j] - w[0] - sum(w[k + 1] * X[k] for k in range(w.shape[0] - 1)) + lmbd * np.sign(w[j])))
            e = np.linalg.norm(w - w0)
            w0 = w
            i += 1
        #while (np.linalg.norm(w - w0) > eps) & (k <= epochs):
            #w0 = w
            #temp = L * ((1 / k) ** 0.5)
            #c = (Y * Logistic_Regression._sigmoid(-Y * X.dot(w0)))
            #c = c.reshape((c.shape[0], 1)) 
            #w = w - temp * (-(c * X).sum(axis = 0)) + lmbd * np.sign(w)
            #k += 1            
        return w
    
    def _MNK(X, Y, L, lmbd, epochs, eps):
        X = np.array(X)
        y = np.array(Y)
        ones = np.ones(X.shape[0]).reshape(X.shape[0], 1)
        X = np.hstack((ones, X))
        w = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)
        return w
    
    def log_loss(self, target):
        if self.preds is None:
            raise Exception('Воспользуйтесь функцией predict')
        return np.log(1. + np.exp(- self.preds * target)).sum()
    
    def fit(self, train_data, target):   
        x = np.ones((train_data.shape[0], 1))
        self.X = np.hstack((x, train_data))
        self.Y = np.array(target)
        self.Y[target == 0] = -1
        if self.method == 'MNK':
            self.weights = Logistic_Regression._MNK(train_data, target, self.L, self.lmbd, self.epochs, self.eps)
        if self.method == 'Gradient':
            self.weights = Logistic_Regression._Gradient(train_data, target, self.L, self.lmbd, self.epochs, self.eps)
        
    def predict(self, test_data):
        ones = np.ones((test_data.shape[0], 1))
        test = np.hstack((ones, test_data))
        test_w = test.dot(self.weights)
        self.preds = np.zeros(test_w.shape)
        self.preds[test_w > 0] = 1
        self.preds = self.preds.reshape(self.preds.shape[0],)
        
    def probs(self, test_data):
        ones = np.ones((test_data.shape[0], 1))
        test = np.hstack((ones, test_data))
        test_w = test.dot(self.weights)
        return Logistic_Regression._sigmoid(test_w)

Обучение модели с МНК:

In [8]:
model_logreg_MNK = Logistic_Regression()
model_logreg_MNK.fit(X, Y)

In [9]:
model_logreg_MNK.predict(test)
logreg_MNK_preds = model_logreg_MNK.preds

Проверка качества с помощью accuracy и precision:

In [10]:
metrics.accuracy(logreg_MNK_preds, target)

0.60936060438548

In [11]:
metric.precision(logreg_MNK_preds, target)

0.23014169171318163

Обучение с помощью стандартной функции:

In [12]:
logreg = LogisticRegression()
logreg.fit(X, Y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [13]:
logreg_p = logreg.predict(test)

Проверка качества:

In [14]:
metrics.accuracy(logreg_p, target)

0.8423929734045821

In [15]:
metric.precision(logreg_p, target)

0.7120609675281644

### SVM

In [16]:
class SVM:
    
    def __init__(self, etha=0.01, alpha=0.1, epochs=200):
        self.etha = etha
        self.alpha = alpha
        self.epochs = epochs
        self.weights = None
        self.X_train = None
        self.Y_train = None
        self.X_val = None
        self.Y_val = None
        self.preds = None
        self.train_errors = []
        self.val_errors = []
        self.train_loss = []
        self.val_loss = []
        
    def _make_data(x):
        x_new = np.zeros((x.shape[0], x.shape[1] + 1))
        x_new[:, :-1] = np.array(x)
        x_new[:, -1] = 1
        return x_new
    
    def _hinge_loss(self, x, y):
        return max(0, 1 - y * (self.weights.dot(x)))
    
    def _margin_loss(self, x, y):
        return self._hinge_loss(x, y) + self.alpha * self.weights * self.weights
    
    def fit(self, X, Y, validate_rate=0.4, random_state=42):
        X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=validate_rate, random_state=random_state)
        self.X_train = SVM._make_data(X_train)
        self.X_val = SVM._make_data(X_val)
        self.Y_train = np.array(Y_train)
        self.Y_train[Y_train == 0] = -1
        self.Y_val = np.array(Y_val)
        self.Y_val[Y_val == 0] = -1
        self.weights = np.random.normal(loc=0, scale=0.05, size=self.X_train.shape[1])
        
        for epoch in range(self.epochs):
            tr_err = 0
            tr_loss = 0
            val_err = 0
            val_loss = 0
            
            for i, x in enumerate(self.X_train):
                marg = self.Y_train[i] * self.weights.dot(x)
                if marg >= 1:
                    self.weights = self.weights - self.etha * self.alpha * self.weights / self.epochs
                else:
                    self.weights = self.weights + self.etha * (self.Y_train[i] * x - self.alpha * self.weights / self.epochs)
                    tr_err += 1
                tr_loss += self._margin_loss(x, self.Y_train[i])
                
            for i, x in enumerate(self.X_val):
                val_loss += self._margin_loss(x, self.Y_val[i])
                val_err += (self.Y_val[i] * self.weights.dot(x) < 1).astype(int)
                
            self.train_errors.append(tr_err)
            self.train_loss.append(tr_loss)
            self.val_errors.append(val_err)
            self.val_loss.append(val_loss)
            
    def predict(self, test):
        X = SVM._make_data(test)
        self.preds = np.zeros(X.shape[0])
        
        for i, x in enumerate(X):
            if np.sign(self.weights.dot(x)) == 1:
                self.preds[i] = 1
        

Обучение:

In [17]:
model_SVM = SVM()
model_SVM.fit(X, Y)

In [18]:
model_SVM.predict(test)
SVM_preds = model_SVM.preds

Проверка качества:

In [19]:
metrics.accuracy(SVM_preds, target)

0.7730483385541429

In [20]:
metric.precision(SVM_preds, target)

0.9415204678362573

Обучение с помощью встроенной функции:

In [21]:
svm = SVC(kernel='linear')
svm.fit(X, Y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='linear', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [22]:
svm_p = svm.predict(test)

Проверка качества:

In [23]:
metrics.accuracy(svm_p, target)

0.8447269823720902

In [24]:
metric.precision(svm_p, target)

0.7297768479776848

### Дерево решений (старая версия - очень долго обучалась)

In [25]:
class Descision_Tree_1:
    
    def __init__(self, classes=2, min_size=3, max_depth=7):
        self.classes = classes
        self.min_size = min_size
        self.max_depth = max_depth
        self.classes_vals = list(np.arange(classes))
        self.split = None
        self.data = None
        self.preds = None
        pass
    
    def _gini(groups, classes):
        gini = 0
        count = sum([len(group) for group in groups])
        
        for group in groups:
            size = len(group)
            if size == 0:
                continue
            score = 0
            for c in classes:
                score = ([g[-1] for g in group].count(c) / size) ** 2
            gini += (1 - score) * size / count
        return gini
    
    def _get_frequent(self, group):
        vals = [g[-1] for g in group]
        return max(set(vals), key=vals.count)
    
    def _split_by_step(self, data):
        score = 1000
        
        for i in range(data.shape[0] - 1):
            for row in data:
                left = []
                right = []
                for r in data:
                    if r[i] < row[i]:
                        left.append(r)
                    else:
                        right.append(r)
                gini = Descision_Tree._gini((left, right), self.classes_vals)
                if gini < score:
                    index = i
                    val = row[i]
                    score = gini
                    groups = (left, right)
        return {'idx': index, 'val': val, 'groups': groups}
    
    def _is_less(self, group, title, depth):
        if len(group) <= self.min_size:
            root[title] = self._get_frequent(group)
        else:
            root[title] = self._split_by_step(group)
            self._split(root[title], depth + 1)
     
    def _split(self, root, depth):
        print(depth)
        left, right = root['groups']
        del(root['groups'])
        if not left or not right:
            root['left'] = root['right'] = self._get_frequent(left + right)
            return
        if depth >= self.max_depth:
            root['left'] = self._get_frequent(left)
            root['right'] = self._get_frequent(right)
            return
        self._is_less(self, left, 'left', depth)
        self._is_less(self, right, 'right', depth)
        
    def _build_tree(self):
        root = self._split_by_step(self.data)
        self._split(root, 1)
        self.split = root
        
    def fit(self, X, Y):
        self.data = np.hstack((np.array(X), np.array(Y)))
        self._build_tree()
        
    def _instance(self, root, title, x):
        if isintance(root[title], dict):
            return self._predict_one(root[title], x)
        else:
            return root[title]
        
    def _predict_one(self, root, x):
        if x[root['index']] < root['value']:
            self._instance(root, 'left', x)
        else:
            self._instance(root, 'right', x)
            
        
    def predict(self, test):
        X = np.array(test)
        self.preds = []
        for x in X:
            self.preds.append(self._predict_one(self.split, x))


### Дерево решений (новая версия)

In [26]:
class Descision_Tree:
    
    class Node:
        
        def __init__(self, idxs=None, positives=None, negatives=None, c=None):
            self.predicat = None
            self.left = None
            self.right = None
            self.positives = positives
            self.negatives = negatives
            self.cl = c
            self.idxs = idxs
            self.len = self.idxs.shape[0]
        
        def is_leaf(self):
            return self.predicat == None
        
        def is_root(self):
            return not self.is_leaf()
        
        def make_leaf(self):
            self.predicat = None
            self.left = None
            self.right = None
            
        def classify(self):
            if self.positives >= self.negatives:
                self.cl = 1
            else:
                self.cl = 0
            
    def __init__(self, classes=2, min_size=3, max_depth=7):
        self.classes = classes
        self.min_size = min_size
        self.max_depth = max_depth
        self.classes_vals = list(np.arange(classes))
        self.split = None
        self.X = None
        self.Y = None
        self.preds = None
        self.idxs = None
        self.is_cat = None
        self.cat_vals = {}
            
    def _gini(poses, negs):
        p = poses / (poses + negs)
        return 2 * p * (1 - p) 
    
    def _find_cat(self):
        self.is_cat = np.zeros(self.X.shape[1])
        for i in range(self.X.shape[1]):
            uniq = np.unique(self.X[:, i])
            if len(uniq) < 10:
                self.cat_vals[i] = uniq
                self.is_cat[i] = 1
    
    def _build_tree(self):
        self.split = Descision_Tree.Node(idxs=np.arange(self.Y.shape[0]))
        self.split.positives = self.Y.sum()
        self.split.negatives = self.split.len - self.split.positives
        self._make_nodes(self.split, 1)
    
    def _stop(self, node, depth):
        if (node.len < self.min_size) | (depth > self.max_depth):
            return True
        return (node.positives == 0 | node.negatives == 0)
    
    def _find_split(self, node):
        x = self.X[node.idxs]
        y = self.Y[node.idxs]
        score = Descision_Tree._gini(node.positives, node.negatives)
        
        best_score = 0
        best_ind = None
        best_val = None
        
        for i in range(self.X.shape[1]):
            col = x[:, i]
            if self.is_cat[i]:
                vals = self.cat_vals[i]
                for j in range(1, vals.shape[0]):
                    yy = y[col < vals[j]]
                    if (yy.shape[0] == 0) | (yy.shape[0] == y.shape[0]):
                        continue
                    right_pos = yy.sum()
                    right_neg = yy.shape[0] - right_pos
                    right_score = Descision_Tree._gini(right_pos, right_neg)
                    left_score = Descision_Tree._gini(node.positives - right_pos, node.negatives - right_neg)
                    
                    s = score - (yy.shape[0] * right_score / node.len) - (1 - yy.shape[0] / node.len) * left_score
                    
                    if s > best_score:
                        best_val = vals[j]
                        best_ind = i
                        best_score = s
            else:
                sort_x = np.argsort(col)
                right_neg = 0
                right_pos = 0
                last_val = col[sort_x[0]]
                
                for j in range(1, col.shape[0]):
                    if y[sort_x[j - 1]]:
                        right_pos += 1
                    else:
                        right_neg += 1
                    
                    ind = sort_x[j]
                    if col[ind] == last_val:
                        continue
                    last_val = col[ind]
                    right_score = Descision_Tree._gini(right_pos, right_neg)
                    left_score = Descision_Tree._gini(node.positives - right_pos, node.negatives - right_neg)
                    
                    s = score - (j * right_score / node.len) - (1 - j / node.len) * left_score
                    if s > best_score:
                        best_val = col[ind]
                        best_ind = i
                        best_score = s
        if best_score > 0:
            return best_ind, best_val
        return None, None
    
    def _split(self, node, ind, val):
        node.predicat = lambda x: x[ind] < val
        col = self.X[node.idxs, ind]
        
        right_inds = node.idxs[col < val]
        left_inds = node.idxs[col >= val]
        
        right_pos = self.Y[right_inds].sum()
        left_pos = self.Y[left_inds].sum()
        right_neg = right_inds.shape[0] - right_pos
        left_neg = left_inds.shape[0] - left_pos
        
        node.left = Descision_Tree.Node(idxs=left_inds, positives=left_pos, negatives=left_neg)
        node.right = Descision_Tree.Node(idxs=right_inds, positives=right_pos, negatives=right_neg)
        
    def _make_nodes(self, node, depth):
        node.classify()
        
        if self._stop(node, depth) == True:
            return
        ind, val = self._find_split(node)
        if (ind == None) | (val == None):
            return
        
        self._split(node, ind, val)
        
        self._make_nodes(node.left, depth + 1)
        self._make_nodes(node.right, depth + 1)
    
    def fit(self, X, Y):
        self.X = np.array(X)
        self.Y = np.array(Y)
        self.idxs = np.arange(self.X.shape[0])
        self._find_cat()
        self._build_tree()
        
    def predict(self, test):
        test = np.array(test)
        self.preds = np.zeros(len(test))
        for i in range(len(test)):
            node = self.split
            while node.is_root():
                if node.predicat(test[i]):
                    node = node.right
                else:
                    node = node.left
            self.preds[i] = node.cl

Обучение:

In [27]:
model_tree = Descision_Tree()
model_tree.fit(X, Y)

In [28]:
model_tree.predict(test)
tree_preds = model_tree.preds

Проверка качества:

In [29]:
metrics.accuracy(tree_preds, target)

0.8506848473680978

In [30]:
metric.precision(tree_preds, target)

0.787952787952788

Обучение с помощью стандартных моделей:

In [31]:
tree = DecisionTreeClassifier(max_depth=7)
tree.fit(X, Y)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=7,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best')

In [32]:
tree_p = tree.predict(test)

Проверка качества:

In [33]:
metrics.accuracy(tree_p, target)

0.8425158159818193

In [34]:
metric.precision(tree_p, target)

0.7216459197786999

### kNN

In [35]:
class kNN:
    
    def __init__(self, k, num_classes):
        if (k < 2) | (num_classes < 2):
            raise Exception("Неверное задание параметров")
        self.k = k
        self.classes = num_classes
        self.X = None
        self.Y = None
        self.preds = []
        pass
    
    def _dist(x, y):
        x = np.array(x, dtype='f')    
        y = np.array(y, dtype='f')
        return np.linalg.norm(x - y)
    
    def fit(self, train_data, target):       
        self.X = train_data
        self.Y = target
        if len(self.Y) - 1 < self.k:
            raise Exception('k > len(target)')
        
    def predict(self, test_data):
        for i in range(test_data.shape[0]):
            print(i)
            test_point = test_data.loc[[i], test_data.columns]
            distances = []
            r = []
            for j in range(self.X.shape[0]):
                x = self.X.loc[[j], self.X.columns]
                distances.append([kNN._dist(test_point, x), self.Y.iloc[[j], [0]].values[0][0]])
            
            stat = [0 for i in range(self.classes)]
            for d in sorted(distances)[0:self.k]:
                stat[d[1]] += 1

            self.preds.append(sorted(zip(stat, range(self.classes)), reverse=True)[0][1])  

Мой алгоритм получился неоптимизированным по времени, поэтому проверю его работу лишь на первых 50 элементах тестовой выборки:

In [36]:
new_test = test[:50]

Обучение:

In [37]:
model_knn = kNN(k=5, num_classes=2)
model_knn.fit(X, Y)

In [38]:
model_knn.predict(new_test)
knn_preds = model_knn.preds

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49


Проверка качества:

In [39]:
metrics.accuracy(knn_preds, target[:50])

0.84

In [40]:
metric.precision(knn_preds, target[:50])

1.0

Обучение с помощью встроенной модели:

In [41]:
knn = KNeighborsClassifier(n_neighbors=5, n_jobs=2)
knn.fit(X, Y)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=2, n_neighbors=5, p=2,
                     weights='uniform')

In [42]:
knn_p = knn.predict(new_test)

Проверка качества:

In [44]:
metrics.accuracy(knn_p, target[:50])

0.88

In [45]:
metric.precision(knn_p, target[:50])

1.0

# Задача регрессии

### Загрузка данных

In [63]:
X_r = pd.read_csv('housing/data/X_housing.csv')
Y_r = pd.read_csv('housing/data/Y_housing.csv')
test_r = pd.read_csv('housing/data/test_housing.csv')
target_r = pd.read_csv('housing/data/target.csv')

### Предобработка

In [64]:
X_r = delete_unn(X_r)
Y_r = delete_unn(Y_r)
test_r = delete_unn(test_r)
target_r = target_r['SalePrice'].values

In [65]:
un_test = []
un_x = []

for c in X_r:
    if c in test_r:
        continue
    un_test.append(c)

for c in test_r:
    if c in X_r:
        continue
    un_x.append(c)

In [66]:
test_r = test_r.drop(un_x, axis=1)
X_r = X_r.drop(un_test, axis=1)

In [67]:
test_r.shape

(733, 164)

In [68]:
X_r.shape

(2197, 164)

### Обучение на xgboost

In [69]:
xgbr_params = {'max_depth':7,
               'n_estimators':700,
               'objective':'reg:squarederror',
               'n_jobs':-1, 
               'min_child_weight':1,
               'eta':0.3, 
               'subsample':0.8, 
               'gamma':0.5, 
               'colsample_bytree':0.8, 
               'learning_rate':0.05}
model = xgb.XGBRegressor(**xgbr_params, random_state=42)
model.fit(X_r, Y_r)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=0.8, eta=0.3, gamma=0.5,
             importance_type='gain', learning_rate=0.05, max_delta_step=0,
             max_depth=7, min_child_weight=1, missing=None, n_estimators=700,
             n_jobs=-1, nthread=None, objective='reg:squarederror',
             random_state=42, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
             seed=None, silent=None, subsample=0.8, verbosity=1)

In [70]:
preds = model.predict(test_r)

### Проверка качества

In [71]:
metrics.MSE(preds, target_r)

470001922.20854515

In [72]:
metrics.R2(preds, target_r)

0.9193663340242011