In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder

In [6]:



class GradientBoost:
    """Gradient boosting for regression built from sklearn decision trees.

    The ensemble starts from the mean of the training target and repeatedly
    fits a ``DecisionTreeRegressor`` to the current residuals (the negative
    gradient of the half squared-error loss), adding each tree's output scaled
    by ``learning_rate``.

    Parameters
    ----------
    max_depth, min_samples_splits, min_samples_leaf, max_feature
        Forwarded to every ``DecisionTreeRegressor`` as ``max_depth``,
        ``min_samples_split``, ``min_samples_leaf`` and ``max_features``.
    learning_rate : float
        Shrinkage applied to each tree's prediction.
    number_of_trees : int
        Number of boosting rounds performed by ``train``.
    """

    def __init__(self, max_depth=10, min_samples_splits=3, min_samples_leaf=5, max_feature=3, learning_rate=0.1 , number_of_trees = 50):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_splits
        self.min_samples_leaf = min_samples_leaf
        self.max_features = max_feature
        self.learning_rate = learning_rate
        self.number_of_trees = number_of_trees
        # Initial constant prediction; overwritten by train() with the target mean.
        self.y_mean = 0

    def __compute_residual(self, y, y_pred):
        """Return the residuals ``y - y_pred``."""
        return y - y_pred

    def __compute_loss(self, y, y_pred):
        """Return half the mean squared error between ``y`` and ``y_pred``."""
        return (1/len(y)) * 0.5 * np.sum((y - y_pred)**2)

    def __create_model(self, X, y):
        """Fit and return one regression tree on ``(X, y)`` using the stored hyper-parameters."""
        base = DecisionTreeRegressor(max_depth=self.max_depth,
                                    min_samples_split=self.min_samples_split,
                                    min_samples_leaf=self.min_samples_leaf,
                                    max_features=self.max_features)
        base.fit(X, y)
        return base

    def predict(self, models, x, y=None):
        """Predict targets for ``x`` with the fitted trees in ``models``.

        Parameters
        ----------
        models : sequence
            Fitted estimators, as returned by ``train``; each must expose
            ``predict(x)``.
        x : array-like
            Feature rows to predict for.
        y : ignored
            Unused; accepted only so existing three-argument calls keep working.

        Returns
        -------
        numpy.ndarray
            Float array of shape ``(len(x), 1)``.
        """
        n_rows = len(x)
        # Float accumulator: with the default integer y_mean an integer array
        # could not take the in-place float updates below.
        y_pred = np.full((n_rows, 1), self.y_mean, dtype=float)

        for model in models:
            step = np.asarray(model.predict(x), dtype=float).reshape(n_rows, 1)
            y_pred += self.learning_rate * step

        return y_pred

    def train(self, x, y):
        """Fit ``number_of_trees`` trees on the residuals of the running prediction.

        Parameters
        ----------
        x : array-like
            Training features.
        y : array-like
            Training target, one value per row of ``x``.

        Returns
        -------
        tuple
            ``(models, losses, y_pred)``: the fitted trees, the loss recorded
            before each boosting round, and the final training predictions as
            an array of shape ``(len(y), 1)``.
        """
        models = []
        losses = []

        # Keep the target as a column so it subtracts element-wise from y_pred;
        # a 1-D target would broadcast against the (n, 1) predictions instead.
        y_col = np.asarray(y, dtype=float).reshape(-1, 1)
        n_rows = len(y_col)

        self.y_mean = float(np.mean(y_col))
        y_pred = np.full((n_rows, 1), self.y_mean, dtype=float)

        for _ in range(self.number_of_trees):
            losses.append(self.__compute_loss(y_col, y_pred))

            residual = self.__compute_residual(y_col, y_pred)

            # The tree is fitted on a flat, single-output target.
            model = self.__create_model(x, residual.ravel())

            step = np.asarray(model.predict(x), dtype=float).reshape(n_rows, 1)
            y_pred += self.learning_rate * step

            models.append(model)

        return models, losses, y_pred

In [11]:


# Load the match log and discard the columns that are not used as features.
data = pd.read_csv('dataset/csgo.csv').drop(
    columns=['day', 'month', 'year', 'date', 'wait_time_s',
             'match_time_s', 'team_a_rounds', 'team_b_rounds', 'result'],
)

# One-hot encode the categorical "map" column into a dense indicator matrix,
# with one column per encoder output feature name.
onehot_encoder = OneHotEncoder()
map_encoded = onehot_encoder.fit_transform(data[["map"]]).toarray()
map_encoded_data = pd.DataFrame(
    map_encoded,
    columns=onehot_encoder.get_feature_names_out(["map"]),
)

# Put the indicator columns first, followed by the remaining columns
# (the original "map" column is removed).
data = pd.concat(
    [map_encoded_data, data.drop(columns=["map"])],
    axis=1,
)

# Split into the feature matrix and the regression target ("points").
x = data.drop(columns='points')
y = data['points']

print(x)




      map_Austria  map_Cache  map_Canals  map_Cobblestone  map_Dust II  \
0             0.0        0.0         0.0              0.0          0.0   
1             0.0        0.0         0.0              0.0          0.0   
2             0.0        0.0         0.0              0.0          0.0   
3             0.0        0.0         0.0              0.0          0.0   
4             0.0        0.0         0.0              0.0          0.0   
...           ...        ...         ...              ...          ...   
1128          0.0        0.0         0.0              0.0          1.0   
1129          0.0        0.0         0.0              0.0          1.0   
1130          0.0        0.0         0.0              0.0          1.0   
1131          0.0        0.0         0.0              0.0          1.0   
1132          0.0        0.0         0.0              0.0          1.0   

      map_Inferno  map_Italy  map_Mirage  map_Nuke  map_Overpass   ping  \
0             0.0        0.0        