In [178]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor

In [179]:
def prediction_test(models, x_test, mean, learning_rate=0.01):
    """Predict with a boosted ensemble of already-fitted regressors.

    The prediction starts from the constant ``mean`` and adds the output of
    every model in ``models``, each scaled by ``learning_rate``.

    Parameters
    ----------
    models : sequence
        Fitted estimators exposing ``predict(x_test)`` that returns one value
        per row of ``x_test``.
    x_test : array-like with a ``shape`` attribute
        Feature matrix; ``x_test.shape[0]`` is the number of samples.
    mean : float
        Initial constant prediction (the baseline the ensemble was boosted
        from).
    learning_rate : float, default 0.01
        Shrinkage applied to each model's contribution. It must equal the
        value used while the models were trained; the default keeps the
        behaviour of the previous hard-coded constant.

    Returns
    -------
    numpy.ndarray
        Column vector of shape ``(n_samples, 1)`` with the predictions.
    """
    n_samples = x_test.shape[0]

    # Force a float accumulator: with an integer `mean` the in-place addition
    # of float contributions below would otherwise fail with a casting error.
    y_pred = np.full((n_samples, 1), mean, dtype=float)

    for model in models:
        contribution = np.asarray(model.predict(x_test)).reshape(n_samples, 1)
        y_pred += learning_rate * contribution

    return y_pred

In [180]:

class GradientBoost:
    """Gradient boosting for regression built from ``DecisionTreeRegressor`` trees.

    Training starts from the mean of the targets and repeatedly fits a tree to
    the current residuals (``y - y_pred``), adding each tree's prediction
    scaled by ``learning_rate``. The tracked loss is half the mean squared
    error.

    Parameters
    ----------
    max_depth, min_samples_splits, min_samples_leaf, max_feature
        Forwarded to every ``DecisionTreeRegressor`` as ``max_depth``,
        ``min_samples_split``, ``min_samples_leaf`` and ``max_features``.
    learning_rate : float
        Shrinkage applied to each tree's contribution.
    number_of_trees : int
        Number of boosting rounds (one tree per round).
    """

    def __init__(self, max_depth=10, min_samples_splits=3, min_samples_leaf=5, max_feature=3, learning_rate=0.01 , number_of_trees = 1000):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_splits
        self.min_samples_leaf = min_samples_leaf
        self.max_features = max_feature
        self.learning_rate = learning_rate
        self.number_of_trees = number_of_trees
        # Baseline prediction; overwritten with the target mean by `train`.
        self.y_mean = 0
        # Trees from the most recent `train` call (empty until then).
        self.models = []

    def __compute_residual(self, y, y_pred):
        """Return the residuals ``y - y_pred``."""
        return y - y_pred

    def __compute_loss(self, y, y_pred):
        """Return half the mean squared error between ``y`` and ``y_pred``."""
        return (1/len(y)) * 0.5 * np.sum((y - y_pred)**2)

    def __create_model(self, X, y):
        """Fit and return one regression tree on ``(X, y)``."""
        base = DecisionTreeRegressor(max_depth=self.max_depth,
                                    min_samples_split=self.min_samples_split,
                                    min_samples_leaf=self.min_samples_leaf,
                                    max_features=self.max_features)
        base.fit(X, y)
        return base

    def predict(self, models, x, y=None):
        """Predict targets for ``x`` using the given boosted trees.

        Parameters
        ----------
        models : sequence
            Fitted estimators, e.g. the first value returned by `train`.
        x : array-like
            Feature matrix; ``len(x)`` is the number of samples.
        y : ignored
            Unused; kept (now optional) so existing three-argument calls
            keep working.

        Returns
        -------
        numpy.ndarray
            Column vector of shape ``(len(x), 1)``.
        """
        n_samples = len(x)
        y_pred = np.full((n_samples, 1), self.y_mean, dtype=float)

        # Use the models handed in by the caller; the previous code indexed
        # an attribute (`self.model`) that was never assigned.
        for model in models:
            contribution = np.asarray(model.predict(x)).reshape(n_samples, 1)
            y_pred += self.learning_rate * contribution

        return y_pred

    def train(self, x, y):
        """Fit ``number_of_trees`` trees on the residuals of the running prediction.

        Parameters
        ----------
        x : array-like
            Feature matrix with one row per sample.
        y : array-like
            Targets, either 1-D or a single column.

        Returns
        -------
        tuple
            ``(models, losses, y_pred)``: the fitted trees, the loss measured
            before each boosting round, and the final training predictions as
            a column vector.
        """
        y = np.asarray(y, dtype=float)
        if y.ndim == 1:
            # A flat target would broadcast against the (n, 1) prediction
            # column into an (n, n) matrix; keep everything column-shaped.
            y = y.reshape(-1, 1)

        models = []
        losses = []
        self.y_mean = np.mean(y)
        y_pred = np.full((len(y), 1), self.y_mean, dtype=float)

        for _ in range(self.number_of_trees):
            losses.append(self.__compute_loss(y, y_pred))

            residual = self.__compute_residual(y, y_pred)
            model = self.__create_model(x, residual)

            model_predict = np.asarray(model.predict(x)).reshape(len(x), 1)
            y_pred += self.learning_rate * model_predict

            models.append(model)

        self.models = models
        return models, losses, y_pred

In [181]:


# Read the match log from the CSV file.
data = pd.read_csv('dataset/csgo.csv')

# Discard these columns before encoding.
data = data.drop(
    columns=[
        'day', 'month', 'year', 'date', 'wait_time_s',
        'match_time_s', 'team_a_rounds', 'team_b_rounds', 'result',
    ]
)

# Expand the `map` column into one indicator column per category.
onehot_encoder = OneHotEncoder()
map_encoded = onehot_encoder.fit_transform(data[["map"]]).toarray()
map_encoded_data = pd.DataFrame(
    map_encoded,
    columns=onehot_encoder.get_feature_names_out(["map"]),
)

# Indicator columns first, followed by the remaining columns.
data = pd.concat([map_encoded_data, data.drop(columns=["map"])], axis=1)

scaler = StandardScaler()  # instantiated here but not applied in this cell

# `points` is the target; it becomes a column vector via expand_dims.
x = data.drop(columns=['points'])
y = np.expand_dims(data['points'], axis=1)
print(x)




      map_Austria  map_Cache  map_Canals  map_Cobblestone  map_Dust II  \
0             0.0        0.0         0.0              0.0          0.0   
1             0.0        0.0         0.0              0.0          0.0   
2             0.0        0.0         0.0              0.0          0.0   
3             0.0        0.0         0.0              0.0          0.0   
4             0.0        0.0         0.0              0.0          0.0   
...           ...        ...         ...              ...          ...   
1128          0.0        0.0         0.0              0.0          1.0   
1129          0.0        0.0         0.0              0.0          1.0   
1130          0.0        0.0         0.0              0.0          1.0   
1131          0.0        0.0         0.0              0.0          1.0   
1132          0.0        0.0         0.0              0.0          1.0   

      map_Inferno  map_Italy  map_Mirage  map_Nuke  map_Overpass   ping  \
0             0.0        0.0        

In [182]:
# Hold out 20% of the rows for testing, with a fixed seed for the shuffle.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Mean of the training targets, used as the baseline prediction downstream.
mean = np.mean(y_train)

print(x_train.dtypes)

# Number of held-out rows.
print(len(x_test))

map_Austria        float64
map_Cache          float64
map_Canals         float64
map_Cobblestone    float64
map_Dust II        float64
map_Inferno        float64
map_Italy          float64
map_Mirage         float64
map_Nuke           float64
map_Overpass       float64
ping               float64
kills              float64
assists            float64
deaths             float64
mvps               float64
hs_percent         float64
dtype: object
227


In [183]:
# Fit the hand-written booster with its default hyper-parameters.
GB = GradientBoost()
models, losses, y_pred = GB.train(x_train, y_train)

# Score the held-out rows, starting from the training-target mean.
y_predictions = prediction_test(models, x_test, mean)
mse = mean_squared_error(y_test, y_predictions)

# Show every prediction next to its true value.
for predicted, actual in zip(y_predictions, y_test):
    print({'Predictions': predicted, 'Actual': actual})

print('Mean Squared Error:', mse)

{'Predictions': array([39.50831897]), 'Actual': array([35.])}
{'Predictions': array([37.19686855]), 'Actual': array([32.])}
{'Predictions': array([38.22803078]), 'Actual': array([40.])}
{'Predictions': array([50.53182617]), 'Actual': array([52.])}
{'Predictions': array([37.37803122]), 'Actual': array([37.])}
{'Predictions': array([40.08833056]), 'Actual': array([37.])}
{'Predictions': array([36.07286825]), 'Actual': array([32.])}
{'Predictions': array([52.40160628]), 'Actual': array([48.])}
{'Predictions': array([43.59569228]), 'Actual': array([47.])}
{'Predictions': array([47.92829529]), 'Actual': array([49.])}
{'Predictions': array([36.8754641]), 'Actual': array([32.])}
{'Predictions': array([47.28377238]), 'Actual': array([42.])}
{'Predictions': array([49.16598298]), 'Actual': array([48.])}
{'Predictions': array([52.22690376]), 'Actual': array([48.])}
{'Predictions': array([32.83502529]), 'Actual': array([33.])}
{'Predictions': array([33.63275219]), 'Actual': array([32.])}
{'Predict

In [184]:
# rgs = GradientBoostingRegressor()

# rgs.fit(x_train, y_train.ravel())

# y_predict = rgs.predict(x_test)

# mse = mean_squared_error(y_test, y_predict)

# print('Mean Squared Error:', mse)