## **Two ways to estimate the team value**
### **1st Direct estimation from records**
### **2nd Supervised ML regression algorithm**

In [1]:
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import LinearSVR
from sklearn.metrics import mean_squared_error, r2_score

from lightgbm import LGBMRegressor

In [2]:
pd.set_option('display.max_columns', 30)
pd.set_option('display.max_rows', 30)

### Weapon & Grenade Prices

In [3]:
wp_values = {'USP': 200, 'P2000': 200, 'Glock': 200, 'P250': 300, 'DualBarettas': 500, 'Tec9': 500, 'FiveSeven': 500, 'CZ': 500, 'Deagle': 700, 
             'Nova': 1200, 'Swag7': 1800, 'SawedOff': 1200, 'XM1014': 2000, 'M249': 5200, 'Negev': 4000,
             'Mac10': 1050, 'MP9': 1250, 'MP7': 1700, 'UMP': 1200, 'Bizon': 1400, 'P90': 2350, 
             'Gallil': 2000, 'Famas': 2250, 'Scout': 1700, 'AK47': 2700, 'M4A4': 3100, 'M4A1': 3100, 'SG556': 3000, 'AUG': 3300, 'G3SG1': 5000, 'Scar20': 5000, 'AWP': 4750, 
             'Decoy': 50, 'Flash': 200, 'Smoke': 300, 'HE': 300, 'Incendiary': 600, 'Molotov': 400, 
             'Knife': 0, 'Zeus': 200}

### Data

In [4]:
df = pd.read_csv('../data/processed/base_to_ml_predicted_team_value.csv')

## **2nd Supervised ML regression algorithm**
## **Algorithm**

### Preprocessing

Erase column 'round_type' because we cannot know this value from the live records

In [5]:
df.drop(['round_type'], axis=1, inplace=True)

In [6]:
df.tail()

Unnamed: 0,file,round,wp_ct_val,wp_t_val,nade_ct_val,nade_t_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,ct_val_real,t_val_real
310808,esea_match_13829171.dem,20,15500.0,13500.0,600,1900,0,4,0.0,0.0,0,3,23250,28150
310809,esea_match_13829171.dem,21,17562.5,16062.5,3200,1400,5,0,1.0,0.0,1,0,30950,25450
310810,esea_match_13829171.dem,22,23750.0,13500.0,2700,1400,2,0,1.0,0.0,2,0,32900,16600
310811,esea_match_13829171.dem,23,14312.5,13500.0,1600,2600,2,4,0.0,0.0,0,1,25950,24450
310812,esea_match_13829171.dem,24,23750.0,20333.333333,1200,2700,0,2,0.0,1.0,0,2,14300,27500


We want to predict 'ct_val' and 't_val' so we will split the dataframe into 2 dataframes, one with CT data and other with T data

**ct_df** = ['file',
 'round',
 'wp_ct_val',
 'nade_ct_val',
 'ct_alive',
 't_alive',
 'ct_winner',
 'bomb_planted',
 'ct_cons_wins',
 't_cons_wins',
 'ct_val_real']


**t_df** = ['file',
 'round',
 'wp_t_val',
 'nade_t_val',
 'ct_alive',
 't_alive',
 'ct_winner',
 'bomb_planted',
 'ct_cons_wins',
 't_cons_wins',
 't_val_real']

In [7]:
# Two DataFrames, one per CT another for T

ct_df = df[['file', 'round', 'wp_ct_val', 'nade_ct_val', 'ct_alive', 't_alive', 'ct_winner', 'bomb_planted', 'ct_cons_wins', 't_cons_wins', 'ct_val_real']]
t_df = df[['file', 'round', 'wp_t_val', 'nade_t_val', 'ct_alive', 't_alive', 'ct_winner', 'bomb_planted', 'ct_cons_wins', 't_cons_wins', 't_val_real']]

In [8]:
display(ct_df.head())
t_df.tail()

Unnamed: 0,file,round,wp_ct_val,nade_ct_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,ct_val_real
0,esea_match_13779704.dem,1,1000.0,550,5,5,0.5,0.5,0,0,4550
1,esea_match_13779704.dem,2,10100.0,1100,4,0,1.0,0.0,1,0,18450
2,esea_match_13779704.dem,3,4125.0,900,0,1,0.0,0.0,0,1,9550
3,esea_match_13779704.dem,4,1000.0,0,0,3,0.0,1.0,0,2,1600
4,esea_match_13779704.dem,5,15500.0,1400,0,4,0.0,1.0,0,3,23350


Unnamed: 0,file,round,wp_t_val,nade_t_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,t_val_real
310808,esea_match_13829171.dem,20,13500.0,1900,0,4,0.0,0.0,0,3,28150
310809,esea_match_13829171.dem,21,16062.5,1400,5,0,1.0,0.0,1,0,25450
310810,esea_match_13829171.dem,22,13500.0,1400,2,0,1.0,0.0,2,0,16600
310811,esea_match_13829171.dem,23,13500.0,2600,2,4,0.0,0.0,0,1,24450
310812,esea_match_13829171.dem,24,20333.333333,2700,0,2,0.0,1.0,0,2,27500


In [9]:
# LabelEncoder to categorical feature -> file
le = LabelEncoder()

ct_df['file'] = le.fit_transform(ct_df.file)
t_df['file'] = le.fit_transform(t_df.file)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [10]:
display(ct_df.head())
t_df.tail()

Unnamed: 0,file,round,wp_ct_val,nade_ct_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,ct_val_real
0,0,1,1000.0,550,5,5,0.5,0.5,0,0,4550
1,0,2,10100.0,1100,4,0,1.0,0.0,1,0,18450
2,0,3,4125.0,900,0,1,0.0,0.0,0,1,9550
3,0,4,1000.0,0,0,3,0.0,1.0,0,2,1600
4,0,5,15500.0,1400,0,4,0.0,1.0,0,3,23350


Unnamed: 0,file,round,wp_t_val,nade_t_val,ct_alive,t_alive,ct_winner,bomb_planted,ct_cons_wins,t_cons_wins,t_val_real
310808,12184,20,13500.0,1900,0,4,0.0,0.0,0,3,28150
310809,12184,21,16062.5,1400,5,0,1.0,0.0,1,0,25450
310810,12184,22,13500.0,1400,2,0,1.0,0.0,2,0,16600
310811,12184,23,13500.0,2600,2,4,0.0,0.0,0,1,24450
310812,12184,24,20333.333333,2700,0,2,0.0,1.0,0,2,27500


In [11]:
# Define all the features

CT_FEATS = ['file', 'round', 'wp_ct_val', 'nade_ct_val', 'ct_alive', 't_alive', 'ct_winner', 'bomb_planted', 'ct_cons_wins', 't_cons_wins']
T_FEATS = ['file', 'round', 'wp_t_val', 'nade_t_val', 'ct_alive', 't_alive', 'ct_winner', 'bomb_planted', 'ct_cons_wins', 't_cons_wins']
CT_TARGET = 'ct_val_real'
T_TARGET = 't_val_real'

In [12]:
#Preprocessor

numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])

preprocessor_ct = ColumnTransformer(transformers=[('num', numeric_transformer, CT_FEATS)])
preprocessor_t = ColumnTransformer(transformers=[('num', numeric_transformer, T_FEATS)])

### **SPLITS**

In [13]:
ct_train, ct_test = train_test_split(ct_df)
t_train, t_test = train_test_split(t_df)

In [14]:
print(ct_train.shape, ct_test.shape)
print(t_train.shape, t_test.shape)

(233109, 11) (77704, 11)
(233109, 11) (77704, 11)


### **MODEL**

In [15]:
# from sklearn.linear_model import Lasso, Ridge
# from sklearn.ensemble import RandomForestRegressor
# from sklearn.svm import LinearSVR
# from sklearn.metrics import mean_squared_error

In [16]:
# regressor = LGBMRegressor(boosting_type='gbdt', 
#                        bagging_freq=1, 
#                        bagging_fraction = 0.9, 
#                        n_estimators=100)

regressor = Lasso()

In [17]:
ct_model = Pipeline(steps=[('ct_preprocessor', preprocessor_ct),
                           ('regressor', regressor)])

# t_model = Pipeline(steps=[('t_preprocessor', preprocessor_t),
#                            ('regressor', regressor)])

In [18]:
ct_model.fit(ct_train[CT_FEATS], ct_train[CT_TARGET]);

In [19]:
# t_model.fit(t_train[T_FEATS], t_train[T_TARGET]);

### **CHECK PERFORMANCE**

In [20]:
y_ct_test = ct_model.predict(ct_test[CT_FEATS])
y_ct_train = ct_model.predict(ct_train[CT_FEATS])

# y_t_test = t_model.predict(t_test[T_FEATS])
# y_t_train = t_model.predict(t_train[T_FEATS])

In [21]:
# Mean_squared_error

# print(f"CT test error: {mean_squared_error(y_pred=y_ct_test, y_true=ct_test[CT_TARGET], squared=False)}")
# print(f"CT train error: {mean_squared_error(y_pred=y_ct_train, y_true=ct_train[CT_TARGET], squared=False)}")
# print()
# print(f"T test error: {mean_squared_error(y_pred=y_t_test, y_true=t_test[T_TARGET], squared=False)}")
# print(f"T train error: {mean_squared_error(y_pred=y_t_train, y_true=t_train[T_TARGET], squared=False)}")

In [22]:
# r2_score

print(f"CT test error: {r2_score(y_pred=y_ct_test, y_true=ct_test[CT_TARGET])}")
print(f"CT train error: {r2_score(y_pred=y_ct_train, y_true=ct_train[CT_TARGET])}")
print()
# print(f"T test error: {r2_score(y_pred=y_t_test, y_true=t_test[T_TARGET])}")
# print(f"T train error: {r2_score(y_pred=y_t_train, y_true=t_train[T_TARGET])}")

CT test error: 0.9028121259854016
CT train error: 0.9022757539622063



-----------------------
### LASSO
CT test error: 3292.6930145373203 || 0.90281

CT train error: 3295.6298773874532 || 0.90227

T test error: 3373.994920736141 || 0.85433

T train error: 3366.1969792373247 || 0.85487

----------------------
### RIDGE
CT test error: 3304.062840613299 || 0.90293

CT train error: 3291.813373721748 || 0.90223

T test error: 3353.5882111017418 || 0.85444

T train error: 3372.947181422599 || 0.85483

---------------------------------
### RANDOM FOREST REGRESSOR --> Overfitting
CT test error: 2800.540308676199

CT train error: 1050.021289181493

T test error: 2827.298533217404

T train error: 1066.455275320614

--------------------------------
### LINEAR SVR
CT test error: 3326.937214472523 || 0.90002

CT train error: 3320.733667089124 || 0.90106

T test error: 3376.8151963908695 || 0.85236

T train error: 3398.197092642212 || 0.85268

--------------------------------
### LGBM REGRESSOR --> Looks the most accurate
CT test error: 2710.8081816807326 || 0.93391

CT train error: 2698.860705561208 || 0.93466

T test error: 2725.3209548515083 || 0.90388

T train error: 2728.544963565238 || 0.90519


weapons(rnd), nades(rnd), players_alive(rnd), winner_team(rnd), bomb_planted(rnd), cons_wins(rnd), extr_val(rnd)
