In [3]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler, MultiLabelBinarizer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.linear_model import Ridge, RidgeClassifier
from sklearn.metrics import accuracy_score, make_scorer, confusion_matrix
from sklearn.kernel_ridge import KernelRidge


In [4]:
# Load the raw data dump; row 0 of the CSV supplies the column names.
data = pd.read_csv(
    "datadump_s5-000.csv",
    header=0,
)

In [5]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

Unnamed: 0,dateid,platform,gamemode,mapname,matchid,roundnumber,objectivelocation,winrole,endroundreason,roundduration,...,primarygrip,primaryunderbarrel,primarybarrel,secondaryweapon,secondaryweapontype,secondarysight,secondarygrip,secondaryunderbarrel,secondarybarrel,secondarygadget
0,20170212,PC,HOSTAGE,CLUB_HOUSE,1522380841,1,STRIP_CLUB,Defender,AttackersKilledHostage,124,...,Vertical,,Compensator,5.7_USG,Pistols,,,,,IMPACT_GRENADE
1,20170212,PC,HOSTAGE,CLUB_HOUSE,1522380841,4,CHURCH,Defender,AttackersEliminated,217,...,Vertical,Laser,Suppressor,P12,Pistols,,,Laser,Suppressor,DEPLOYABLE_SHIELD
2,20170212,PC,HOSTAGE,CLUB_HOUSE,1522380841,3,CHURCH,Defender,AttackersEliminated,160,...,,,,MK1_9mm,Pistols,,,,,DEPLOYABLE_SHIELD
3,20170212,PC,HOSTAGE,CLUB_HOUSE,1522380841,4,CHURCH,Defender,AttackersEliminated,217,...,,,MuzzleBrake,PRB92,Pistols,,,,,IMPACT_GRENADE
4,20170212,PC,HOSTAGE,CLUB_HOUSE,1522380841,6,BEDROOM,Attacker,DefendersEliminated,143,...,Vertical,Laser,Suppressor,P12,Pistols,,,Laser,Suppressor,DEPLOYABLE_SHIELD


In [6]:
# List the column labels available in the loaded frame.
data.columns

Index(['dateid', 'platform', 'gamemode', 'mapname', 'matchid', 'roundnumber',
       'objectivelocation', 'winrole', 'endroundreason', 'roundduration',
       'clearancelevel', 'skillrank', 'role', 'team', 'haswon', 'operator',
       'nbkills', 'isdead', 'primaryweapon', 'primaryweapontype',
       'primarysight', 'primarygrip', 'primaryunderbarrel', 'primarybarrel',
       'secondaryweapon', 'secondaryweapontype', 'secondarysight',
       'secondarygrip', 'secondaryunderbarrel', 'secondarybarrel',
       'secondarygadget'],
      dtype='object')

In [7]:
# Distinct operator labels found in the loaded sample, in order of first appearance.
operatori = pd.unique(data['operator'])
print(operatori)

# Report how many different operators this sample contains.
print(f"\nTotale operatori trovati in questo campione: {len(operatori)}")

['SWAT-CASTLE' 'GSG9-JAGER' 'JTF2-FROST' 'BOPE-CAVEIRA' 'G.E.O.-JACKAL'
 'GIGN-TWITCH' 'SWAT-ASH' 'JTF2-BUCK' 'SPETSNAZ-FUZE' 'GSG9-IQ'
 'NAVYSEAL-BLACKBEARD' 'SPETSNAZ-TACHANKA' 'GSG9-BANDIT' 'G.E.O.-MIRA'
 'SAT-HIBANA' 'NAVYSEAL-VALKYRIE' 'SPETSNAZ-GLAZ' 'SAS-MUTE'
 'SWAT-THERMITE' 'SWAT-PULSE' 'GIGN-DOC' 'SAT-ECHO' 'SAS-SLEDGE'
 'GIGN-MONTAGNE' 'SWAT-RESERVE' 'SAS-SMOKE' 'GIGN-ROOK' 'GSG9-BLITZ'
 'SPETSNAZ-KAPKAN' 'GSG9-RESERVE' 'BOPE-CAPITAO' 'SAS-THATCHER'
 'GIGN-RESERVE' 'SAS-RESERVE' 'SPETSNAZ-RESERVE']

Totale operatori trovati in questo campione: 35


In [8]:
# Raw column name -> name used from here on; only these columns are kept.
COLONNE_MAP = {
    'matchid': 'match_id',
    'roundnumber': 'round_id',
    'mapname': 'map_name',
    'objectivelocation': 'site_name',
    'role': 'role',
    'operator': 'operator',
    'haswon': 'is_round_won',
    'nbkills': 'kills',
    'isdead': 'deaths',
    'gamemode': 'game_mode',
}


def _strip_unit_prefix(name):
    """Return the part of an operator label that follows its last '-'.

    'SWAT-ASH' becomes 'ASH'. Strings without a '-' and non-string values
    (e.g. NaN) are returned unchanged.
    """
    if isinstance(name, str):
        return name.rsplit('-', 1)[-1]
    return name


# Filter: keep only the mapped columns that are actually present in the dump.
colonne_esistenti = {k: v for k, v in COLONNE_MAP.items() if k in data.columns}
giocatori = data[list(colonne_esistenti)].rename(columns=colonne_esistenti)

# Chosen map and attacking side only. Filtering first means the per-row
# clean-up below runs on the relevant rows instead of the whole dump.
selezione = (giocatori['map_name'] == 'CLUB_HOUSE') & (giocatori['role'] == 'Attacker')
attaccanti = giocatori.loc[selezione].copy()

# Operator name clean-up: drop the unit prefix.
# NOTE(review): every '<unit>-RESERVE' label maps to the same 'RESERVE' name,
# so a team fielding more than one recruit ends up with fewer than five
# distinct names and is removed by the lineup-size filter further down.
attaccanti['operator'] = attaccanti['operator'].map(_strip_unit_prefix)

# A round is identified by its match id plus its round number.
attaccanti['unique_round_id'] = (
    attaccanti['match_id'].astype(str) + '_' + attaccanti['round_id'].astype(str)
)

# Grouping: one row per round with the attacking lineup and the round outcome.
data_pulito = (
    attaccanti
    .groupby('unique_round_id')
    .agg(
        lineup=('operator', lambda ops: list(ops.unique())),
        victory=('is_round_won', 'max'),
        site_name=('site_name', 'first'),
        game_mode=('game_mode', 'first'),
    )
    .reset_index()
)

print(data_pulito.head(30))

# Keep rounds with exactly five distinct attackers and a site label containing '-'
# (presumably the two-room objective sites -- TODO confirm).
# na=False treats a missing site label as "no match" instead of putting NaN in
# the boolean mask (which would make the indexing raise); regex=False matches
# the '-' literally.
lineup_completa = data_pulito['lineup'].apply(len) == 5
sito_con_trattino = data_pulito['site_name'].str.contains('-', regex=False, na=False)
data_pulito = data_pulito[lineup_completa & sito_con_trattino].copy()

print(data_pulito.head(30))

# One-hot encoding of the operators: one 0/1 column per operator in the lineup.
mlb = MultiLabelBinarizer()
X_ops = pd.DataFrame(mlb.fit_transform(data_pulito['lineup']),
                     columns=mlb.classes_,
                     index=data_pulito.index)

# One-hot encoding of the site. dtype=int keeps these indicators numeric 0/1
# like the operator columns (newer pandas versions default to bool).
X_site = pd.get_dummies(data_pulito['site_name'], prefix='site', dtype=int)

# Union: feature matrix and target.
X = pd.concat([X_ops, X_site], axis=1)
y = data_pulito['victory']

# Only the last expression of a cell is displayed automatically, so the
# intermediate checks are printed explicitly.
print(X.shape)
print(X.head(10))
y.head(10)

   unique_round_id                                        lineup  victory  \
0     1001330621_1     [CAPITAO, TWITCH, THERMITE, GLAZ, HIBANA]        0   
1     1001330621_2              [TWITCH, CAPITAO, ASH, THERMITE]        0   
2     1001330621_3              [TWITCH, GLAZ, THERMITE, HIBANA]        0   
3     1001330621_4                       [TWITCH, THERMITE, ASH]        0   
4     1001330621_5                   [TWITCH, GLAZ, HIBANA, ASH]        1   
5     1001330621_6                   [TWITCH, THERMITE, CAPITAO]        1   
6     1001330621_7              [TWITCH, GLAZ, THERMITE, HIBANA]        1   
7     1001330621_8                       [ASH, TWITCH, THERMITE]        1   
8     1001330621_9                [HIBANA, FUZE, TWITCH, JACKAL]        1   
9     1001484761_1     [HIBANA, SLEDGE, TWITCH, ASH, BLACKBEARD]        1   
10    1001484761_2             [JACKAL, TWITCH, BUCK, FUZE, ASH]        0   
11    1001484761_3     [TWITCH, BLACKBEARD, SLEDGE, ASH, HIBANA]        1   

0     0
28    1
29    0
30    1
31    1
32    1
42    0
43    1
45    1
51    1
Name: victory, dtype: int64

In [None]:
# Let's scale
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
# Hold out 20% of the rounds for the final evaluation; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Ridge

# Regularisation strengths tried by the search: 30 values log-spaced in [1e-6, 1e3].
grid = {'alpha': np.logspace(-6, 3, 30)}


def prob_err(y_true, y_pred):
    """Return the fraction of samples whose predicted label differs from the true one.

    Both arguments are array-likes of hard class labels of equal length.
    The labels are compared directly, so the result is correct for any label
    encoding. The previous sign test (y_true * y_pred <= 0) was only valid
    for -1/+1 labels; with the 0/1 targets used in this notebook it counted
    every sample with a 0 label or a 0 prediction as an error.
    """
    return np.mean(np.asarray(y_true) != np.asarray(y_pred))


# "Lower is better" scorer built on prob_err. The search below uses the
# built-in 'accuracy' scorer, which ranks candidates the same way
# (accuracy = 1 - error rate).
custom_scorer = make_scorer(prob_err, greater_is_better=False)

M = GridSearchCV(estimator=RidgeClassifier(),
                 param_grid=grid,
                 cv=3,
                 scoring='accuracy')
M.fit(X_train, y_train)

# Evaluate the refitted best model on the held-out rounds.
y_predict = M.predict(X_test)
acc = accuracy_score(y_test, y_predict)
perr = 100 * (1 - acc)  # test error as a percentage
print(perr)
print(M.best_params_['alpha'])
print('%CM:\n ' + str(confusion_matrix(y_test, y_predict)))

In [None]:
# Hyper-parameter grid for kernel ridge regression with an RBF kernel.
grid = {
    'alpha': [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1],  # how much to regularise
    'kernel': ['rbf'],                                          # non-linear kernel
    'gamma': [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1],  # how "curved" the decisions are
}

# Cut-off used to turn the regressor's continuous output into a 0/1 label.
DECISION_THRESHOLD = 0.5

# KernelRidge is a regressor, so candidates are compared on the (negated)
# mean squared error of their continuous predictions.
M = GridSearchCV(estimator=KernelRidge(),
                 param_grid=grid,
                 cv=3,
                 scoring='neg_mean_squared_error')
M.fit(X_train, y_train)

# Continuous predictions on the held-out rounds, then thresholded to class labels.
y_predict_continuous = M.predict(X_test)
y_predict = (y_predict_continuous > DECISION_THRESHOLD).astype(int)

acc = accuracy_score(y_test, y_predict)
perr = 100 * (1 - acc)  # test error as a percentage
print(perr)
print(M.best_params_['alpha'], M.best_params_['gamma'])
print('%CM:\n ' + str(confusion_matrix(y_test, y_predict)))