In [42]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler


In [43]:
# Load the dataset
data = pd.read_csv('VCT_2024.csv')

# Display column names to verify
print(data.columns)


Index(['Region', 'Event', 'Player', 'Team Abbreviated', 'Team', 'Rnd', 'R',
       'ACS', 'K:D', 'KAST', 'ADR', 'KPR', 'APR', 'FKPR', 'FDPR', 'HS%', 'CL%',
       'CL', 'CW', 'CP', 'KMax', 'K', 'D', 'A', 'FK', 'FD'],
      dtype='object')


In [44]:
# Check the first few rows to understand data
data.head()


Unnamed: 0,Region,Event,Player,Team Abbreviated,Team,Rnd,R,ACS,K:D,KAST,...,CL%,CL,CW,CP,KMax,K,D,A,FK,FD
0,EMEA,Champions Tour 2024: EMEA Kickoff,AtaKaptan,FUT,FUT Esports,182,1.22,216.9,1.23,0.76,...,0.17,4/23,4.0,23.0,30,140,114,92,7,13
1,EMEA,Champions Tour 2024: EMEA Kickoff,N4RRATE,KC,Karmine Corp,383,1.21,249.9,1.27,0.78,...,0.15,5/33,5.0,33.0,40,342,269,125,47,31
2,EMEA,Champions Tour 2024: EMEA Kickoff,MiniBoo,TH,Team Heretics,208,1.19,254.7,1.22,0.69,...,0.4,6/15,6.0,15.0,25,184,151,39,45,36
3,EMEA,Champions Tour 2024: EMEA Kickoff,Chronicle,FNC,FNATIC,88,1.15,218.0,1.2,0.76,...,0.42,5/12,5.0,12.0,21,65,54,50,8,4
4,EMEA,Champions Tour 2024: EMEA Kickoff,Cloud,GX,GIANTX,124,1.15,241.3,1.17,0.71,...,0.13,2/16,2.0,16.0,22,104,89,45,10,8


In [45]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 775 entries, 0 to 774
Data columns (total 26 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Region            775 non-null    object 
 1   Event             775 non-null    object 
 2   Player            775 non-null    object 
 3   Team Abbreviated  775 non-null    object 
 4   Team              775 non-null    object 
 5   Rnd               775 non-null    int64  
 6   R                 544 non-null    float64
 7   ACS               775 non-null    float64
 8   K:D               775 non-null    float64
 9   KAST              544 non-null    float64
 10  ADR               662 non-null    float64
 11  KPR               775 non-null    float64
 12  APR               775 non-null    float64
 13  FKPR              662 non-null    float64
 14  FDPR              662 non-null    float64
 15  HS%               544 non-null    float64
 16  CL%               527 non-null    float64
 1

In [46]:
# Initialize SVM model (if classification, otherwise consider regression model)
svm_model = SVC(kernel='linear', C=1, probability=True)
svm_model.fit(X_train, y_train)

# Initialize Random Forest Classifier
rf_model = RandomForestClassifier(random_state=42)

param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30]
}

grid_search = GridSearchCV(rf_model, param_grid, cv=3)
grid_search.fit(X_train, y_train)
best_rf_model = grid_search.best_estimator_

print("Best parameters for Random Forest:", grid_search.best_params_)



ValueError: could not convert string to float: '3/18'

In [None]:
# Predict
y_pred_svm = svm_model.predict(X_test)
y_pred_rf = best_rf_model.predict(X_test)

# Calculate metrics (assuming classification)
metrics = {
    'Accuracy': [accuracy_score(y_test, y_pred_svm), accuracy_score(y_test, y_pred_rf)],
    'Precision': [precision_score(y_test, y_pred_svm, average='weighted', zero_division=0),
                  precision_score(y_test, y_pred_rf, average='weighted', zero_division=0)],
    'Recall': [recall_score(y_test, y_pred_svm, average='weighted'),
               recall_score(y_test, y_pred_rf, average='weighted')],
    'F1 Score': [f1_score(y_test, y_pred_svm, average='weighted'),
                 f1_score(y_test, y_pred_rf, average='weighted')]
}

results_df = pd.DataFrame(metrics, index=['SVM', 'Random Forest'])

# Plotting performance comparison
results_df.plot(kind='bar', figsize=(10,6))
plt.title('Model Performance Comparison')
plt.ylabel('Score')
plt.xticks(rotation=0)
plt.ylim(0, 1)
plt.show()
