In [1]:
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

from Functions import split_data
from Functions import engineer_features
from Functions import accuracy_scorer
from Functions import remove_outliers_iqr
from Functions import normalize
from Functions import run_SVM_classifier

In [2]:
# Load CompleteDataset.csv from the FIFA18 players database directory.
# low_memory=False makes pandas parse the file in one pass instead of in chunks,
# which avoids mixed-dtype inference on columns.
DATASET_CSV = "../FIFA18_players_database/CompleteDataset.csv"
data = pd.read_csv(DATASET_CSV, low_memory=False)

In [3]:
# Build the model features from the raw table. `data` is rebound here, so re-running
# this cell on its own feeds the already-engineered frame back into
# engineer_features; re-run the load cell first if this cell must be executed again.
data = engineer_features(data)

# Shuffle the rows and renumber the index from 0. The seed is fixed so that every
# Restart & Run All yields the same row order; without it the shuffle (and anything
# downstream that depends on row order) changes on every run.
SHUFFLE_SEED = 42
data = data.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)

In [4]:
# Split the engineered table into train/test features and targets.
# NOTE(review): the meaning of the second argument (0) is defined in
# Functions.split_data and is not visible here — confirm before changing it.
split_parts = split_data(data, 0)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Baseline SVM: un-normalized features, outliers included, no hyperparameter tuning.
# 'linear', 1, True are passed positionally; presumably kernel and C followed by a
# flag — confirm against Functions.run_SVM_classifier.
y_test_pred, y_train_pred, classifier = run_SVM_classifier(
    X_train, X_test, y_train, 'linear', 1, True
)

# Score the predictions on the train split first, then on the test split.
acc_train, acc_test = (
    accuracy_scorer(y_true, y_pred)
    for y_true, y_pred in ((y_train, y_train_pred), (y_test, y_test_pred))
)

# Report both accuracies with three decimals.
print(f"Accuracy of train: {acc_train:.3f}")
print(f"Accuracy of test: {acc_test:.3f}")

In [None]:
# Min-max scale the train split, then the test split, with identical arguments.
# NOTE(review): each split is scaled by its own normalize() call. If normalize()
# derives min/max from the frame it receives, the test split is scaled with its own
# statistics instead of the training ones — confirm against Functions.normalize.
X_train_minmax, X_test_minmax = (
    normalize(features, "minmax", False) for features in (X_train, X_test)
)

In [None]:
# SVM on min-max-normalized features, outliers still included, no hyperparameter
# tuning. Same positional settings ('linear', 1, True) as the baseline run.
y_test_pred, y_train_pred, classifier = run_SVM_classifier(
    X_train_minmax, X_test_minmax, y_train, 'linear', 1, True
)

# Score the predictions on the train split first, then on the test split.
acc_train, acc_test = (
    accuracy_scorer(y_true, y_pred)
    for y_true, y_pred in ((y_train, y_train_pred), (y_test, y_test_pred))
)

print(f"Accuracy of train: {acc_train:.3f}")
print(f"Accuracy of test: {acc_test:.3f}")

In [None]:
# Filter the full engineered table with remove_outliers_iqr (presumably an IQR-based
# row filter, per its name — confirm in Functions). The filter runs before the
# split, so it is applied to the rows that end up in the test split as well.
df_no_outliers = remove_outliers_iqr(data)

# Re-split the filtered table. This rebinds X_train, X_test, y_train and y_test, so
# every cell below works on the filtered split, and re-running an earlier modelling
# cell after this one would silently use the filtered split too.
split_parts = split_data(df_no_outliers, 0)
X_train, X_test, y_train, y_test = split_parts

# Recompute the min-max-scaled versions for the new split (train first, then test).
X_train_minmax, X_test_minmax = (
    normalize(features, "minmax", False) for features in (X_train, X_test)
)

In [None]:
# SVM on min-max-normalized features with outliers removed, no hyperparameter
# tuning. Same positional settings ('linear', 1, True) as the earlier runs.
y_test_pred, y_train_pred, classifier = run_SVM_classifier(
    X_train_minmax, X_test_minmax, y_train, 'linear', 1, True
)

# Score the predictions on the train split first, then on the test split.
acc_train, acc_test = (
    accuracy_scorer(y_true, y_pred)
    for y_true, y_pred in ((y_train, y_train_pred), (y_test, y_test_pred))
)

print(f"Accuracy of train: {acc_train:.3f}")
print(f"Accuracy of test: {acc_test:.3f}")

In [None]:
# SVM on un-normalized features with outliers removed, no hyperparameter tuning.
# Same positional settings ('linear', 1, True) as the earlier runs.
y_test_pred, y_train_pred, classifier = run_SVM_classifier(
    X_train, X_test, y_train, 'linear', 1, True
)

# Score the predictions on the train split first, then on the test split.
acc_train, acc_test = (
    accuracy_scorer(y_true, y_pred)
    for y_true, y_pred in ((y_train, y_train_pred), (y_test, y_test_pred))
)

print(f"Accuracy of train: {acc_train:.3f}")
print(f"Accuracy of test: {acc_test:.3f}")

In [None]:
# Hyperparameter grid for the SVM. Every list holds a single value, so this "search"
# cross-validates exactly one configuration; add values to the lists to widen it.
param_grid = {
    'C': [100],
    'kernel': ['rbf'],
    'gamma': ['auto'],
    'class_weight': [None],
}

# Tune on a random subsample of the TRAINING split, never on the test split:
# selecting hyperparameters on X_test/y_test leaks test information into model
# selection and makes any test accuracy reported afterwards optimistic. (The note
# that follows this cell describes the earlier shortcut of tuning on the test data
# because the full training set was too slow.) The subsample keeps the run time
# bounded; raise TUNE_FRACTION (it must stay below 1.0) if the run time allows.
# The min-max-scaled features are used so that tuning sees the same feature scale
# as the final model, which is trained on X_train_minmax.
TUNE_FRACTION = 0.25
TUNE_SEED = 42
X_tune, X_tune_rest, y_tune, y_tune_rest = train_test_split(
    X_train_minmax, y_train, train_size=TUNE_FRACTION, random_state=TUNE_SEED
)

# `classifier` (the model returned by the most recent run_SVM_classifier call) only
# serves as a template: GridSearchCV clones it and overrides the parameters listed
# in param_grid.
grid_search = GridSearchCV(
    estimator=classifier,
    param_grid=param_grid,
    scoring='accuracy',
    cv=2,
    verbose=1,
    n_jobs=-1,
)
grid_search.fit(X_tune, y_tune)

print("Best Hyperparameters:", grid_search.best_params_)
# Refit by GridSearchCV on the tuning subsample only, not on the full training split.
best_model = grid_search.best_estimator_

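Running the grid search on the full training data took too long, so it was run on the test data instead. Caveat: choosing hyperparameters on the test set leaks test information into model selection, so a test accuracy obtained with hyperparameters chosen this way is an optimistic estimate.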

In [None]:
# Tuned SVM: min-max-normalized features, outliers removed. 'rbf' and 100 repeat the
# kernel and C values listed in the grid-search cell's param_grid.
# NOTE(review): the grid also lists gamma='auto' and class_weight=None, which are not
# passed here — confirm what run_SVM_classifier uses for them.
y_test_pred, y_train_pred, classifier = run_SVM_classifier(
    X_train_minmax, X_test_minmax, y_train, 'rbf', 100, True
)

# Score the predictions on the train split first, then on the test split.
acc_train, acc_test = (
    accuracy_scorer(y_true, y_pred)
    for y_true, y_pred in ((y_train, y_train_pred), (y_test, y_test_pred))
)

print(f"Accuracy of train: {acc_train:.3f}")
print(f"Accuracy of test: {acc_test:.3f}")


The RBF kernel with C=100 gave only a marginal improvement in accuracy, but the run time was reduced a lot.