In [1]:
import pandas as pd 
import numpy as np  
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

In [2]:
# Load the squad image dataset from CSV, report its dimensions,
# and preview five randomly chosen rows.
df = pd.read_csv("Bayern_Munich_23-2024_Squad_Images_data.csv")
print(f"Dataframe Shape: {df.shape}")
df.sample(n=5)

Dataframe Shape: (3680, 4098)


Unnamed: 0,player_name,label,0,1,2,3,4,5,6,7,...,4086,4087,4088,4089,4090,4091,4092,4093,4094,4095
151,Raphael_Guerrerio,2,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,35.0,77.0,216.0,224.0,175.0,59.0,97.0,0.0,0.0,0.0
1948,Bryan_Zaragoza,1,64.0,70.0,71.0,72.0,80.0,80.0,59.0,65.0,...,20.0,18.0,23.0,33.0,17.0,132.0,133.0,133.0,57.0,172.0
1900,Leon_Goretzka,0,30.0,39.0,72.0,29.0,39.0,72.0,74.0,95.0,...,242.0,211.0,217.0,9.0,174.0,236.0,17.0,29.0,57.0,205.0
1574,Konrad_Laimer,22,119.0,160.0,193.0,138.0,181.0,214.0,146.0,189.0,...,1.0,8.0,1.0,8.0,3.0,8.0,1.0,0.0,119.0,212.0
2958,Jamal_Musiala,16,31.0,40.0,49.0,35.0,40.0,45.0,29.0,33.0,...,249.0,246.0,244.0,243.0,242.0,238.0,228.0,232.0,241.0,250.0


# Transform Data

In [3]:
# Target vector: the class label stored in the 'label' column.
y = df['label']

# Feature matrix: every column except the player name and the label,
# divided by 255 (presumably 8-bit pixel intensities rescaled to [0, 1] --
# confirm against the data source).
X = df.drop(columns=['player_name', 'label']) / 255

In [4]:
# Hold out 20% of the rows for the final evaluation.
# A fixed random_state makes the split -- and every score computed from it --
# reproducible across kernel restarts; without it each run trains and
# evaluates on a different partition of the data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)

print("X Train Shape", X_train.shape)
print("X Test Shape:", X_test.shape)

X Train Shape (2944, 4096)
X Test Shape: (736, 4096)


# Model Building

In [5]:
def Find_best_model(X, y, model_params, cv=5):
    """Grid-search every candidate model and tabulate the best result of each.

    Parameters
    ----------
    X : array-like
        Feature matrix every search is fitted on.
    y : array-like
        Target labels aligned with ``X``.
    model_params : dict
        Maps a display name to ``{'model': estimator, 'params': param_grid}``.
    cv : default 5
        Cross-validation setting forwarded unchanged to ``GridSearchCV``.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``model_params`` with the columns ``Model``,
        ``Best_score`` and ``Best_params``.
    """
    scores = []

    for algo, mp in model_params.items():
        clf = GridSearchCV(mp['model'], mp['params'], cv=cv, return_train_score=False)
        # Fit on the data handed to this function rather than on notebook
        # globals, so the caller decides which split the search may see.
        clf.fit(X, y)
        scores.append({
            "Model": algo,
            "Best_score": clf.best_score_,
            "Best_params": clf.best_params_
        })

    return pd.DataFrame(scores, columns=['Model', 'Best_score', 'Best_params'])

In [6]:
# Candidate estimators and the hyper-parameter grid searched for each one.
model_params = {
    'svm': {
        # probability=True is intentionally not set: the search ranks
        # candidates by accuracy only, and enabling probability estimates
        # makes every SVC fit run an extra internal cross-validation.
        'model': SVC(gamma='auto'),
        'params': {
            'C': [1, 10, 100, 1000],
            'kernel': ['rbf', 'linear']
        }
    },
    'random_forest': {
        # Seeded so that repeated searches report the same scores.
        'model': RandomForestClassifier(random_state=42),
        'params': {
            'n_estimators': [1, 5, 10]
        }
    },
    'logistic_regression': {
        # multi_class is left at its default; passing it explicitly is
        # deprecated since scikit-learn 1.5.
        'model': LogisticRegression(solver='liblinear'),
        'params': {
            'C': [1, 5, 10]
        }
    }
}

# Left disabled (presumably because of its run time). When enabling it, search
# on the training split only so the held-out test set does not influence
# model selection.
# result = Find_best_model(X_train, y_train, model_params)
# result

Comment: From the grid-search results above, we find that the SVM algorithm performs best. <br>
The best parameters are: {C=1, kernel="linear"}

In [7]:
# Fit the selected configuration (linear-kernel SVM, C=1) on the training
# split, then report its mean accuracy on the held-out test split.
svm_model = SVC(kernel="linear", C=1).fit(X_train, y_train)
svm_model.score(X_test, y_test)

0.9021739130434783