In [1]:
import numpy as np
import pandas as pd
import csv


In [13]:
# Read the player dataset from the CSV file that sits next to the notebook.
csv_path = 'ea_fc_24_dataset.csv'
df = pd.read_csv(csv_path)

In [4]:
# Remove the 'Unnamed: 0' column (presumably a row index that was saved into
# the CSV -- confirm against the data source). errors='ignore' keeps this cell
# re-runnable: a second execution no longer raises KeyError once the column
# is already gone.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')
df

Unnamed: 0,Name,Position,Rating,Price,Version,Club,League,Nation,Skill Moves,Weak foot,...,Def W/R,Age,Height,Weight,Pace,Shooting,Pasing,Dribbling,Defending,Physical
0,Zinedine Zidane,CAM,95,5700000.0,Ultimate Dynasties Icon,Icons,ICO,France,5,5,...,MED,51,"6' 1"" / 185cm",77kg,84,91,95,95,76,85
1,Ronaldo Nazario,ST,94,10196000.0,Icon,Icons,ICO,Brazil,5,5,...,MED,47,"6' 0"" / 183cm",78kg,94,94,79,94,43,75
2,Ronaldinho,LW,94,10840000.0,Non-Inform,Icons,ICO,Brazil,5,4,...,LOW,43,"5' 11"" / 180cm",78kg,92,90,92,96,39,82
3,Erling Haaland,ST,94,2401000.0,Ultimate Dynasties,Manchester City,ENG 1,Norway,4,3,...,MED,23,"6' 5"" / 195cm",94kg,93,96,72,86,53,94
4,Johan Cruyff,CF,94,7000000.0,Non-Inform,Icons,ICO,Netherlands,5,5,...,LOW,76,"5' 11"" / 180cm",71kg,91,92,91,94,44,75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68,Thibaut Courtois,GK,90,53000.0,Rare Gold,Real Madrid,ESP 1,Belgium,1,3,...,MED,31,"6' 6"" / 199cm",96kg,85,89,76,93,46,90
69,Luis Figo,RW,90,675000.0,Non-Inform,Icons,ICO,Portugal,4,4,...,LOW,51,"5' 11"" / 180cm",75kg,90,85,87,91,40,79
70,Kelly Smith,ST,90,510000.0,Non-Inform,Icons,ICO,England,4,4,...,MED,45,"5' 6"" / 168cm",59kg,89,91,86,91,47,78
71,Dennis Bergkamp,CF,90,99000.0,Icon,Icons,ICO,Netherlands,4,4,...,MED,54,"6' 0"" / 183cm",78kg,83,89,84,88,35,76


In [5]:
import pandas as pd

def process_height_weight(value, type='height'):
    """Convert a raw height or weight cell into an integer.

    Parameters
    ----------
    value : str or number
        For ``type='height'`` a string such as ``6' 1" / 185cm``; the part
        after the ``/`` is used and the ``cm`` suffix is removed.
        For ``type='weight'`` a string such as ``77kg``; the ``kg`` suffix is
        removed.
        A value that is not a string is assumed to be already converted and
        is simply passed through ``int``, so applying the function twice to
        the same column does not fail.
    type : {'height', 'weight'}
        Which of the two formats ``value`` is in.

    Returns
    -------
    int
        Centimetres for heights, kilograms for weights.

    Raises
    ------
    ValueError
        If ``type`` is neither ``'height'`` nor ``'weight'``, or if the
        numeric part cannot be parsed as an integer.
    IndexError
        If a height string does not contain a ``/`` separator.
    """
    if type not in ('height', 'weight'):
        # Previously an unknown type fell through and returned None, which
        # would silently fill the column with missing values.
        raise ValueError(f"type must be 'height' or 'weight', got {type!r}")

    if not isinstance(value, str):
        # Already numeric (e.g. the cell was re-run on a converted column).
        return int(value)

    if type == 'height':
        return int(value.split('/')[1].replace('cm', '').strip())
    return int(value.replace('kg', '').strip())

# Replace the raw text in 'Height' / 'Weight' with the integers extracted by
# process_height_weight.
df['Height'] = df['Height'].apply(process_height_weight, type='height')
df['Weight'] = df['Weight'].apply(process_height_weight, type='weight')

# One-hot encode the categorical columns. The dtype is pinned to uint8 because
# the default differs between pandas versions (uint8 before 2.0, bool from
# 2.0 on) and the guessing game further down tells one-hot columns apart from
# numeric ones by checking for dtype 'uint8'.
df = pd.get_dummies(
    df,
    columns=['Position', 'Club', 'League', 'Nation', 'Foot', 'Att W/R', 'Def W/R', 'Version'],
    dtype='uint8',
)
df


Unnamed: 0,Name,Rating,Price,Skill Moves,Weak foot,Age,Height,Weight,Pace,Shooting,...,Version_Icon,Version_Inform,Version_Nike MAD Ready,Version_Non-Inform,Version_Radioactive,Version_Rare Gold,Version_Trailblazers,Version_Triple Threat Icon,Version_Ultimate Dynasties,Version_Ultimate Dynasties Icon
0,Zinedine Zidane,95,5700000.0,5,5,51,185,77,84,91,...,0,0,0,0,0,0,0,0,0,1
1,Ronaldo Nazario,94,10196000.0,5,5,47,183,78,94,94,...,1,0,0,0,0,0,0,0,0,0
2,Ronaldinho,94,10840000.0,5,4,43,180,78,92,90,...,0,0,0,1,0,0,0,0,0,0
3,Erling Haaland,94,2401000.0,4,3,23,195,94,93,96,...,0,0,0,0,0,0,0,0,1,0
4,Johan Cruyff,94,7000000.0,5,5,76,180,71,91,92,...,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68,Thibaut Courtois,90,53000.0,1,3,31,199,96,85,89,...,0,0,0,0,0,1,0,0,0,0
69,Luis Figo,90,675000.0,4,4,51,180,75,90,85,...,0,0,0,1,0,0,0,0,0,0
70,Kelly Smith,90,510000.0,4,4,45,168,59,89,91,...,0,0,0,1,0,0,0,0,0,0
71,Dennis Bergkamp,90,99000.0,4,4,54,183,78,83,89,...,1,0,0,0,0,0,0,0,0,0


In [6]:
from sklearn.tree import DecisionTreeClassifier

# The player's name is the label; every remaining column is a feature.
y = df['Name']
X = df.drop(columns=['Name'])

# Entropy-criterion decision tree with a fixed seed, fitted on the full frame.
clf = DecisionTreeClassifier(random_state=42, criterion='entropy')
clf.fit(X, y)

# Size summary of the fitted tree, shown as the cell output.
model_info = dict(
    depth=clf.tree_.max_depth,
    number_of_nodes=clf.tree_.node_count,
    number_of_leaves=clf.tree_.n_leaves,
)

model_info

{'depth': 7, 'number_of_nodes': 145, 'number_of_leaves': 73}

In [36]:
def ask_question(feature, value, is_numeric=True):
    """Ask the user one yes/no question about a feature and return the answer.

    Parameters
    ----------
    feature : str
        Column name. For non-numeric (one-hot) columns it is expected to look
        like ``<category>_<level>``, e.g. ``Version_Icon``.
    value : float
        Threshold shown in the question for numeric features; not used for
        non-numeric ones.
    is_numeric : bool, default True
        Selects the "less than" wording (True) or the "have X as Y" wording
        (False).

    Returns
    -------
    bool
        True if the user answered yes (``yes`` / ``y``), False if the user
        answered no (``no`` / ``n``). Matching ignores case and surrounding
        whitespace.
    """
    if is_numeric:
        question = f"Is the player's {feature} less than {value}? (yes/no): "
    else:
        # Split on the first underscore only, so a level that itself contains
        # '_' is shown in full and a name without '_' does not raise.
        category, _, level = feature.partition('_')
        question = f"Does the player have {category} as {level}? (yes/no): "

    # Keep asking until the reply is recognisable; previously any typo was
    # silently treated as "no" and sent the game down the wrong branch.
    while True:
        answer = input(question).strip().lower()
        if answer in ('yes', 'y'):
            return True
        if answer in ('no', 'n'):
            return False
        print("Please answer 'yes' or 'no'.")

def aki_game_decision_path(clf, features):
    """Play an interactive guessing game by walking a fitted decision tree.

    Starting at the root node, one yes/no question is asked per split node
    (through ``ask_question``) and the matching child is followed until a
    leaf is reached; the class with the largest value at that leaf is printed.

    Parameters
    ----------
    clf : fitted tree classifier
        Must expose ``tree_`` (with ``feature``, ``threshold``,
        ``children_left``, ``children_right``, ``value``) and ``classes_``.
    features : list of str
        Feature names, indexed by the feature ids stored in ``clf.tree_``.

    Notes
    -----
    Reads the module-level frame ``X`` to decide, from a column's dtype,
    whether a feature is numeric or a one-hot indicator.
    Returns nothing; the result is printed.
    """
    tree = clf.tree_
    current_node = 0

    # tree_.feature holds -2 for leaf nodes (no split feature).
    while tree.feature[current_node] != -2:
        feature_name = features[tree.feature[current_node]]
        feature_threshold = tree.threshold[current_node]

        # One-hot indicator columns are uint8 (older pandas default) or bool
        # (newer pandas default); everything else is treated as numeric.
        is_numeric = str(X[feature_name].dtype) not in ('uint8', 'bool')

        answer = ask_question(feature_name, feature_threshold, is_numeric)

        # The left child holds samples whose value is <= threshold.
        #  - numeric question ("less than threshold?"): yes -> left.
        #  - one-hot question ("has this category?"): yes means the indicator
        #    is 1, which is above the split threshold, so yes -> right.
        #    Going left on "yes" here would follow the subtree of players
        #    that do NOT have the category.
        go_left = answer if is_numeric else not answer
        if go_left:
            current_node = tree.children_left[current_node]
        else:
            current_node = tree.children_right[current_node]

    predicted_name = clf.classes_[np.argmax(tree.value[current_node])]
    print(f"The predicted player is: {predicted_name}")

# Column names of the training frame, in order, so the tree's feature ids can
# be turned back into names.
features_list = list(X.columns)

# Start the interactive game (prompts for yes/no answers on stdin).
aki_game_decision_path(clf, features_list)




Is the player's Weight less than 73.5? (yes/no): yes
Is the player's Pasing less than 87.5? (yes/no): no
Is the player's Dribbling less than 91.5? (yes/no): no
Is the player's Height less than 172.5? (yes/no): yes
Is the player's Weight less than 69.0? (yes/no): no
Is the player's Physical less than 69.5? (yes/no): no
Is the player's Price less than 3175000.0? (yes/no): no
The predicted player is: Zico


In [64]:
import re

# Manual cross-check: print every player whose stats satisfy the thresholds
# below.
#
# 'Weight' is normalised first so the cell works both on the raw frame (text
# such as '77kg') and after the preprocessing cell has already converted the
# column to integers; calling .replace() on an int would raise otherwise.
# 'Height' is not part of the filter, so it is not parsed here.
weight = df['Weight'].astype(str).str.replace('kg', '', regex=False).astype(float)

# NOTE(review): the recorded game transcript answered "no" to "Physical less
# than 69.5" and also asked about Height and Price, while this filter keeps
# Physical < 69.5 and checks neither Height nor Price -- confirm this is the
# intended set of conditions.
matches = df[
    (weight < 73.5)
    & (weight > 69)
    & (df['Pasing'] > 87.5)
    & (df['Dribbling'] > 91.5)
    & (df['Physical'] < 69.5)
]

for name in matches['Name']:
    print(name)

Garrincha
