# FIFA World Cup Player Ratings
---
Dataset source: https://www.kaggle.com/datasets/thedevastator/fifa-world-cup-anomaly-detection-in-player-ratin

### Unsupervised anomaly detection on the training data

In [2]:
import numpy as np
import pandas as pd

from dataqualitypipeline import DQPipeline
from dataqualitypipeline import initialize_autoencoder, initialize_autoencoder_modified

# Read the player table from the local HOWTO folder and preview its first row
# as a quick check that the file parsed.
df_data = pd.read_csv(filepath_or_buffer="./HOWTO/players_20.csv")
df_data.head(n=1)

Unnamed: 0,sofifa_id,player_url,short_name,long_name,age,dob,height_cm,weight_kg,nationality,club,...,lwb,ldm,cdm,rdm,rwb,lb,lcb,cb,rcb,rb
0,158023,https://sofifa.com/player/158023/lionel-messi/...,L. Messi,Lionel Andrés Messi Cuccittini,32,1987-06-24,170,72,Argentina,FC Barcelona,...,68+2,66+2,66+2,66+2,68+2,63+2,52+2,52+2,52+2,63+2


In [3]:
# Materialise the column labels as a plain Python list so they can be sliced.
only_columns = df_data.columns.tolist()
# Display the first 37 labels -- the same positional range the pipeline cell
# selects with `df_data.iloc[:, 0:37]`.
only_columns[0:37]

['sofifa_id',
 'player_url',
 'short_name',
 'long_name',
 'age',
 'dob',
 'height_cm',
 'weight_kg',
 'nationality',
 'club',
 'overall',
 'potential',
 'value_eur',
 'wage_eur',
 'player_positions',
 'preferred_foot',
 'international_reputation',
 'weak_foot',
 'skill_moves',
 'work_rate',
 'body_type',
 'real_face',
 'release_clause_eur',
 'player_tags',
 'team_position',
 'team_jersey_number',
 'loaned_from',
 'joined',
 'contract_valid_until',
 'nation_position',
 'nation_jersey_number',
 'pace',
 'shooting',
 'passing',
 'dribbling',
 'defending',
 'physic']

In [4]:
from pyod.models.iforest import IForest
from pyod.models.lof import LOF
# import torch
# Candidate anomaly detectors. All three are instantiated here, but only
# `clf_lof` is handed to the pipeline in this cell.
clf_if = IForest(n_jobs=-1)
clf_ae = initialize_autoencoder_modified(epochs=200)
clf_lof = LOF(n_jobs=-1)

# Feature range fed to the pipeline: every column from `sofifa_id` through
# `physic`, i.e. the first 37 columns of players_20.csv. Selecting by label
# (instead of the positional `iloc[:, 0:37]`) documents which attributes are
# used and raises a KeyError if the file layout ever changes.
FIRST_FEATURE_COLUMN = "sofifa_id"
LAST_FEATURE_COLUMN = "physic"

dq_pipe = DQPipeline(
    # Columns declared to the pipeline as nominal (categorical) features.
    nominal_columns=[
        "player_tags",
        "preferred_foot",
        "work_rate",
        "team_position",
        "loaned_from",
    ],
    # Columns passed as `exclude_columns`: identifiers, free text and other
    # fields that should not be treated as features.
    exclude_columns=[
        "player_url",
        "body_type",
        "short_name",
        "long_name",
        "team_jersey_number",
        "joined",
        "contract_valid_until",
        "real_face",
        "nation_position",
        "player_positions",
        "nationality",
        "club",
    ],
    # `dob` is handled as a datetime column.
    time_column_names=["dob"],
    deactivate_pattern_recognition=True,
    remove_columns_with_no_variance=True,
)

# Only X_train is supplied; the model is not dumped to disk.
X_output = dq_pipe.run_pipeline(
    X_train=df_data.loc[:, FIRST_FEATURE_COLUMN:LAST_FEATURE_COLUMN],
    clf=clf_lof,
    dump_model=False,
)



Using cpu device
Batch size: 8192
15 cores will be used...
Only X_train input will be transformed...
Running only Transformation-Pipeline...
No Variance in follow Train Columns:  Index(['Preprocessing Pipeline__Datetime__timeseries__X__X__dob_HOUR',
       'Preprocessing Pipeline__Datetime__timeseries__X__X__dob_MINUTE',
       'Preprocessing Pipeline__Datetime__timeseries__X__X__dob_SECOND',
       'NaNMarker Pipeline__nan_marker_columns__missingindicator_sofifa_id',
       'NaNMarker Pipeline__nan_marker_columns__missingindicator_player_url',
       'NaNMarker Pipeline__nan_marker_columns__missingindicator_short_name',
       'NaNMarker Pipeline__nan_marker_columns__missingindicator_long_name',
       'NaNMarker Pipeline__nan_marker_columns__missingindicator_age',
       'NaNMarker Pipeline__nan_marker_columns__missingindicator_dob',
       'NaNMarker Pipeline__nan_marker_columns__missingindicator_height_cm',
       'NaNMarker Pipeline__nan_marker_columns__missingindicator_weight_kg'

In [5]:
# Optional inspection step, left disabled: uncomment the line below to view the
# pipeline's `X_train_transformed` attribute (presumably the transformed
# training features -- TODO confirm against the DQPipeline docs).
# dq_pipe.X_train_transformed

In [6]:
# Let pandas render up to 100 columns so the wide player table is not elided.
pd.options.display.max_columns = 100

In [7]:
# Show the first 40 rows of the pipeline output; the result carries the input
# columns plus the added AnomalyScore, MAD_Total and Tukey_Total columns.
X_output.head(n=40)

Unnamed: 0,sofifa_id,player_url,short_name,long_name,age,dob,height_cm,weight_kg,nationality,club,overall,potential,value_eur,wage_eur,player_positions,preferred_foot,international_reputation,weak_foot,skill_moves,work_rate,body_type,real_face,release_clause_eur,player_tags,team_position,team_jersey_number,loaned_from,joined,contract_valid_until,nation_position,nation_jersey_number,pace,shooting,passing,dribbling,defending,physic,AnomalyScore,MAD_Total,Tukey_Total
0,158023,https://sofifa.com/player/158023/lionel-messi/...,L. Messi,Lionel Andrés Messi Cuccittini,32,1987-06-24,170,72,Argentina,FC Barcelona,94,94,95500000,565000,"RW, CF, ST",Left,5,4,4,Medium/Low,Messi,Yes,195800000.0,"#Dribbler, #Distance Shooter, #Crosser, #FK Sp...",RW,10.0,,2004-07-01,2021.0,,,87.0,92.0,92.0,96.0,39.0,66.0,1.0,8,11
4,183277,https://sofifa.com/player/183277/eden-hazard/2...,E. Hazard,Eden Hazard,28,1991-01-07,175,74,Belgium,Real Madrid,91,91,90000000,470000,"LW, CF",Right,4,4,4,High/Medium,Normal,Yes,184500000.0,"#Speedster, #Dribbler, #Acrobat",LW,7.0,,2019-07-01,2024.0,LF,10.0,91.0,83.0,86.0,94.0,35.0,66.0,0.652752,7,9
17951,250804,https://sofifa.com/player/250804/joe-walsh/20/...,J. Walsh,Joe Walsh,17,2002-04-01,185,68,England,Gillingham,51,75,70000,1000,GK,Right,1,5,1,Medium/Medium,Normal,No,193000.0,,SUB,21.0,,2019-05-25,2022.0,,,,,,,,,0.604705,0,2
15879,242073,https://sofifa.com/player/242073/joaquin-blazq...,J. Blazquez,Joaquin Blazquez,19,2000-01-28,192,83,Argentina,Valencia CF,58,78,230000,2000,GK,Right,1,3,1,Medium/Medium,Lean,No,,,RES,42.0,Club Atlético Talleres,,2020.0,,,,,,,,,0.553566,0,0
2,190871,https://sofifa.com/player/190871/neymar-da-sil...,Neymar Jr,Neymar da Silva Santos Junior,27,1992-02-05,175,68,Brazil,Paris Saint-Germain,92,92,105500000,290000,"LW, CAM",Right,5,5,5,High/Medium,Neymar,Yes,195200000.0,"#Speedster, #Dribbler, #Playmaker , #Crosser,...",CAM,10.0,,2017-08-03,2022.0,LW,10.0,91.0,85.0,87.0,95.0,32.0,58.0,0.55347,9,11
7725,243675,https://sofifa.com/player/243675/kjell-scherpe...,K. Scherpen,Kjell Scherpen,19,2000-01-23,202,85,Netherlands,Ajax,67,81,1400000,2000,GK,Right,1,5,1,Medium/Medium,Normal,No,2800000.0,,RES,35.0,,2019-07-01,2023.0,,,,,,,,,0.543319,1,2
5564,214983,https://sofifa.com/player/214983/matias-ibanez...,M. Ibáñez,Matías Ibáñez,32,1986-12-16,190,88,Argentina,Patronato,70,70,1100000,7000,GK,Right,1,2,1,Medium/Medium,Normal,No,,,GK,12.0,Club Atlético Lanús,,2020.0,,,,,,,,,0.520964,0,0
13055,231993,https://sofifa.com/player/231993/wenyi-chi/20/...,Chi Wenyi,池文一,31,1988-02-18,183,70,China PR,Hebei China Fortune FC,63,63,240000,2000,GK,Right,1,1,1,Medium/Medium,Normal,No,,,SUB,33.0,Beijing Sinobo Guoan FC,,2019.0,,,,,,,,,0.51206,0,1
10,231747,https://sofifa.com/player/231747/kylian-mbappe...,K. Mbappé,Kylian Mbappé,20,1998-12-20,178,73,France,Paris Saint-Germain,89,95,93500000,155000,"ST, RW",Right,3,4,5,High/Low,Normal,Yes,191700000.0,"#Speedster, #Dribbler, #Acrobat",RW,7.0,,2018-07-01,2022.0,RM,10.0,96.0,84.0,78.0,90.0,39.0,75.0,0.510247,6,9
5985,213536,https://sofifa.com/player/213536/maxime-crepea...,M. Crépeau,Maxime Crépeau,25,1994-05-11,183,88,Canada,Vancouver Whitecaps FC,69,73,1000000,3000,GK,Right,1,2,1,Medium/Medium,Stocky,Yes,1600000.0,,GK,16.0,,2019-02-13,2022.0,SUB,1.0,,,,,,,0.499239,1,1
