In [1]:
import numpy as np
import pandas as pd

from dataqualitypipeline import DQPipeline
from dataqualitypipeline import initialize_autoencoder, initialize_autoencoder_modified

In [6]:
# Read the player table from the CSV shipped in the HOWTO folder and
# preview only the first record to confirm the load worked.
players_csv = "./HOWTO/players_20.csv"
df_data = pd.read_csv(players_csv)
df_data.head(n=1)

Unnamed: 0,sofifa_id,player_url,short_name,long_name,age,dob,height_cm,weight_kg,nationality,club,...,lwb,ldm,cdm,rdm,rwb,lb,lcb,cb,rcb,rb
0,158023,https://sofifa.com/player/158023/lionel-messi/...,L. Messi,Lionel Andrés Messi Cuccittini,32,1987-06-24,170,72,Argentina,FC Barcelona,...,68+2,66+2,66+2,66+2,68+2,63+2,52+2,52+2,52+2,63+2


# FIFA World Cup Player Ratings
---
Data source (Kaggle): https://www.kaggle.com/datasets/thedevastator/fifa-world-cup-anomaly-detection-in-player-ratin

### Unsupervised Anomaly Detection on (train) data

In [7]:
# Plain Python list of every column label in the loaded frame.
only_columns = df_data.columns.tolist()
# To inspect the first 35 labels, evaluate: only_columns[:35]

In [12]:
from pyod.models.iforest import IForest
import torch

# Seed the random number generators so that Restart & Run All yields a
# comparable anomaly ranking between runs.
# NOTE(review): the autoencoder is presumably torch-based (the run log prints
# "Using cpu device") -- confirm that initialize_autoencoder_modified draws from
# torch's global RNG; if it seeds itself internally this call is simply a no-op.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

AE_EPOCHS = 120       # number of training epochs requested for the autoencoder
N_INPUT_COLUMNS = 35  # only the first 35 columns of df_data are fed to the pipeline

# Two candidate detectors are created; only the autoencoder is handed to the
# pipeline in this cell. clf_if is kept available for use elsewhere.
clf_if = IForest(n_jobs=-1, random_state=SEED)
clf_ae = initialize_autoencoder_modified(epochs=AE_EPOCHS)

# Pipeline configuration: which columns are treated as nominal, which are
# excluded, which hold timestamps, and whether no-variance columns are removed.
dq_pipe = DQPipeline(
    nominal_columns=["short_name","long_name","nationality","club","nation_position","player_tags","work_rate"],
    exclude_columns=["player_url"],
    time_column_names=["dob"],
    # deactivate_pattern_recognition=False,
    remove_columns_with_no_variance=True,
)

# Run the pipeline on the training slice only; the fitted model is not dumped.
X_output = dq_pipe.run_pipeline(
    X_train=df_data.iloc[:, :N_INPUT_COLUMNS],
    clf=clf_ae,
    dump_model=False,
)

Using cpu device
Batch size: 8192
15 cores will be used...
Only X_train input will be transformed...
Running only Transformation-Pipeline...
No Variance in follow Train Columns:  Index(['Preprocessing Pipeline__Datetime__timeseries__X__X__dob_HOUR',
       'Preprocessing Pipeline__Datetime__timeseries__X__X__dob_MINUTE',
       'Preprocessing Pipeline__Datetime__timeseries__X__X__dob_SECOND',
       'Preprocessing Pipeline__Datetime__timeseries__X__X__joined_HOUR',
       'Preprocessing Pipeline__Datetime__timeseries__X__X__joined_MINUTE',
       'Preprocessing Pipeline__Datetime__timeseries__X__X__joined_SECOND',
       'NaNMarker Pipeline__nan_marker_columns__missingindicator_sofifa_id',
       'NaNMarker Pipeline__nan_marker_columns__missingindicator_player_url',
       'NaNMarker Pipeline__nan_marker_columns__missingindicator_short_name',
       'NaNMarker Pipeline__nan_marker_columns__missingindicator_long_name',
       'NaNMarker Pipeline__nan_marker_columns__missingindicator_age

In [13]:
# Let pandas render up to 40 columns before it starts eliding them with "...".
pd.options.display.max_columns = 40

In [14]:
# Show the first 20 rows of the pipeline result, which carries the extra
# AnomalyScore, MAD_Total and Tukey_Total columns next to the input columns.
X_output.head(n=20)

Unnamed: 0,sofifa_id,player_url,short_name,long_name,age,dob,height_cm,weight_kg,nationality,club,overall,potential,value_eur,wage_eur,player_positions,preferred_foot,international_reputation,weak_foot,skill_moves,work_rate,body_type,real_face,release_clause_eur,player_tags,team_position,team_jersey_number,loaned_from,joined,contract_valid_until,nation_position,nation_jersey_number,pace,shooting,passing,dribbling,AnomalyScore,MAD_Total,Tukey_Total
10693,156321,https://sofifa.com/player/156321/adebayo-akinf...,A. Akinfenwa,Adebayo Akinfenwa,37,1982-05-10,178,110,England,Wycombe Wanderers,65,65,190000,2000,ST,Right,1,3,2,Low/Low,Akinfenwa,Yes,333000.0,#Strength,ST,20.0,,2016-07-10,2020.0,,,43.0,63.0,54.0,56.0,1.0,1,3
313,193348,https://sofifa.com/player/193348/xherdan-shaqi...,X. Shaqiri,Xherdan Shaqiri,27,1991-10-10,169,72,Switzerland,Liverpool,82,82,23000000,120000,"RW, RM",Left,3,4,5,Medium/Medium,Shaqiri,Yes,42600000.0,#Dribbler,SUB,23.0,,2018-07-13,2023.0,RS,23.0,80.0,77.0,81.0,85.0,0.566383,5,7
28,192119,https://sofifa.com/player/192119/thibaut-court...,T. Courtois,Thibaut Courtois,27,1992-05-11,199,96,Belgium,Real Madrid,88,89,48000000,235000,GK,Left,4,2,1,Medium/Medium,Courtois,Yes,102000000.0,,GK,13.0,,2018-08-09,2024.0,GK,1.0,,,,,0.562105,7,9
2,190871,https://sofifa.com/player/190871/neymar-da-sil...,Neymar Jr,Neymar da Silva Santos Junior,27,1992-02-05,175,68,Brazil,Paris Saint-Germain,92,92,105500000,290000,"LW, CAM",Right,5,5,5,High/Medium,Neymar,Yes,195200000.0,"#Speedster, #Dribbler, #Playmaker , #Crosser,...",CAM,10.0,,2017-08-03,2022.0,LW,10.0,91.0,85.0,87.0,95.0,0.339857,8,10
1,20801,https://sofifa.com/player/20801/c-ronaldo-dos-...,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,34,1985-02-05,187,83,Portugal,Juventus,93,93,58500000,405000,"ST, LW",Right,5,4,5,High/Low,C. Ronaldo,Yes,96500000.0,"#Speedster, #Dribbler, #Distance Shooter, #Acr...",LW,7.0,,2018-07-10,2022.0,LS,7.0,90.0,93.0,82.0,89.0,0.336553,8,11
0,158023,https://sofifa.com/player/158023/lionel-messi/...,L. Messi,Lionel Andrés Messi Cuccittini,32,1987-06-24,170,72,Argentina,FC Barcelona,94,94,95500000,565000,"RW, CF, ST",Left,5,4,4,Medium/Low,Messi,Yes,195800000.0,"#Dribbler, #Distance Shooter, #Crosser, #FK Sp...",RW,10.0,,2004-07-01,2021.0,,,87.0,92.0,92.0,96.0,0.324393,8,11
37,188350,https://sofifa.com/player/188350/marco-reus/20...,M. Reus,Marco Reus,30,1989-05-31,180,71,Germany,Borussia Dortmund,88,88,56000000,170000,"CAM, LM, ST",Right,4,4,4,High/Medium,Lean,Yes,92400000.0,"#Dribbler, #Playmaker , #Distance Shooter, #F...",CAM,11.0,,2012-07-01,2023.0,ST,11.0,85.0,88.0,84.0,87.0,0.143593,5,9
9,209331,https://sofifa.com/player/209331/mohamed-salah...,M. Salah,Mohamed Salah Ghaly,27,1992-06-15,175,71,Egypt,Liverpool,90,90,80500000,240000,"RW, ST",Left,3,3,4,High/Medium,PLAYER_BODY_TYPE_25,Yes,148900000.0,"#Speedster, #Dribbler, #Acrobat, #Clinical Fin...",RW,11.0,,2017-07-01,2023.0,RW,10.0,93.0,86.0,81.0,89.0,0.122221,5,9
1612,251691,https://sofifa.com/player/251691/alan-zamorado...,A. Zamorado,Alan Zamorado,38,1980-12-17,182,78,Ecuador,Ecuador,76,76,0,0,LM,Left,2,3,3,Medium/Low,Lean,No,,"#Distance Shooter, #FK Specialist",,,,,,LM,17.0,57.0,78.0,81.0,77.0,0.113353,0,1
13153,238464,https://sofifa.com/player/238464/jan-mlakar/20...,J. Mlakar,Jan Mlakar,20,1998-10-23,183,78,Slovenia,Queens Park Rangers,62,78,675000,2000,ST,Right,1,3,2,High/Medium,Normal,No,,,SUB,16.0,Brighton & Hove Albion,,2020.0,ST,9.0,63.0,61.0,48.0,60.0,0.110335,0,0
