In [122]:
import pandas as pd
import sqlite3

In [123]:
# Query that pulls every column of the `prem_name_join` table.
query_string = "SELECT * FROM prem_name_join"
# SQLite database file, given relative to the notebook's working directory.
file_path = "../data/fifa_players.db"

In [124]:
# Open the SQLite database used by the rest of the notebook.
# NOTE(review): no conn.close() is visible in this part of the notebook — confirm it is closed later.
conn = sqlite3.connect(database=file_path)

In [125]:
# Load the full query result into a DataFrame and display it.
df = pd.read_sql_query(sql=query_string, con=conn)
df

Unnamed: 0,player_id,fifa_version,fifa_update,short_name,long_name,player_positions,overall,potential,value_eur,wage_eur,...,pace,shooting,passing,dribbling,defending,physic,attacking_crossing,attacking_finishing,goals,assists
0,192985.0,23,1.0,K. De Bruyne,Kevin De Bruyne,"CM, CAM",91.0,91.0,107500000.0,350000.0,...,74.0,88.0,93.0,87.0,64.0,77.0,94.0,85.0,7,18
1,20801.0,23,1.0,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,ST,90.0,90.0,41000000.0,220000.0,...,81.0,92.0,78.0,85.0,34.0,75.0,80.0,93.0,1,0
2,203376.0,23,1.0,V. van Dijk,Virgil van Dijk,CB,90.0,90.0,98000000.0,230000.0,...,81.0,60.0,71.0,72.0,91.0,86.0,53.0,52.0,3,1
3,209331.0,23,1.0,M. Salah,Mohamed Salah Ghaly,RW,90.0,90.0,115500000.0,270000.0,...,90.0,89.0,82.0,90.0,45.0,75.0,80.0,93.0,19,12
4,200104.0,23,1.0,H. Son,손흥민 孙兴慜,"LW, LM",89.0,89.0,101000000.0,240000.0,...,88.0,89.0,82.0,86.0,42.0,69.0,83.0,91.0,10,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6330,,16,,É. Capoue,Étienne Capoue,,,,,,...,,,,,,,,,0,1
6331,,17,,É. Capoue,Étienne Capoue,,,,,,...,,,,,,,,,7,1
6332,,18,,É. Capoue,Étienne Capoue,,,,,,...,,,,,,,,,1,0
6333,,19,,É. Capoue,Étienne Capoue,,,,,,...,,,,,,,,,1,3


### Exploring Null Rows in the Dataset

I'm currently exploring rows in the dataset that are **null in all columns except** `fifa_version`, `short_name`, `long_name`, `goals`, and `assists`. These nulls are likely the result of incomplete joins during the merge with goal/assist statistics.

Based on how the dataset was built, I’ve assumed that many of these players **joined the Premier League mid-season**. The FIFA data I used comes only from the **first update of each season**, so players who transferred in later might not be present in the FIFA table, but still show up in the goal/assist data scraped from Transfermarkt. 

To investigate these discrepancies:
- I’m conducting **exploratory analysis** of the null rows within the DataFrame to identify patterns and repeated player names (`long_name`) across null entries.
- For players with multiple null rows, I then use **BeeKeeper Studio** to look up their `player_id` and determine if it's possible to accurately attribute goal or assist data.

#### SQL Query Example

To search for a player's data in BeeKeeper Studio, I use queries like:

```sql
SELECT * FROM prem_name_join
WHERE short_name LIKE '%capoue%'
```

Where `Etienne Capoue` is just one example of a player being investigated.

---

#### Summary of Findings

- **40 players** have **2 or more null rows**
- **17 players** have **3 or more null rows**

I’m prioritizing players with **2 or more null rows**, as this strongly indicates a join failure due to name mismatch or missing FIFA registration.

Given there are **387 total null rows**, it's likely that some players:
- Only played in the Premier League for **a single season**
- Were not matched correctly due to **name inconsistencies**
- Consequently, had their goals and assists **not attributed** properly

This introduces a potential **bias** in the dataset, especially against short-term players with fewer contributions — who are more likely to be dropped during the join and not matched in scraping.


In [126]:
# Keep only the rows whose `fifa_update` is missing, then display them.
missing_update = df['fifa_update'].isna()
df_null = df.loc[missing_update]
df_null

Unnamed: 0,player_id,fifa_version,fifa_update,short_name,long_name,player_positions,overall,potential,value_eur,wage_eur,...,pace,shooting,passing,dribbling,defending,physic,attacking_crossing,attacking_finishing,goals,assists
5948,,22,,A. Lennon,Aaron Lennon,,,,,,...,,,,,,,,,2,0
5949,,18,,A. Sabiri,Abdelhamid Sabiri,,,,,,...,,,,,,,,,0,1
5950,,21,,A. Doucouré,Abdoulaye Doucouré,,,,,,...,,,,,,,,,2,3
5951,,15,,A. Hernández,Abel Hernández,,,,,,...,,,,,,,,,4,1
5952,,17,,A. Traoré,Adama Traoré,,,,,,...,,,,,,,,,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6330,,16,,É. Capoue,Étienne Capoue,,,,,,...,,,,,,,,,0,1
6331,,17,,É. Capoue,Étienne Capoue,,,,,,...,,,,,,,,,7,1
6332,,18,,É. Capoue,Étienne Capoue,,,,,,...,,,,,,,,,1,0
6333,,19,,É. Capoue,Étienne Capoue,,,,,,...,,,,,,,,,1,3


In [127]:
# Number of null rows recorded under each `long_name`.
null_counts = df_null.groupby(by='long_name').size()

In [128]:
# Names that appear on two or more null rows.
has_repeats = null_counts.ge(2)
multi_null_names = null_counts.loc[has_repeats].index
multi_null_names

Index(['Adama Traoré', 'Ademola Lookman', 'Bojan Krkic', 'Branislav Ivanovic',
       'Carlos Vinícius', 'Chicharito', 'Chung-yong Lee', 'Dame N'Doye',
       'Dieumerci Mbokani', 'Gabriel Paulista', 'Gylfi Sigurdsson',
       'Hee-chan Hwang', 'Jay Rodríguez', 'Jeff Hendrick', 'Jonny Otto',
       'Jordan Ayew', 'Jose Cholevas', 'Jóhann Berg Gudmundsson',
       'Karlan Grant', 'Lazar Markovic', 'Luka Milivojevic',
       'Mahmoud Trezeguet', 'Marko Arnautovic', 'Martin Skrtel',
       'Martin Ødegaard', 'Massadio Haidara', 'Mateo Kovacic', 'Modou Barrow',
       'Nemanja Matic', 'Nikica Jelavic', 'Pierre-Emerick Aubameyang',
       'Sead Kolasinac', 'Serge Aurier', 'Sung-yueng Ki', 'Tanguy Ndombélé',
       'Thiago Alcántara', 'Tomas Soucek', 'Wout Weghorst', 'Zanka',
       'Étienne Capoue'],
      dtype='object', name='long_name')

In [129]:
# Null rows (missing `fifa_update`) that belong to a name with repeated null rows.
name_is_repeated = df['long_name'].isin(multi_null_names)
update_is_missing = df['fifa_update'].isnull()
suspicious_players = df[name_is_repeated & update_is_missing]
suspicious_players

Unnamed: 0,player_id,fifa_version,fifa_update,short_name,long_name,player_positions,overall,potential,value_eur,wage_eur,...,pace,shooting,passing,dribbling,defending,physic,attacking_crossing,attacking_finishing,goals,assists
5952,,17,,A. Traoré,Adama Traoré,,,,,,...,,,,,,,,,0,1
5953,,19,,A. Traoré,Adama Traoré,,,,,,...,,,,,,,,,1,1
5954,,17,,A. Lookman,Ademola Lookman,,,,,,...,,,,,,,,,1,0
5955,,21,,A. Lookman,Ademola Lookman,,,,,,...,,,,,,,,,4,4
5956,,22,,A. Lookman,Ademola Lookman,,,,,,...,,,,,,,,,6,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6330,,16,,É. Capoue,Étienne Capoue,,,,,,...,,,,,,,,,0,1
6331,,17,,É. Capoue,Étienne Capoue,,,,,,...,,,,,,,,,7,1
6332,,18,,É. Capoue,Étienne Capoue,,,,,,...,,,,,,,,,1,0
6333,,19,,É. Capoue,Étienne Capoue,,,,,,...,,,,,,,,,1,3


In [130]:
# How many distinct names have two or more null rows.
amount_of_unique_null_players = multi_null_names.size
amount_of_unique_null_players

40

In [131]:
# FIFA-side rows for player_id 178213 (Etienne Capoue), one per fifa_version.
df.loc[df['player_id'] == 178213.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
2070,178213.0,Etienne Capoue,20,0,0
2887,178213.0,Etienne Capoue,19,0,0
3564,178213.0,Etienne Capoue,18,0,0
4147,178213.0,Etienne Capoue,17,0,0
4805,178213.0,Etienne Capoue,16,0,0
5413,178213.0,Etienne Capoue,15,0,0


In [132]:
# Unmatched scraped rows stored under the accented spelling of the name.
suspicious_players.loc[suspicious_players['long_name'] == "Étienne Capoue", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6330,,Étienne Capoue,16,0,1
6331,,Étienne Capoue,17,7,1
6332,,Étienne Capoue,18,1,0
6333,,Étienne Capoue,19,1,3
6334,,Étienne Capoue,20,0,3


In [133]:
# Copy the scraped tallies from the unmatched "Étienne Capoue" rows onto the
# FIFA rows of player_id 178213, paired by fifa_version. Keys are df row labels.
capoue_patches = {
    4805: [0, 1],  # FIFA 16
    4147: [7, 1],  # FIFA 17
    3564: [1, 0],  # FIFA 18
    2887: [1, 3],  # FIFA 19
    2070: [0, 3],  # FIFA 20
}
for row_label, tally in capoue_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [134]:
# FIFA-side rows for player_id 183491 (Mathias Jattah-Njie Jørgensen).
df.loc[df['player_id'] == 183491.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
2895,183491.0,Mathias Jattah-Njie Jørgensen,19,0,0
3606,183491.0,Mathias Jattah-Njie Jørgensen,18,0,0


In [135]:
# Unmatched scraped rows stored under the name "Zanka".
suspicious_players.loc[suspicious_players['long_name'] == "Zanka", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6326,,Zanka,18,0,2
6327,,Zanka,19,3,1


In [136]:
# Copy the unmatched "Zanka" tallies onto the FIFA rows of player_id 183491,
# paired by fifa_version. Keys are df row labels.
zanka_patches = {
    2895: [3, 1],  # FIFA 19
    3606: [0, 2],  # FIFA 18
}
for row_label, tally in zanka_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [137]:
# FIFA-side rows for player_id 201153 (Álvaro Borja Morata Martín).
df.loc[df['player_id'] == 201153.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
2687,201153.0,Álvaro Borja Morata Martín,19,0,0
3337,201153.0,Álvaro Borja Morata Martín,18,0,0


In [138]:
# Set goals/assists on the FIFA rows of player_id 201153. Keys are df row labels.
# NOTE(review): the source of these tallies is not displayed in the notebook — verify.
morata_patches = {
    2687: [5, 0],   # FIFA 19
    3337: [11, 6],  # FIFA 18
}
for row_label, tally in morata_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

Going down the list, Wout Weghorst is not in the database.

In [139]:
# FIFA-side rows for player_id 236792 (Tomáš Souček).
df.loc[df['player_id'] == 236792.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
94,236792.0,Tomáš Souček,23,0,0
734,236792.0,Tomáš Souček,22,0,0
1400,236792.0,Tomáš Souček,21,0,0


In [140]:
# Unmatched scraped rows stored under the unaccented spelling "Tomas Soucek".
suspicious_players.loc[suspicious_players['long_name'] == "Tomas Soucek", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6299,,Tomas Soucek,20,3,0
6300,,Tomas Soucek,21,10,1
6301,,Tomas Soucek,22,5,1
6302,,Tomas Soucek,23,2,3


In [141]:
# Copy the unmatched "Tomas Soucek" tallies onto the FIFA rows of player_id 236792,
# paired by fifa_version. The FIFA 20 scraped row has no FIFA row, so it is not copied.
soucek_patches = {
    1400: [10, 1],  # FIFA 21
    734: [5, 1],    # FIFA 22
    94: [2, 3],     # FIFA 23
}
for row_label, tally in soucek_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

Souček is an example of both problems at once: a late transfer (his FIFA 20 season has no FIFA row) and a name mismatch (`Tomas Soucek` vs. `Tomáš Souček`).

In [142]:
# FIFA-side rows for player_id 189509 (Thiago Alcântara do Nascimento).
df.loc[df['player_id'] == 189509.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
22,189509.0,Thiago Alcântara do Nascimento,23,0,0
664,189509.0,Thiago Alcântara do Nascimento,22,0,0


In [143]:
# Unmatched scraped rows stored under the shortened name.
suspicious_players.loc[suspicious_players['long_name'] == "Thiago Alcántara", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6289,,Thiago Alcántara,21,1,0
6290,,Thiago Alcántara,22,1,4


In [144]:
# The FIFA 22 row of player_id 189509 receives the unmatched FIFA 22 tally.
# The FIFA 21 scraped row has no FIFA counterpart, so it is not copied.
thiago_fifa22_row = 664
df.loc[thiago_fifa22_row, ['goals', 'assists']] = [1, 4]

In [145]:
# FIFA-side rows for player_id 235569 (Tanguy Ndombèlé Alvaro).
df.loc[df['player_id'] == 235569.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
165,235569.0,Tanguy Ndombèlé Alvaro,23,0,0
733,235569.0,Tanguy Ndombèlé Alvaro,22,0,0
1428,235569.0,Tanguy Ndombèlé Alvaro,21,0,0
2064,235569.0,Tanguy Ndombèlé Alvaro,20,0,0


In [146]:
# Unmatched scraped rows stored under the shortened name (acute accents).
suspicious_players.loc[suspicious_players['long_name'] == "Tanguy Ndombélé", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6284,,Tanguy Ndombélé,20,2,2
6285,,Tanguy Ndombélé,21,3,2
6286,,Tanguy Ndombélé,22,1,1


In [147]:
# Copy the unmatched "Tanguy Ndombélé" tallies onto the FIFA rows of player_id 235569,
# paired by fifa_version. Keys are df row labels.
ndombele_patches = {
    733: [1, 1],   # FIFA 22
    1428: [3, 2],  # FIFA 21
    2064: [2, 2],  # FIFA 20
}
for row_label, tally in ndombele_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [148]:
# FIFA-side rows for player_id 180283 (long_name is stored in non-Latin script).
df.loc[df['player_id'] == 180283.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
2220,180283.0,기성용 寄诚庸,20,0,0
2889,180283.0,기성용 寄诚庸,19,0,0
3483,180283.0,기성용 寄诚庸,18,0,0
4148,180283.0,기성용 寄诚庸,17,0,0
4806,180283.0,기성용 寄诚庸,16,0,0
5548,180283.0,기성용 寄诚庸,15,0,0


In [149]:
# Unmatched scraped rows stored under the romanised name "Sung-yueng Ki".
suspicious_players.loc[suspicious_players['long_name'] == "Sung-yueng Ki", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6278,,Sung-yueng Ki,15,8,1
6279,,Sung-yueng Ki,16,2,1
6280,,Sung-yueng Ki,17,0,1
6281,,Sung-yueng Ki,18,2,2
6282,,Sung-yueng Ki,19,0,1


In [150]:
# Copy the unmatched "Sung-yueng Ki" tallies onto the FIFA rows of player_id 180283,
# paired by fifa_version. Row 2220 (FIFA 20) has no scraped row and is left as is.
ki_patches = {
    2889: [0, 1],  # FIFA 19
    3483: [2, 2],  # FIFA 18
    4148: [0, 1],  # FIFA 17
    4806: [2, 1],  # FIFA 16
    5548: [8, 1],  # FIFA 15
}
for row_label, tally in ki_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [151]:
# FIFA-side rows for player_id 186452 (Siem Stefan de Jong).
df.loc[df['player_id'] == 186452.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
3568,186452.0,Siem Stefan de Jong,18,0,0
4812,186452.0,Siem Stefan de Jong,16,0,0
5444,186452.0,Siem Stefan de Jong,15,0,0


In [152]:
# Set goals/assists on the FIFA rows of player_id 186452. Keys are df row labels.
# NOTE(review): the source of these tallies is not displayed in the notebook — verify.
de_jong_patches = {
    4812: [0, 1],  # FIFA 16
    5444: [4, 1],  # FIFA 15
}
for row_label, tally in de_jong_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [153]:
# FIFA-side rows for player_id 197853 (Serge Alain Stéphane Aurier).
df.loc[df['player_id'] == 197853.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
759,197853.0,Serge Alain Stéphane Aurier,22,0,0
1410,197853.0,Serge Alain Stéphane Aurier,21,2,3
2108,197853.0,Serge Alain Stéphane Aurier,20,1,5
2731,197853.0,Serge Alain Stéphane Aurier,19,0,2


In [154]:
# Unmatched scraped rows stored under the short name "Serge Aurier".
suspicious_players.loc[suspicious_players['long_name'] == "Serge Aurier", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6267,,Serge Aurier,18,2,2
6268,,Serge Aurier,23,1,0


Aurier must have joined late in these seasons (FIFA 18 and FIFA 23): neither has a matching FIFA row, so there is nothing to patch.

In [155]:
# FIFA-side rows for player_id 207993 (Sead Kolašinac).
df.loc[df['player_id'] == 207993.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
971,207993.0,Sead Kolašinac,22,0,0
1478,207993.0,Sead Kolašinac,21,0,0
2118,207993.0,Sead Kolašinac,20,0,0
2769,207993.0,Sead Kolašinac,19,0,0
3439,207993.0,Sead Kolašinac,18,0,0


In [156]:
# Unmatched scraped rows stored under the unaccented spelling "Sead Kolasinac".
suspicious_players.loc[suspicious_players['long_name'] == "Sead Kolasinac", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6264,,Sead Kolasinac,18,2,4
6265,,Sead Kolasinac,19,0,6
6266,,Sead Kolasinac,20,0,2


In [157]:
# Copy the unmatched "Sead Kolasinac" tallies onto the FIFA rows of player_id 207993,
# paired by fifa_version. Rows for FIFA 21 and 22 have no scraped row and are left as is.
kolasinac_patches = {
    3439: [2, 4],  # FIFA 18
    2769: [0, 6],  # FIFA 19
    2118: [0, 2],  # FIFA 20
}
for row_label, tally in kolasinac_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [158]:
# FIFA-side rows for player_id 175932 (Ritchie Ria Alfons De Laet).
df.loc[df['player_id'] == 175932.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
5019,175932.0,Ritchie Ria Alfons De Laet,16,0,0
5704,175932.0,Ritchie Ria Alfons De Laet,15,0,0


In [159]:
# Set goals/assists on the FIFA rows of player_id 175932. Keys are df row labels.
# NOTE(review): the source of these tallies is not displayed in the notebook — verify.
de_laet_patches = {
    5019: [1, 0],  # FIFA 16
    5704: [0, 1],  # FIFA 15
}
for row_label, tally in de_laet_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [160]:
# FIFA-side rows for player_id 242641 (Rayan Aït Nouri).
df.loc[df['player_id'] == 242641.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
289,242641.0,Rayan Aït Nouri,23,0,0
1067,242641.0,Rayan Aït Nouri,22,0,0


In [161]:
# Set goals/assists on the FIFA rows of player_id 242641. Keys are df row labels.
# NOTE(review): the source of these tallies is not displayed in the notebook — verify.
ait_nouri_patches = {
    289: [1, 0],   # FIFA 23
    1067: [1, 4],  # FIFA 22
}
for row_label, tally in ait_nouri_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [162]:
# FIFA-side rows for player_id 186190 (Patrick John Miguel van Aanholt).
df.loc[df['player_id'] == 186190.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
1501,186190.0,Patrick John Miguel van Aanholt,21,0,0
2224,186190.0,Patrick John Miguel van Aanholt,20,0,0
2944,186190.0,Patrick John Miguel van Aanholt,19,0,0
3608,186190.0,Patrick John Miguel van Aanholt,18,0,0
4284,186190.0,Patrick John Miguel van Aanholt,17,0,0
4949,186190.0,Patrick John Miguel van Aanholt,16,0,0
5651,186190.0,Patrick John Miguel van Aanholt,15,0,0


In [163]:
# Set goals/assists on the FIFA rows of player_id 186190. Keys are df row labels.
# NOTE(review): the source of these tallies is not displayed in the notebook, and
# two pairs of adjacent seasons carry identical values — verify against the source.
van_aanholt_patches = {
    1501: [0, 1],  # FIFA 21
    2224: [3, 2],  # FIFA 20
    2944: [3, 2],  # FIFA 19
    3608: [5, 1],  # FIFA 18
    4284: [5, 1],  # FIFA 17
    4949: [5, 4],  # FIFA 16
    5651: [0, 5],  # FIFA 15
}
for row_label, tally in van_aanholt_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [164]:
# FIFA-side rows for player_id 191202 (Nemanja Matić).
df.loc[df['player_id'] == 191202.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
781,191202.0,Nemanja Matić,22,0,0
1408,191202.0,Nemanja Matić,21,0,0
2052,191202.0,Nemanja Matić,20,0,0
2652,191202.0,Nemanja Matić,19,0,0
3350,191202.0,Nemanja Matić,18,0,0
4003,191202.0,Nemanja Matić,17,0,0
4647,191202.0,Nemanja Matić,16,0,0
5328,191202.0,Nemanja Matić,15,0,0


In [165]:
# Unmatched scraped rows stored under the unaccented spelling "Nemanja Matic".
suspicious_players.loc[suspicious_players['long_name'] == "Nemanja Matic", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6219,,Nemanja Matic,15,1,3
6220,,Nemanja Matic,16,2,2
6221,,Nemanja Matic,17,1,7
6222,,Nemanja Matic,18,1,1
6223,,Nemanja Matic,19,1,0
6224,,Nemanja Matic,20,0,2
6225,,Nemanja Matic,21,0,1
6226,,Nemanja Matic,22,0,4


In [166]:
# Copy the unmatched "Nemanja Matic" tallies onto the FIFA rows of player_id 191202,
# paired by fifa_version. Keys are df row labels.
matic_patches = {
    5328: [1, 3],  # FIFA 15
    4647: [2, 2],  # FIFA 16
    4003: [1, 7],  # FIFA 17
    3350: [1, 1],  # FIFA 18
    2652: [1, 0],  # FIFA 19
    2052: [0, 2],  # FIFA 20
    1408: [0, 1],  # FIFA 21
    781: [0, 4],   # FIFA 22
}
for row_label, tally in matic_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [167]:
# FIFA-side rows for player_id 207410 (Mateo Kovačić).
df.loc[df['player_id'] == 207410.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
40,207410.0,Mateo Kovačić,23,0,0
709,207410.0,Mateo Kovačić,22,0,0
1363,207410.0,Mateo Kovačić,21,0,0
2042,207410.0,Mateo Kovačić,20,0,0


In [168]:
# Unmatched scraped rows stored under the unaccented spelling "Mateo Kovacic".
suspicious_players.loc[suspicious_players['long_name'] == "Mateo Kovacic", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6193,,Mateo Kovacic,19,0,2
6194,,Mateo Kovacic,20,1,3
6195,,Mateo Kovacic,21,0,1
6196,,Mateo Kovacic,22,2,5
6197,,Mateo Kovacic,23,1,2


In [169]:
# Copy the unmatched "Mateo Kovacic" tallies onto the FIFA rows of player_id 207410,
# paired by fifa_version. The FIFA 19 scraped row has no FIFA row, so it is not copied.
kovacic_patches = {
    40: [1, 2],    # FIFA 23
    709: [2, 5],   # FIFA 22
    1363: [0, 1],  # FIFA 21
    2042: [1, 3],  # FIFA 20
}
for row_label, tally in kovacic_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [170]:
# FIFA-side rows for player_id 201955 (Massadio Haïdara).
df.loc[df['player_id'] == 201955.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
3761,201955.0,Massadio Haïdara,18,0,0
5031,201955.0,Massadio Haïdara,16,0,0
5718,201955.0,Massadio Haïdara,15,0,0


In [171]:
# Unmatched scraped rows stored under the unaccented spelling "Massadio Haidara".
suspicious_players.loc[suspicious_players['long_name'] == "Massadio Haidara", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6190,,Massadio Haidara,15,0,1
6191,,Massadio Haidara,16,0,1


In [172]:
# Copy the unmatched "Massadio Haidara" tallies onto the FIFA rows of player_id 201955,
# paired by fifa_version. Row 3761 (FIFA 18) has no scraped row and is left as is.
haidara_patches = {
    5718: [0, 1],  # FIFA 15
    5031: [0, 1],  # FIFA 16
}
for row_label, tally in haidara_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [173]:
# FIFA-side rows for player_id 222665 (Martin Ødegaard).
df.loc[df['player_id'] == 222665.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
43,222665.0,Martin Ødegaard,23,15,7


In [174]:
# Unmatched scraped rows stored under the same spelling as the FIFA long_name.
suspicious_players.loc[suspicious_players['long_name'] == "Martin Ødegaard", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6186,,Martin Ødegaard,21,1,2
6187,,Martin Ødegaard,22,7,4


**Honestly**, not sure why Ødegaard is disregarded from the database for two seasons.  
I specified that players play in the *Prem*, and he is clearly part of **Arsenal**.  
Maybe he had a different player ID?

In [175]:
# FIFA-side rows for player_id 166706 (Martin Škrtel).
df.loc[df['player_id'] == 166706.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
4676,166706.0,Martin Škrtel,16,0,0
5337,166706.0,Martin Škrtel,15,0,0


In [176]:
# Unmatched scraped rows stored under the unaccented spelling "Martin Skrtel".
suspicious_players.loc[suspicious_players['long_name'] == "Martin Skrtel", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6184,,Martin Skrtel,15,1,0
6185,,Martin Skrtel,16,1,0


In [177]:
# Copy the unmatched "Martin Skrtel" tallies onto the FIFA rows of player_id 166706,
# paired by fifa_version. Keys are df row labels.
skrtel_patches = {
    4676: [1, 0],  # FIFA 16
    5337: [1, 0],  # FIFA 15
}
for row_label, tally in skrtel_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [178]:
# FIFA-side rows for player_id 184200 (Marko Arnautović).
df.loc[df['player_id'] == 184200.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
2700,184200.0,Marko Arnautović,19,0,0
3372,184200.0,Marko Arnautović,18,0,0
4030,184200.0,Marko Arnautović,17,0,0
4771,184200.0,Marko Arnautović,16,0,0
5441,184200.0,Marko Arnautović,15,0,0


In [179]:
# Unmatched scraped rows stored under the unaccented spelling "Marko Arnautovic".
suspicious_players.loc[suspicious_players['long_name'] == "Marko Arnautovic", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6178,,Marko Arnautovic,15,1,5
6179,,Marko Arnautovic,16,11,6
6180,,Marko Arnautovic,17,6,5
6181,,Marko Arnautovic,18,11,6
6182,,Marko Arnautovic,19,10,4


In [180]:
# Copy the unmatched "Marko Arnautovic" tallies onto the FIFA rows of player_id 184200,
# paired by fifa_version. Keys are df row labels.
arnautovic_patches = {
    5441: [1, 5],   # FIFA 15
    4771: [11, 6],  # FIFA 16
    4030: [6, 5],   # FIFA 17
    3372: [11, 6],  # FIFA 18
    2700: [10, 4],  # FIFA 19
}
for row_label, tally in arnautovic_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [181]:
# FIFA-side rows for player_id 206304 (Luka Milivojević).
df.loc[df['player_id'] == 206304.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
373,206304.0,Luka Milivojević,23,0,0
912,206304.0,Luka Milivojević,22,0,0
1442,206304.0,Luka Milivojević,21,0,0
2057,206304.0,Luka Milivojević,20,0,0
2796,206304.0,Luka Milivojević,19,0,0
3545,206304.0,Luka Milivojević,18,0,0


In [182]:
# Unmatched scraped rows stored under the unaccented spelling "Luka Milivojevic".
suspicious_players.loc[suspicious_players['long_name'] == "Luka Milivojevic", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6160,,Luka Milivojevic,17,2,0
6161,,Luka Milivojevic,18,10,1
6162,,Luka Milivojevic,19,12,2
6163,,Luka Milivojevic,20,3,1
6164,,Luka Milivojevic,21,1,1


In [183]:
# Copy the unmatched "Luka Milivojevic" tallies onto the FIFA rows of player_id 206304,
# paired by fifa_version. The FIFA 17 scraped row has no FIFA row, so it is not copied;
# the FIFA 22 and 23 rows have no scraped row and are left as is.
milivojevic_patches = {
    3545: [10, 1],  # FIFA 18
    2796: [12, 2],  # FIFA 19
    2057: [3, 1],   # FIFA 20
    1442: [1, 1],   # FIFA 21
}
for row_label, tally in milivojevic_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [184]:
# FIFA-side rows for player_id 200949 (Lucas Rodrigues Moura da Silva).
df.loc[df['player_id'] == 200949.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
104,200949.0,Lucas Rodrigues Moura da Silva,23,1,0
105,200949.0,Lucas Rodrigues Moura da Silva,23,2,6
738,200949.0,Lucas Rodrigues Moura da Silva,22,2,6
739,200949.0,Lucas Rodrigues Moura da Silva,22,7,2
1360,200949.0,Lucas Rodrigues Moura da Silva,21,3,4
1361,200949.0,Lucas Rodrigues Moura da Silva,21,2,2
2020,200949.0,Lucas Rodrigues Moura da Silva,20,4,4
2021,200949.0,Lucas Rodrigues Moura da Silva,20,3,2
2709,200949.0,Lucas Rodrigues Moura da Silva,19,0,0


In [185]:
# Set goals/assists on the FIFA 19 row of player_id 200949.
# NOTE(review): the source of this tally is not displayed in the notebook. The lookup
# output for this player also shows two rows per fifa_version for FIFA 20–23 with
# different tallies — looks like a duplicate join match; confirm and deduplicate.
lucas_moura_fifa19_row = 2709
df.loc[lucas_moura_fifa19_row, ['goals', 'assists']] = [10, 1]

In [186]:
# FIFA-side rows for player_id 212125 (Lazar Marković).
df.loc[df['player_id'] == 212125.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
2968,212125.0,Lazar Marković,19,0,0
3586,212125.0,Lazar Marković,18,0,0
4176,212125.0,Lazar Marković,17,0,0
4828,212125.0,Lazar Marković,16,0,0
5424,212125.0,Lazar Marković,15,0,0


In [187]:
# Unmatched scraped rows stored under the unaccented spelling "Lazar Markovic".
suspicious_players.loc[suspicious_players['long_name'] == "Lazar Markovic", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6150,,Lazar Markovic,15,2,1
6151,,Lazar Markovic,17,2,0


In [188]:
# Copy the unmatched "Lazar Markovic" tallies onto the FIFA rows of player_id 212125,
# paired by fifa_version. Keys are df row labels.
markovic_patches = {
    5424: [2, 1],  # FIFA 15
    4176: [2, 0],  # FIFA 17
}
for row_label, tally in markovic_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [189]:
# FIFA-side rows for player_id 191076 (Johann Berg Gudmundsson in the FIFA spelling).
df.loc[df['player_id'] == 191076.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
955,191076.0,Johann Berg Guðmunds­son,22,0,0
1510,191076.0,Johann Berg Guðmunds­son,21,0,0
2152,191076.0,Johann Berg Guðmunds­son,20,0,0
2786,191076.0,Johann Berg Guðmunds­son,19,0,0
3683,191076.0,Johann Berg Guðmunds­son,18,0,0
4368,191076.0,Johann Berg Guðmunds­son,17,1,2


In [190]:
# Unmatched scraped rows stored under the scraped spelling of the name.
suspicious_players.loc[suspicious_players['long_name'] == "Jóhann Berg Gudmundsson", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6131,,Jóhann Berg Gudmundsson,18,2,8
6132,,Jóhann Berg Gudmundsson,19,3,6
6133,,Jóhann Berg Gudmundsson,20,1,1
6134,,Jóhann Berg Gudmundsson,21,2,0
6135,,Jóhann Berg Gudmundsson,22,0,1


In [191]:
# Copy the unmatched "Jóhann Berg Gudmundsson" tallies onto the FIFA rows of
# player_id 191076, paired by fifa_version. Row 4368 (FIFA 17) already holds a
# tally and has no scraped row, so it is left as is.
gudmundsson_patches = {
    3683: [2, 8],  # FIFA 18
    2786: [3, 6],  # FIFA 19
    2152: [1, 1],  # FIFA 20
    1510: [2, 0],  # FIFA 21
    955: [0, 1],   # FIFA 22
}
for row_label, tally in gudmundsson_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [192]:
# FIFA-side rows for player_id 182744 (José Holebas).
df.loc[df['player_id'] == 182744.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
2141,182744.0,José Holebas,20,0,0
2891,182744.0,José Holebas,19,0,0
3484,182744.0,José Holebas,18,0,0
4197,182744.0,José Holebas,17,0,0
4807,182744.0,José Holebas,16,0,0


In [193]:
# Unmatched scraped rows stored under the alternative name "Jose Cholevas".
suspicious_players.loc[suspicious_players['long_name'] == "Jose Cholevas", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6119,,Jose Cholevas,16,1,0
6120,,Jose Cholevas,17,2,4
6121,,Jose Cholevas,18,0,4
6122,,Jose Cholevas,19,3,6


In [194]:
# Copy the unmatched "Jose Cholevas" tallies onto the FIFA rows of player_id 182744,
# paired by fifa_version. Row 2141 (FIFA 20) has no scraped row and is left as is.
holebas_patches = {
    4807: [1, 0],  # FIFA 16
    4197: [2, 4],  # FIFA 17
    3484: [0, 4],  # FIFA 18
    2891: [3, 6],  # FIFA 19
}
for row_label, tally in holebas_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [195]:
# FIFA-side rows for player_id 197756 (Jordan Pierre Ayew).
df.loc[df['player_id'] == 197756.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
306,197756.0,Jordan Pierre Ayew,23,4,3
957,197756.0,Jordan Pierre Ayew,22,3,3
1515,197756.0,Jordan Pierre Ayew,21,1,3
2290,197756.0,Jordan Pierre Ayew,20,9,2
3574,197756.0,Jordan Pierre Ayew,18,7,2
4863,197756.0,Jordan Pierre Ayew,16,7,0


In [196]:
# Unmatched scraped rows stored under the short name "Jordan Ayew".
suspicious_players.loc[suspicious_players['long_name'] == "Jordan Ayew", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6117,,Jordan Ayew,17,1,3
6118,,Jordan Ayew,19,1,2


These seasons (FIFA 17 and FIFA 19) have no matching FIFA rows for Jordan Ayew in the database, so there is nothing to patch.

In [197]:
# FIFA-side rows for player_id 210455 (Jonathan Castro Otto).
df.loc[df['player_id'] == 210455.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
149,210455.0,Jonathan Castro Otto,23,0,0
761,210455.0,Jonathan Castro Otto,22,0,0
1391,210455.0,Jonathan Castro Otto,21,0,0
2121,210455.0,Jonathan Castro Otto,20,0,0
2799,210455.0,Jonathan Castro Otto,19,0,0


In [198]:
# Unmatched scraped rows stored under the name "Jonny Otto".
suspicious_players.loc[suspicious_players['long_name'] == "Jonny Otto", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6113,,Jonny Otto,19,1,1
6114,,Jonny Otto,20,2,2
6115,,Jonny Otto,22,2,0
6116,,Jonny Otto,23,1,0


In [199]:
# Copy the unmatched "Jonny Otto" tallies onto the FIFA rows of player_id 210455,
# paired by fifa_version. Row 1391 (FIFA 21) has no scraped row and is left as is.
jonny_patches = {
    149: [1, 0],   # FIFA 23
    761: [2, 0],   # FIFA 22
    2121: [2, 2],  # FIFA 20
    2799: [1, 1],  # FIFA 19
}
for row_label, tally in jonny_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [200]:
# FIFA-side rows for player_id 200478 (Jeff Patrick Hendrick).
df.loc[df['player_id'] == 200478.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
1008,200478.0,Jeff Patrick Hendrick,22,1,0
2294,200478.0,Jeff Patrick Hendrick,20,2,2
2957,200478.0,Jeff Patrick Hendrick,19,3,0
3624,200478.0,Jeff Patrick Hendrick,18,2,2


In [201]:
# Unmatched scraped rows stored under the short name "Jeff Hendrick".
suspicious_players.loc[suspicious_players['long_name'] == "Jeff Hendrick", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6103,,Jeff Hendrick,17,2,1
6104,,Jeff Hendrick,21,2,1


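Hendrick presumably joined late in both of these seasons (FIFA 17 and FIFA 21): neither has a matching FIFA row, so there is nothing to patch.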

In [202]:
# FIFA-side rows for player_id 169792 (Jay Rodriguez).
df.loc[df['player_id'] == 169792.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
994,169792.0,Jay Rodriguez,22,7,2
1546,169792.0,Jay Rodriguez,21,2,2
2217,169792.0,Jay Rodriguez,20,3,2
3562,169792.0,Jay Rodriguez,18,0,0
4192,169792.0,Jay Rodriguez,17,0,0
4803,169792.0,Jay Rodriguez,16,0,0
5508,169792.0,Jay Rodriguez,15,0,0


In [203]:
# Unmatched scraped rows stored under the accented spelling of the name.
suspicious_players.loc[suspicious_players['long_name'] == "Jay Rodríguez", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6096,,Jay Rodríguez,17,5,2
6097,,Jay Rodríguez,18,7,1
6098,,Jay Rodríguez,20,8,2
6099,,Jay Rodríguez,21,1,2


In [204]:
# Copy the unmatched "Jay Rodríguez" tallies onto the FIFA rows of player_id 169792,
# paired by fifa_version. Keys are df row labels.
# NOTE(review): rows 1546 and 2217 already held non-zero tallies (2/2 and 3/2) that
# are overwritten here — confirm which source is correct for FIFA 21 and FIFA 20.
rodriguez_patches = {
    1546: [1, 2],  # FIFA 21
    2217: [8, 2],  # FIFA 20
    3562: [7, 1],  # FIFA 18
    4192: [5, 2],  # FIFA 17
}
for row_label, tally in rodriguez_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [205]:
# FIFA-side rows for player_id 226380 (long_name is stored in non-Latin script).
df.loc[df['player_id'] == 226380.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
224,226380.0,황희찬 黄喜灿,23,0,0


Hee-chan Hwang

In [240]:
# Unmatched scraped rows stored under the romanised name "Hee-chan Hwang".
suspicious_players.loc[suspicious_players['long_name'] == "Hee-chan Hwang", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6083,,Hee-chan Hwang,22,5,0
6084,,Hee-chan Hwang,23,3,1


In [241]:
# The FIFA 23 row of player_id 226380 receives the unmatched "Hee-chan Hwang" FIFA 23 tally.
# The FIFA 22 scraped row has no FIFA counterpart, so it is not copied.
hwang_fifa23_row = 224
df.loc[hwang_fifa23_row, ['goals', 'assists']] = [3, 1]

In [206]:
# FIFA-side rows for player_id 184484 (Gylfi Sigurdsson; FIFA stores the Icelandic spelling).
df.loc[df['player_id'] == 184484.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
777,184484.0,Gylfi Þór Sigurðsson,22,0,0
1435,184484.0,Gylfi Þór Sigurðsson,21,0,0
2016,184484.0,Gylfi Þór Sigurðsson,20,0,0
2701,184484.0,Gylfi Þór Sigurðsson,19,0,0
3373,184484.0,Gylfi Þór Sigurðsson,18,0,0
4031,184484.0,Gylfi Þór Sigurðsson,17,0,0
4772,184484.0,Gylfi Þór Sigurðsson,16,0,0
5442,184484.0,Gylfi Þór Sigurðsson,15,0,0


In [242]:
# Unmatched scraped rows stored under the anglicised name "Gylfi Sigurdsson".
suspicious_players.loc[suspicious_players['long_name'] == "Gylfi Sigurdsson", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6073,,Gylfi Sigurdsson,15,7,10
6074,,Gylfi Sigurdsson,16,11,4
6075,,Gylfi Sigurdsson,17,9,13
6076,,Gylfi Sigurdsson,18,4,3
6077,,Gylfi Sigurdsson,19,13,6
6078,,Gylfi Sigurdsson,20,2,3
6079,,Gylfi Sigurdsson,21,6,5


In [243]:
# Copy the scraped tallies from the unmatched "Gylfi Sigurdsson" rows onto the FIFA
# rows of player_id 184484. Each row label is paired with the scraped row that has
# the same fifa_version, as shown by the two lookup outputs for this player.
df.loc[1435, ['goals', 'assists']] = [6, 5]    # FIFA 21
df.loc[2016, ['goals', 'assists']] = [2, 3]    # FIFA 20
df.loc[2701, ['goals', 'assists']] = [13, 6]   # FIFA 19
df.loc[3373, ['goals', 'assists']] = [4, 3]    # FIFA 18
df.loc[4031, ['goals', 'assists']] = [9, 13]   # FIFA 17
df.loc[4772, ['goals', 'assists']] = [11, 4]   # FIFA 16
df.loc[5442, ['goals', 'assists']] = [7, 10]   # FIFA 15
# Row 777 (FIFA 22) is left untouched: there is no scraped row for that version.

In [207]:
# FIFA-side rows for player_id 201305 (Gabriel Armando de Abreu).
df.loc[df['player_id'] == 201305.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
4129,201305.0,Gabriel Armando de Abreu,17,0,0
4780,201305.0,Gabriel Armando de Abreu,16,0,0


In [245]:
# Unmatched scraped rows stored under the name "Gabriel Paulista".
suspicious_players.loc[suspicious_players['long_name'] == "Gabriel Paulista", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6062,,Gabriel Paulista,15,0,1
6063,,Gabriel Paulista,16,1,0


In [246]:
# The FIFA 16 row of player_id 201305 receives the unmatched "Gabriel Paulista" FIFA 16 tally.
# The FIFA 15 scraped row has no FIFA counterpart, so it is not copied.
gabriel_paulista_fifa16_row = 4780
df.loc[gabriel_paulista_fifa16_row, ['goals', 'assists']] = [1, 0]

In [208]:
# FIFA-side rows for player_id 232580 (Gabriel dos Santos Magalhães).
df.loc[df['player_id'] == 232580.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
92,232580.0,Gabriel dos Santos Magalhães,23,0,0
809,232580.0,Gabriel dos Santos Magalhães,22,0,0


In [247]:
# Set goals/assists on the FIFA rows of player_id 232580. Keys are df row labels.
# NOTE(review): the source of these tallies is not displayed in the notebook — verify.
gabriel_magalhaes_patches = {
    809: [5, 0],  # FIFA 22
    92: [3, 0],   # FIFA 23
}
for row_label, tally in gabriel_magalhaes_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [209]:
# FIFA-side rows for player_id 162131 (Fernando Javier Llorente Torres).
df.loc[df['player_id'] == 162131.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
2845,162131.0,Fernando Javier Llorente Torres,19,0,0
3479,162131.0,Fernando Javier Llorente Torres,18,0,0
4107,162131.0,Fernando Javier Llorente Torres,17,0,0


In [248]:
# Set goals/assists on the FIFA rows of player_id 162131. Keys are df row labels.
# NOTE(review): the source of these tallies is not displayed in the notebook — verify.
llorente_patches = {
    2845: [1, 4],   # FIFA 19
    3479: [1, 0],   # FIFA 18
    4107: [15, 1],  # FIFA 17
}
for row_label, tally in llorente_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [210]:
# FIFA-side rows for player_id 201118 (Cédric Ricardo Alves Soares).
df.loc[df['player_id'] == 201118.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
308,201118.0,Cédric Ricardo Alves Soares,23,0,0
907,201118.0,Cédric Ricardo Alves Soares,22,0,0
1562,201118.0,Cédric Ricardo Alves Soares,21,0,0
2238,201118.0,Cédric Ricardo Alves Soares,20,0,0
2866,201118.0,Cédric Ricardo Alves Soares,19,0,0
3465,201118.0,Cédric Ricardo Alves Soares,18,0,0
4166,201118.0,Cédric Ricardo Alves Soares,17,0,0
4779,201118.0,Cédric Ricardo Alves Soares,16,0,0


In [249]:
# Set goals/assists on the FIFA rows of player_id 201118. Keys are df row labels.
# NOTE(review): the source of these tallies is not displayed in the notebook — verify.
cedric_patches = {
    2866: [1, 2],  # FIFA 19
    2238: [1, 1],  # FIFA 20
    1562: [0, 1],  # FIFA 21
    907: [1, 1],   # FIFA 22
}
for row_label, tally in cedric_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [211]:
# FIFA-side rows for player_id 155355; short_name is included because long_name is in non-Latin script.
df.loc[df['player_id'] == 155355.0, ['player_id', 'short_name', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,short_name,long_name,fifa_version,goals,assists
3710,155355.0,Lee Chung Yong,이청용 李青龙,18,0,0
4304,155355.0,Lee Chung Yong,이청용 李青龙,17,0,0
4983,155355.0,Lee Chung Yong,이청용 李青龙,16,0,0


In [250]:
# Unmatched scraped rows stored under the romanised name "Chung-yong Lee".
suspicious_players.loc[suspicious_players['long_name'] == "Chung-yong Lee", ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6013,,Chung-yong Lee,16,1,0
6014,,Chung-yong Lee,17,0,1


In [251]:
# Copy the unmatched "Chung-yong Lee" tallies onto the FIFA rows of player_id 155355,
# paired by fifa_version. Row 3710 (FIFA 18) has no scraped row and is left as is.
lee_patches = {
    4983: [1, 0],  # FIFA 16
    4304: [0, 1],  # FIFA 17
}
for row_label, tally in lee_patches.items():
    df.loc[row_label, ['goals', 'assists']] = tally

In [212]:
# FIFA-side rows for player_id 178224 (Javier Hernández Balcázar).
df.loc[df['player_id'] == 178224.0, ['player_id', 'long_name', 'fifa_version', 'goals', 'assists']]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
2140,178224.0,Javier Hernández Balcázar,20,0,0
2784,178224.0,Javier Hernández Balcázar,19,0,0
3368,178224.0,Javier Hernández Balcázar,18,0,0
4707,178224.0,Javier Hernández Balcázar,16,0,0
5338,178224.0,Javier Hernández Balcázar,15,0,0


In [252]:
# Unmatched stat rows (no player_id) recorded under the name "Chicharito".
suspicious_players.loc[
    suspicious_players['long_name'].eq("Chicharito"),
    ['player_id', 'long_name', 'fifa_version', 'goals', 'assists'],
]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
6007,,Chicharito,18,8,0
6008,,Chicharito,19,7,1
6009,,Chicharito,20,1,0


In [253]:
# Copy the unmatched "Chicharito" stats onto the matching Javier Hernández FIFA rows.
df.at[3368, 'goals'], df.at[3368, 'assists'] = 8, 0   # FIFA 18 (from unmatched row 6007)
df.at[2784, 'goals'], df.at[2784, 'assists'] = 7, 1   # FIFA 19 (from unmatched row 6008)
df.at[2140, 'goals'], df.at[2140, 'assists'] = 1, 0   # FIFA 20 (from unmatched row 6009)

In [213]:
# Per-season goals/assists currently stored for player_id 178372 (Branislav Ivanović).
df.loc[
    df['player_id'].eq(178372.0),
    ['player_id', 'long_name', 'fifa_version', 'goals', 'assists'],
]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
4085,178372.0,Branislav Ivanović,17,0,0
4708,178372.0,Branislav Ivanović,16,0,0
5339,178372.0,Branislav Ivanović,15,0,0


In [255]:
# Unmatched stat rows (no player_id) recorded under the unaccented name "Branislav Ivanovic".
suspicious_players.loc[
    suspicious_players['long_name'].eq("Branislav Ivanovic"),
    ['player_id', 'long_name', 'fifa_version', 'goals', 'assists'],
]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
5988,,Branislav Ivanovic,15,4,6
5989,,Branislav Ivanovic,16,2,2


In [256]:
# Copy the unmatched "Branislav Ivanovic" stats onto the matching Ivanović FIFA rows.
df.at[5339, 'goals'], df.at[5339, 'assists'] = 4, 6   # FIFA 15 (from unmatched row 5988)
df.at[4708, 'goals'], df.at[4708, 'assists'] = 2, 2   # FIFA 16 (from unmatched row 5989)

In [214]:
# Per-season goals/assists currently stored for player_id 176993 (Bojan Krkić Pérez).
df.loc[
    df['player_id'].eq(176993.0),
    ['player_id', 'long_name', 'fifa_version', 'goals', 'assists'],
]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
3451,176993.0,Bojan Krkić Pérez,18,0,0
4025,176993.0,Bojan Krkíc Pérez,17,0,0
4678,176993.0,Bojan Krkíc Pérez,16,0,0
5438,176993.0,Bojan Krkíc Pérez,15,0,0


In [257]:
# Unmatched stat rows (no player_id) recorded under the unaccented name "Bojan Krkic".
suspicious_players.loc[
    suspicious_players['long_name'].eq("Bojan Krkic"),
    ['player_id', 'long_name', 'fifa_version', 'goals', 'assists'],
]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
5984,,Bojan Krkic,15,4,1
5985,,Bojan Krkic,16,7,1
5986,,Bojan Krkic,17,3,0


In [258]:
# Copy the unmatched "Bojan Krkic" stats onto the matching Bojan Krkić FIFA rows.
df.at[5438, 'goals'], df.at[5438, 'assists'] = 4, 1   # FIFA 15 (from unmatched row 5984)
df.at[4678, 'goals'], df.at[4678, 'assists'] = 7, 1   # FIFA 16 (from unmatched row 5985)
df.at[4025, 'goals'], df.at[4025, 'assists'] = 3, 0   # FIFA 17 (from unmatched row 5986)

In [215]:
# Per-season goals/assists currently stored for player_id 191005 (Ahmed El Mohamady).
df.loc[
    df['player_id'].eq(191005.0),
    ['player_id', 'long_name', 'fifa_version', 'goals', 'assists'],
]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
1733,191005.0,Ahmed Eissa El Mohamady Abdel Fattah,21,0,0
2442,191005.0,Ahmed Eissa El Mohamady Abdel Fattah,20,0,0
4290,191005.0,Ahmed Eissa El Mohamady Abdel Fattah,17,0,0
5627,191005.0,Ahmed Eissa El Mohamady Abdel Fattah,15,0,0


In [259]:
# Hand-entered goals/assists for Ahmed El Mohamady; these rows showed 0/0 in the lookup above.
# The FIFA 21 row (1733) is intentionally left untouched here.
df.at[5627, 'goals'], df.at[5627, 'assists'] = 2, 5   # FIFA 15
df.at[4290, 'goals'], df.at[4290, 'assists'] = 0, 2   # FIFA 17
df.at[2442, 'goals'], df.at[2442, 'assists'] = 1, 1   # FIFA 20

In [216]:
# Per-season goals/assists currently stored for player_id 230899 (Ademola Lookman).
df.loc[
    df['player_id'].eq(230899.0),
    ['player_id', 'long_name', 'fifa_version', 'goals', 'assists'],
]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
3102,230899.0,Ademola Lookman,19,0,2
3850,230899.0,Ademola Lookman,18,0,0


In [261]:
# Unmatched stat rows (no player_id) recorded under the name "Ademola Lookman".
suspicious_players.loc[
    suspicious_players['long_name'].eq("Ademola Lookman"),
    ['player_id', 'long_name', 'fifa_version', 'goals', 'assists'],
]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
5954,,Ademola Lookman,17,1,0
5955,,Ademola Lookman,21,4,4
5956,,Ademola Lookman,22,6,0


In [217]:
# Per-season goals/assists currently stored for player_id 213956 (Adama Traoré Diarra).
df.loc[
    df['player_id'].eq(213956.0),
    ['player_id', 'long_name', 'fifa_version', 'goals', 'assists'],
]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
151,213956.0,Adama Traoré Diarra,23,2,2
833,213956.0,Adama Traoré Diarra,22,0,0
1447,213956.0,Adama Traoré Diarra,21,2,3
2350,213956.0,Adama Traoré Diarra,20,4,9
5064,213956.0,Adama Traoré Diarra,16,0,1


In [263]:
# Unmatched stat rows (no player_id) recorded under the short name "Adama Traoré".
suspicious_players.loc[
    suspicious_players['long_name'].eq("Adama Traoré"),
    ['player_id', 'long_name', 'fifa_version', 'goals', 'assists'],
]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
5952,,Adama Traoré,17,0,1
5953,,Adama Traoré,19,1,1


In [218]:
# Rows that matched a FIFA record, i.e. `fifa_update` is populated.
has_fifa_record = df['fifa_update'].notna()
df.loc[has_fifa_record]

Unnamed: 0,player_id,fifa_version,fifa_update,short_name,long_name,player_positions,overall,potential,value_eur,wage_eur,...,pace,shooting,passing,dribbling,defending,physic,attacking_crossing,attacking_finishing,goals,assists
0,192985.0,23,1.0,K. De Bruyne,Kevin De Bruyne,"CM, CAM",91.0,91.0,107500000.0,350000.0,...,74.0,88.0,93.0,87.0,64.0,77.0,94.0,85.0,7,18
1,20801.0,23,1.0,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,ST,90.0,90.0,41000000.0,220000.0,...,81.0,92.0,78.0,85.0,34.0,75.0,80.0,93.0,1,0
2,203376.0,23,1.0,V. van Dijk,Virgil van Dijk,CB,90.0,90.0,98000000.0,230000.0,...,81.0,60.0,71.0,72.0,91.0,86.0,53.0,52.0,3,1
3,209331.0,23,1.0,M. Salah,Mohamed Salah Ghaly,RW,90.0,90.0,115500000.0,270000.0,...,90.0,89.0,82.0,90.0,45.0,75.0,80.0,93.0,19,12
4,200104.0,23,1.0,H. Son,손흥민 孙兴慜,"LW, LM",89.0,89.0,101000000.0,240000.0,...,88.0,89.0,82.0,86.0,42.0,69.0,83.0,91.0,10,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5943,223984.0,15,1.0,R. Fallowfield,Ryan Jack Glenn Fallowfield,RM,52.0,65.0,40000.0,2000.0,...,66.0,44.0,45.0,58.0,33.0,49.0,45.0,41.0,0,0
5944,210424.0,15,1.0,K. Kennedy,Kieran Kennedy,CB,51.0,62.0,40000.0,2000.0,...,53.0,25.0,31.0,38.0,52.0,58.0,28.0,21.0,0,0
5945,207602.0,15,1.0,J. Gordon,Jaanai Gordon,ST,50.0,65.0,30000.0,2000.0,...,80.0,49.0,42.0,57.0,26.0,43.0,45.0,54.0,0,0
5946,220015.0,15,1.0,B. Lewis,Bradley Lewis,CB,49.0,65.0,20000.0,2000.0,...,56.0,28.0,28.0,30.0,49.0,56.0,23.0,25.0,0,0


In [219]:
# Every column for player_id 135507 (Fernandinho), to spot seasons the join duplicated.
df.loc[df['player_id'].eq(135507.0)]

Unnamed: 0,player_id,fifa_version,fifa_update,short_name,long_name,player_positions,overall,potential,value_eur,wage_eur,...,pace,shooting,passing,dribbling,defending,physic,attacking_crossing,attacking_finishing,goals,assists
699,135507.0,22,1.0,Fernandinho,Fernando Luiz Rosa,"CDM, CB",83.0,83.0,7000000.0,100000.0,...,59.0,72.0,75.0,78.0,84.0,75.0,68.0,66.0,2,1
1346,135507.0,21,1.0,Fernandinho,Fernando Luiz Rosa,"CB, CDM",84.0,84.0,11000000.0,110000.0,...,64.0,72.0,78.0,78.0,85.0,79.0,68.0,66.0,0,2
1986,135507.0,20,1.0,Fernandinho,Fernando Luiz Rosa,CDM,87.0,87.0,19500000.0,200000.0,...,66.0,74.0,79.0,78.0,84.0,79.0,68.0,69.0,0,1
2649,135507.0,19,1.0,Fernandinho,Fernando Luiz Rosa,CDM,86.0,86.0,18000000.0,180000.0,...,70.0,74.0,77.0,79.0,83.0,81.0,68.0,69.0,1,3
3359,135507.0,18,1.0,Fernandinho,Fernando Luiz Rosa,"CDM, CM, RB",82.0,82.0,12500000.0,130000.0,...,73.0,73.0,76.0,78.0,77.0,78.0,68.0,69.0,5,3
4046,135507.0,17,1.0,Fernandinho,Fernando Luiz Rosa,"CM, CDM",81.0,81.0,14000000.0,130000.0,...,77.0,75.0,77.0,78.0,76.0,78.0,68.0,70.0,2,1
4694,135507.0,16,1.0,Fernandinho,Fernando Luiz Rosa,"CM, CDM",80.0,80.0,12500000.0,100000.0,...,77.0,74.0,78.0,80.0,74.0,77.0,77.0,70.0,2,3
4695,135507.0,16,1.0,Fernandinho,Fernando Luiz Rosa,"CM, CDM",80.0,80.0,12500000.0,100000.0,...,77.0,74.0,78.0,80.0,74.0,77.0,77.0,70.0,2,0
5317,135507.0,15,1.0,Fernandinho,Fernando Luiz Rosa,"CM, CDM",82.0,82.0,14500000.0,130000.0,...,78.0,74.0,82.0,81.0,73.0,75.0,78.0,70.0,3,4
5318,135507.0,15,1.0,Fernandinho,Fernando Luiz Rosa,"CM, CDM",82.0,82.0,14500000.0,130000.0,...,78.0,74.0,82.0,81.0,73.0,75.0,78.0,70.0,2,2


In [220]:
# Fernandinho appears twice for FIFA 15 and FIFA 16; drop the second row of each pair
# (5318: 2 goals/2 assists, 4695: 2 goals/0 assists) and keep rows 5317 and 4694.
df = df.drop(index=[5318, 4695])

### Cleaning Goals/Assists Data

The SQL join I used to connect goals and assists stats to FIFA players was intentionally **overfitted**. This was necessary because some players were not being correctly matched — their names in the FIFA database were in other languages or used full names that didn't match the format on Transfermarkt (where I sourced the stats).

Using Beekeeper Studio, I found:
- **98 rows** where players were assigned goals or assists **more than once**
- These duplicates involved only **24 unique players** that needed correction

I manually looked up each of these players' Premier League goals and assists for each season, and wrote them down. This allowed me to remove incorrect duplicates from the DataFrame.

As a result, the data should now be much **cleaner**.

**Example:** Fernandinho was the first player I corrected.


In [221]:
# Every column for player_id 200054 (Pedro Obiang), to spot seasons the join duplicated.
df.loc[df['player_id'].eq(200054.0)]

Unnamed: 0,player_id,fifa_version,fifa_update,short_name,long_name,player_positions,overall,potential,value_eur,wage_eur,...,pace,shooting,passing,dribbling,defending,physic,attacking_crossing,attacking_finishing,goals,assists
2864,200054.0,19,1.0,Pedro Obiang,Pedro Mba Obiang Avomo,"CDM, CM",77.0,80.0,9000000.0,60000.0,...,67.0,59.0,74.0,74.0,75.0,76.0,68.0,50.0,8,2
2865,200054.0,19,1.0,Pedro Obiang,Pedro Mba Obiang Avomo,"CDM, CM",77.0,80.0,9000000.0,60000.0,...,67.0,59.0,74.0,74.0,75.0,76.0,68.0,50.0,0,1
3540,200054.0,18,1.0,Pedro Obiang,Pedro Mba Obiang Avomo,"CDM, CM",77.0,82.0,10000000.0,80000.0,...,68.0,59.0,75.0,74.0,75.0,77.0,68.0,50.0,4,2
3541,200054.0,18,1.0,Pedro Obiang,Pedro Mba Obiang Avomo,"CDM, CM",77.0,82.0,10000000.0,80000.0,...,68.0,59.0,75.0,74.0,75.0,77.0,68.0,50.0,2,0
4254,200054.0,17,1.0,Pedro Obiang,Pedro Mba Obiang Avomo,"CDM, CM",76.0,80.0,6000000.0,80000.0,...,68.0,59.0,74.0,72.0,74.0,76.0,68.0,50.0,9,9
4255,200054.0,17,1.0,Pedro Obiang,Pedro Mba Obiang Avomo,"CDM, CM",76.0,80.0,6000000.0,80000.0,...,68.0,59.0,74.0,72.0,74.0,76.0,68.0,50.0,1,1
4962,200054.0,16,1.0,Pedro Obiang,Pedro Mba Obiang Avomo,"CM, CDM",74.0,81.0,4700000.0,35000.0,...,68.0,59.0,74.0,72.0,73.0,77.0,68.0,50.0,7,2
4963,200054.0,16,1.0,Pedro Obiang,Pedro Mba Obiang Avomo,"CM, CDM",74.0,81.0,4700000.0,35000.0,...,68.0,59.0,74.0,72.0,73.0,77.0,68.0,50.0,0,1


In [222]:
# Remove the duplicated Pedro Obiang rows produced by the join. Each FIFA version (16-19)
# appears twice in the lookup above; the first row of each pair is dropped, the second kept.
# The list previously contained 4692, a digit transposition of 4962: label 4692 is not one
# of Obiang's rows, so an unrelated row was deleted and the FIFA 16 duplicate survived.
df = df.drop([4962, 4254, 3540, 2864])

In [223]:
# Per-season goals/assists currently stored for player_id 188377 (Kyle Walker).
df.loc[
    df['player_id'].eq(188377.0),
    ['player_id', 'long_name', 'fifa_version', 'goals', 'assists'],
]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
29,188377.0,Kyle Walker,23,1,0
679,188377.0,Kyle Walker,22,0,2
680,188377.0,Kyle Walker,22,1,2
1336,188377.0,Kyle Walker,21,1,1
1337,188377.0,Kyle Walker,21,0,3
2005,188377.0,Kyle Walker,20,1,4
2671,188377.0,Kyle Walker,19,1,1
2672,188377.0,Kyle Walker,19,0,3
3348,188377.0,Kyle Walker,18,0,6
3349,188377.0,Kyle Walker,18,0,2


In [224]:
# Kyle Walker is duplicated for FIFA 18, 19, 21 and 22; drop the second row of each pair.
df = df.drop(index=[3349, 2672, 1337, 680])
# Reset his FIFA 23 row (showed 1 goal / 0 assists).
# NOTE(review): presumably those stats belong to Kyle Walker-Peters, whose FIFA 23 row
# shows the same values — confirm.
df.at[29, 'goals'], df.at[29, 'assists'] = 0, 0

In [225]:
# Per-season goals/assists currently stored for player_id 227927 (Kyle Walker-Peters).
df.loc[
    df['player_id'].eq(227927.0),
    ['player_id', 'long_name', 'fifa_version', 'goals', 'assists'],
]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
191,227927.0,Kyle Walker-Peters,23,1,0
876,227927.0,Kyle Walker-Peters,22,0,2
877,227927.0,Kyle Walker-Peters,22,1,2
1627,227927.0,Kyle Walker-Peters,21,1,1
1628,227927.0,Kyle Walker-Peters,21,0,3
2362,227927.0,Kyle Walker-Peters,20,1,4
3138,227927.0,Kyle Walker-Peters,19,1,1
3139,227927.0,Kyle Walker-Peters,19,0,3
3832,227927.0,Kyle Walker-Peters,18,0,6
3833,227927.0,Kyle Walker-Peters,18,0,2


In [226]:
# Kyle Walker-Peters is duplicated for FIFA 18, 19, 21 and 22; drop the first row of each pair.
df = df.drop([3832, 3138, 1627, 876])
df.loc[2362, ['goals', 'assists']] = [0, 0] # FIFA 20 row had been attributed Kyle Walker's stats (1 goal, 4 assists)
# NOTE(review): label 4570 is not among the Walker-Peters rows in the lookup above — confirm
# which player/season it refers to. If the label does not exist, .loc will append a new row.
df.loc[4570, ['goals', 'assists']] = [0, 0] # Attributed Kyle Walker's stats (per original note)

In [227]:
# Per-season goals/assists currently stored for player_id 200778 (Cyrus Christie).
df.loc[
    df['player_id'].eq(200778.0),
    ['player_id', 'long_name', 'fifa_version', 'goals', 'assists'],
]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
1738,200778.0,Cyrus Sylvester Frederick Christie,21,1,0
3061,200778.0,Cyrus Sylvester Frederick Christie,19,0,1
3062,200778.0,Cyrus Sylvester Frederick Christie,19,1,1


In [228]:
# Cyrus Christie is duplicated for FIFA 19; drop row 3062 (1 goal/1 assist), keep row 3061.
df = df.drop(index=[3062])
# Reset his FIFA 21 row, which showed 1 goal / 0 assists.
df.at[1738, 'goals'], df.at[1738, 'assists'] = 0, 0

In [229]:
# Per-season goals/assists currently stored for player_id 206083 (Josh Murphy).
df.loc[
    df['player_id'].eq(206083.0),
    ['player_id', 'long_name', 'fifa_version', 'goals', 'assists'],
]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
3130,206083.0,Josh Murphy,19,0,1
3131,206083.0,Josh Murphy,19,3,2


In [230]:
# Josh Murphy is duplicated for FIFA 19; drop row 3130 (0 goals/1 assist), keep row 3131 (3/2).
df = df.drop(index=3130)

In [231]:
# Per-season goals/assists currently stored for player_id 206085 (Jacob Murphy).
df.loc[
    df['player_id'].eq(206085.0),
    ['player_id', 'long_name', 'fifa_version', 'goals', 'assists'],
]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
408,206085.0,Jacob Murphy,23,4,2
1078,206085.0,Jacob Murphy,22,1,2
1741,206085.0,Jacob Murphy,21,2,3
3067,206085.0,Jacob Murphy,19,0,1
3068,206085.0,Jacob Murphy,19,3,2
3796,206085.0,Jacob Murphy,18,1,1


In [232]:
# Jacob Murphy is duplicated for FIFA 19; drop row 3068 (3 goals/2 assists — the line kept
# for Josh Murphy) and keep row 3067 (0/1).
df = df.drop(index=3068)

In [233]:
# Per-season goals/assists currently stored for player_id 232805 (Bernardo Fernandes da Silva Junior).
df.loc[
    df['player_id'].eq(232805.0),
    ['player_id', 'long_name', 'fifa_version', 'goals', 'assists'],
]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
1672,232805.0,Bernardo Fernandes da Silva Junior,21,1,0
2316,232805.0,Bernardo Fernandes da Silva Junior,20,3,2
2978,232805.0,Bernardo Fernandes da Silva Junior,19,1,4
2979,232805.0,Bernardo Fernandes da Silva Junior,19,0,1


In [234]:
# Bernardo Fernandes is duplicated for FIFA 19; drop row 2978 (1 goal/4 assists), keep row 2979.
df = df.drop(index=2978)
# Reset his FIFA 20 and FIFA 21 rows (showed 3/2 and 1/0 respectively).
df.at[2316, 'goals'], df.at[2316, 'assists'] = 0, 0
df.at[1672, 'goals'], df.at[1672, 'assists'] = 0, 0

In [235]:
# Per-season goals/assists currently stored for player_id 218667 (Bernardo Silva).
df.loc[
    df['player_id'].eq(218667.0),
    ['player_id', 'long_name', 'fifa_version', 'goals', 'assists'],
]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
10,218667.0,Bernardo Mota Veiga de Carvalho e Silva,23,4,6
672,218667.0,Bernardo Mota Veiga de Carvalho e Silva,22,8,4
1327,218667.0,Bernardo Mota Veiga de Carvalho e Silva,21,1,0
1328,218667.0,Bernardo Mota Veiga de Carvalho e Silva,21,2,6
1991,218667.0,Bernardo Mota Veiga de Carvalho e Silva,20,3,2
1992,218667.0,Bernardo Mota Veiga de Carvalho e Silva,20,6,7
2679,218667.0,Bernardo Mota Veiga de Carvalho e Silva,19,1,4
2680,218667.0,Bernardo Mota Veiga de Carvalho e Silva,19,0,1
2681,218667.0,Bernardo Mota Veiga de Carvalho e Silva,19,7,7
3340,218667.0,Bernardo Mota Veiga de Carvalho e Silva,18,6,4


In [236]:
# Cross-check: every FIFA 19 row credited with exactly 7 goals.
df.loc[
    df['fifa_version'].eq(19) & df['goals'].eq(7),
    ['player_id', 'long_name', 'fifa_version', 'goals', 'assists'],
]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
2661,204485.0,Riyad Mahrez,19,7,4
2681,218667.0,Bernardo Mota Veiga de Carvalho e Silva,19,7,7
2693,230666.0,Gabriel Fernando de Jesus,19,7,3
2869,205569.0,James Ward-Prowse,19,7,0
2905,195859.0,Daniel William John Ings,19,7,3
2955,194138.0,Andre Gray,19,7,2
2963,207807.0,Ryan Fraser,19,7,14
2971,220697.0,James Maddison,19,7,7
3135,220196.0,David Robert Brooks,19,7,5
6008,,Chicharito,19,7,1


Checking to see if Bernardo Silva's stats were attributed to someone else. This shows they weren't, since James Maddison actually achieved 7 goals and 7 assists.

In [237]:
# Bernardo Silva has extra rows for FIFA 19, 20 and 21. Drop 2680 and 2679 (FIFA 19),
# 1991 (FIFA 20) and 1327 (FIFA 21); rows 2681, 1992 and 1328 are kept.
df = df.drop(index=[2680, 2679, 1991, 1327])

In [238]:
# Per-season goals/assists currently stored for player_id 205525 (Bernard Anício Caldeira Duarte).
df.loc[
    df['player_id'].eq(205525.0),
    ['player_id', 'long_name', 'fifa_version', 'goals', 'assists'],
]

Unnamed: 0,player_id,long_name,fifa_version,goals,assists
1475,205525.0,Bernard Anício Caldeira Duarte,21,1,0
2084,205525.0,Bernard Anício Caldeira Duarte,20,3,2


These stats are correct.

In [239]:
# Release the SQLite connection opened with sqlite3.connect(file_path) at the top of the notebook.
conn.close()