# Repairing/Fixing cols and rows
* Data divided into multiple columns - Series.str.cat()
* Multiple values in a single column - Series.str.split()
* Multiple values in a single row label - .index.str.split()
* Data divided across multiple index levels - .index.str.cat()
* Multiple observations in a single cell - .assign().explode()
* Column labels contain data - pd.wide_to_long()

In [603]:
import pandas as pd
import numpy as np
import warnings
# Ignore every warning category from here on: Warning is the base class of
# all warning categories, so this filter matches all of them.
warnings.simplefilter("ignore", category=Warning)

In [605]:
# Load the player table from players_20.csv; the path is relative to the
# directory the notebook is run from.
fifa = pd.read_csv(filepath_or_buffer="../PROJECTS/FIFA/players_20.csv")

In [606]:
fifa.iloc[ : 3, 75:]

Unnamed: 0,goalkeeping_positioning,goalkeeping_reflexes,goalkeeping_speed,ls,st,rs,lw,lf,cf,rf,...,lcb,cb,rcb,rb,gk,player_face_url,club_logo_url,club_flag_url,nation_logo_url,nation_flag_url
0,14,8,,89+5,89+5,89+5,93+1,93+1,93+1,93+1,...,53+6,53+6,53+6,63+6,19+6,https://cdn.sofifa.net/players/158/023/20_120.png,https://cdn.sofifa.net/teams/241/60.png,https://cdn.sofifa.net/flags/es.png,,https://cdn.sofifa.net/flags/ar.png
1,14,11,,91+2,91+2,91+2,90,91,91,91,...,54+3,54+3,54+3,61+3,20+3,https://cdn.sofifa.net/players/020/801/20_120.png,https://cdn.sofifa.net/teams/45/60.png,https://cdn.sofifa.net/flags/it.png,https://cdn.sofifa.net/teams/1354/60.png,https://cdn.sofifa.net/flags/pt.png
2,15,11,,84+6,84+6,84+6,91+1,90+2,90+2,90+2,...,47+6,47+6,47+6,61+6,20+6,https://cdn.sofifa.net/players/190/871/20_120.png,https://cdn.sofifa.net/teams/73/60.png,https://cdn.sofifa.net/flags/fr.png,https://cdn.sofifa.net/teams/1370/60.png,https://cdn.sofifa.net/flags/br.png


# Multiple data in single column

In [607]:
# Labels of every column whose name mentions "position".
pos = [label for label in fifa.columns if "position" in label]

In [608]:
# One-column working frame holding the first "position" column.
# The cells below add new columns to it, so take an explicit copy: writing
# into a selection of `fifa` is the situation pandas reports as
# SettingWithCopyWarning.
fifa_pos = fifa[[pos[0]]].copy()

In [609]:
fifa_pos.head(2)

Unnamed: 0,player_positions
0,"RW, CF, ST"
1,"ST, LW"


In [610]:
# option 1 - keep the first occurrence
# Split on ", " and take element 0 of each resulting list.
fifa_pos["first"] = fifa_pos["player_positions"].str.split(", ").str.get(0)

In [612]:
fifa_pos.head(3)

Unnamed: 0,player_positions,first
0,"RW, CF, ST",RW
1,"ST, LW",ST
2,"LW, CAM",LW


In [613]:
# option 2 - keep the last occurrence
# Split on ", " and take the final element of each resulting list.
fifa_pos["last"] = fifa_pos["player_positions"].str.split(", ").str.get(-1)

In [614]:
fifa_pos.head(3)

Unnamed: 0,player_positions,first,last
0,"RW, CF, ST",RW,ST
1,"ST, LW",ST,LW
2,"LW, CAM",LW,CAM


# Data divided into multiple cols

In [623]:
# Labels of every column whose name contains "goalkeeping".
goalkeeping = [label for label in fifa.columns if "goalkeeping" in label]

In [625]:
goalkeeping

['goalkeeping_diving',
 'goalkeeping_handling',
 'goalkeeping_kicking',
 'goalkeeping_positioning',
 'goalkeeping_reflexes',
 'goalkeeping_speed']

In [626]:
# Missing-value count per goalkeeping column, leaving out the last entry of
# the list (goalkeeping_speed).
fifa[goalkeeping[:-1]].isna().sum()

goalkeeping_diving         0
goalkeeping_handling       0
goalkeeping_kicking        0
goalkeeping_positioning    0
goalkeeping_reflexes       0
dtype: int64

In [629]:
# Goalkeeping stats without the last column of the list (goalkeeping_speed).
# A "stat_mean" column is added to this frame further down, so take an
# explicit copy rather than writing into a selection of `fifa`.
fifa_goalkeeping = fifa.loc[:, goalkeeping[:-1]].copy()

In [632]:
fifa_goalkeeping.head(2)

Unnamed: 0,goalkeeping_diving,goalkeeping_handling,goalkeeping_kicking,goalkeeping_positioning,goalkeeping_reflexes
0,6,11,15,14,8
1,7,11,15,14,11


In [633]:
# Row-wise mean of the goalkeeping stats.
# - The stat columns are selected by name, so the average never depends on
#   which other columns the frame currently holds.
# - mean() divides by the number of non-missing values; sum() / shape[1]
#   would skip NaN in the numerator but still count it in the divisor.
fifa_goalkeeping["stat_mean"] = fifa_goalkeeping[goalkeeping[:-1]].mean(axis=1)

In [634]:
# Working frame with the two name columns.
# Later cells add "name", "surname" and "full" to it, so take an explicit
# copy instead of writing into a selection of `fifa`.
fifa_names = fifa[["short_name", "long_name"]].copy()

In [638]:
fifa_names.head(2)

Unnamed: 0,short_name,long_name
0,L. Messi,Lionel Andrés Messi Cuccittini
1,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro


In [639]:
# Cut long_name at its first space: the part before it becomes "name",
# everything after it becomes "surname" (n=1 allows at most one split).
fifa_names["name"] = fifa["long_name"].str.split(" ", n=1).str.get(0)
fifa_names["surname"] = fifa["long_name"].str.split(" ", n=1).str.get(1)

In [642]:
fifa_names.head(2)

Unnamed: 0,short_name,long_name,name,surname
0,L. Messi,Lionel Andrés Messi Cuccittini,Lionel,Andrés Messi Cuccittini
1,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,Cristiano,Ronaldo dos Santos Aveiro


In [643]:
# Join the two pieces back into a single column, separated by "-".
fifa_names["full"] = fifa_names["name"].str.cat(
    others=fifa_names["surname"],
    sep="-",
)

In [646]:
fifa_names.head()

Unnamed: 0,short_name,long_name,name,surname,full
0,L. Messi,Lionel Andrés Messi Cuccittini,Lionel,Andrés Messi Cuccittini,Lionel-Andrés Messi Cuccittini
1,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,Cristiano,Ronaldo dos Santos Aveiro,Cristiano-Ronaldo dos Santos Aveiro
2,Neymar Jr,Neymar da Silva Santos Júnior,Neymar,da Silva Santos Júnior,Neymar-da Silva Santos Júnior
3,E. Hazard,Eden Hazard,Eden,Hazard,Eden-Hazard
4,K. De Bruyne,Kevin De Bruyne,Kevin,De Bruyne,Kevin-De Bruyne


# Multiple data in single row label 

In [727]:
# Independent copy of fifa_names for the row-label examples: the frame is
# re-indexed in place below, and `.loc[:, :]` does not promise a copy.
fifa_rows = fifa_names.copy()

In [729]:
# Move the "full" column into the row index.
fifa_rows = fifa_rows.set_index("full")

In [731]:
fifa_rows.head(2)

Unnamed: 0_level_0,short_name,long_name,name,surname
full,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Lionel-Andrés Messi Cuccittini,L. Messi,Lionel Andrés Messi Cuccittini,Lionel,Andrés Messi Cuccittini
Cristiano-Ronaldo dos Santos Aveiro,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,Cristiano,Ronaldo dos Santos Aveiro


In [733]:
# Split each row label at its first "-" (n=1) and expand the two parts into
# the levels of a MultiIndex.
# NOTE(review): a first name that itself contains "-" would be cut at that
# hyphen instead - verify against the data.
fifa_rows.index = fifa_rows.index.str.split(pat="-", n=1, expand=True)

In [735]:
# Name the two index levels produced by the split, then preview the result.
fifa_rows.index = fifa_rows.index.set_names(["name", "surname2"])
fifa_rows.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,short_name,long_name,name,surname
name,surname2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Lionel,Andrés Messi Cuccittini,L. Messi,Lionel Andrés Messi Cuccittini,Lionel,Andrés Messi Cuccittini
Cristiano,Ronaldo dos Santos Aveiro,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,Cristiano,Ronaldo dos Santos Aveiro


# Data divided across a MultiIndex

In [737]:
fifa_rows.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,short_name,long_name,name,surname
name,surname2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Lionel,Andrés Messi Cuccittini,L. Messi,Lionel Andrés Messi Cuccittini,Lionel,Andrés Messi Cuccittini
Cristiano,Ronaldo dos Santos Aveiro,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,Cristiano,Ronaldo dos Santos Aveiro


In [741]:
# Turn the "surname2" index level back into a regular column (it is kept,
# not dropped); "name" stays as the row index.
fifa_rows = fifa_rows.reset_index(level="surname2")

In [743]:
fifa_rows.head(2)

Unnamed: 0_level_0,surname2,short_name,long_name,name,surname
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Lionel,Andrés Messi Cuccittini,L. Messi,Lionel Andrés Messi Cuccittini,Lionel,Andrés Messi Cuccittini
Cristiano,Ronaldo dos Santos Aveiro,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,Cristiano,Ronaldo dos Santos Aveiro


In [753]:
# Rebuild the combined row label: index value + "-" + the surname2 column.
fifa_rows.index = fifa_rows.index.str.cat(
    others=fifa_rows["surname2"],
    sep="-",
)

In [892]:
fifa_rows.head(2)

Unnamed: 0_level_0,surname2,short_name,long_name,name,surname
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Lionel-Andrés Messi Cuccittini,Andrés Messi Cuccittini,L. Messi,Lionel Andrés Messi Cuccittini,Lionel,Andrés Messi Cuccittini
Cristiano-Ronaldo dos Santos Aveiro,Ronaldo dos Santos Aveiro,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,Cristiano,Ronaldo dos Santos Aveiro


# Multiple observations in a single cell

In [895]:
# Rows up to and including label 4 (.loc slices are inclusive), restricted to
# the player name and four of the position-rating columns.
fifa_obs = fifa.loc[
    :4,
    ["short_name", "st", "rs", "lw", "lf"],
]

In [897]:
fifa_obs

Unnamed: 0,short_name,st,rs,lw,lf
0,L. Messi,89+5,89+5,93+1,93+1
1,Cristiano Ronaldo,91+2,91+2,90,91
2,Neymar Jr,84+6,84+6,91+1,90+2
3,E. Hazard,84+6,84+6,89+2,89+2
4,K. De Bruyne,83+3,83+3,87,87


In [899]:
# Split each "st" cell on "+" into a list, then give every list element its
# own row; the other columns are repeated for each element.
fifa_obs.assign(
    st_points=lambda frame: frame["st"].str.split(pat="+")
).explode(column="st_points")

Unnamed: 0,short_name,st,rs,lw,lf,st_points
0,L. Messi,89+5,89+5,93+1,93+1,89
0,L. Messi,89+5,89+5,93+1,93+1,5
1,Cristiano Ronaldo,91+2,91+2,90,91,91
1,Cristiano Ronaldo,91+2,91+2,90,91,2
2,Neymar Jr,84+6,84+6,91+1,90+2,84
2,Neymar Jr,84+6,84+6,91+1,90+2,6
3,E. Hazard,84+6,84+6,89+2,89+2,84
3,E. Hazard,84+6,84+6,89+2,89+2,6
4,K. De Bruyne,83+3,83+3,87,87,83
4,K. De Bruyne,83+3,83+3,87,87,3


# Column labels contain data

In [932]:
# All "attacking" column labels, followed by the player-name column that
# serves as the identifier in the reshape below.
attacking = [label for label in fifa.columns if "attacking" in label] + ["short_name"]

In [934]:
attacking

['attacking_crossing',
 'attacking_finishing',
 'attacking_heading_accuracy',
 'attacking_short_passing',
 'attacking_volleys',
 'short_name']

In [952]:
# Rows up to and including label 4, using the `attacking` labels from the
# third entry onward (the first two are skipped).
fifa_attacking = fifa.loc[:4, attacking[2:]]

In [954]:
fifa_attacking

Unnamed: 0,attacking_heading_accuracy,attacking_short_passing,attacking_volleys,short_name
0,70,92,88,L. Messi
1,89,83,87,Cristiano Ronaldo
2,62,87,87,Neymar Jr
3,61,89,83,E. Hazard
4,55,92,82,K. De Bruyne


In [960]:
# Reshape the "attacking_<attr>" columns from wide to long:
# - one row per (short_name, attr) pair,
# - the part of each label after "attacking_" goes into the "attr" level,
# - the values end up in a single "attacking" column.
# The suffix pattern is a raw string: "\w" inside a normal string literal is
# an invalid escape sequence that newer Python versions warn about.
pd.wide_to_long(
    df=fifa_attacking,
    stubnames=["attacking"],
    i="short_name",
    j="attr",
    sep="_",
    suffix=r"\w+",
).sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,attacking
short_name,attr,Unnamed: 2_level_1
Cristiano Ronaldo,heading_accuracy,89
Cristiano Ronaldo,short_passing,83
Cristiano Ronaldo,volleys,87
E. Hazard,heading_accuracy,61
E. Hazard,short_passing,89
E. Hazard,volleys,83
K. De Bruyne,heading_accuracy,55
K. De Bruyne,short_passing,92
K. De Bruyne,volleys,82
L. Messi,heading_accuracy,70
