# Data Cleaning

In [135]:
import pandas as pd
import numpy as np
import seaborn as sns


## Accessment of Data

In [136]:
# Making the copy of the dataset 

df_fifa = pd.read_csv('Fifa_world_cup3.csv')
fifa = df_fifa.copy()

### Data Summery 

- The dataset represents historical information on FIFA World Cup matches. It includes details about home and away teams, match scores, penalties, goals, and other match-specific data like the stadium, city, attendance, and referee information. There are 865 matches in the dataset, and certain columns contain incomplete data, especially for goals scored by the first and second teams, and referee cities. Some columns, such as attendance, contain non-numeric values, and date formatting is inconsistent.

### Info About Each Column

- __Home_Team:__ The name of the home team (object type).
- __Score:__ The match score (object type).
- __Away_Team:__ The name of the away team (object type).
- __Penalties:__ Indicates whether penalties were involved (object type).
- __First_Team_Goals:__ Descriptions of goals scored by the first team (object type).
- __Second_Team_Goals:__ Descriptions of goals scored by the second team (object type).
- __Date:__ Date of the match (object type).
- __Time:__ Time of the match (object type).
- __Stadium:__ Stadium where the match was played (object type).
- __City:__ City where the match was played (object type).
- __Attendance:__ The number of spectators (object type).
- __Referee:__ The referee of the match (object type).
- __Referee_City:__ The city where the referee is from (object type).

In [137]:
fifa.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 929 entries, 0 to 928
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Home_Team          929 non-null    object
 1   Score              929 non-null    object
 2   Away_Team          929 non-null    object
 3   Penalties          929 non-null    object
 4   First_Team_Goals   722 non-null    object
 5   Second_Team_Goals  602 non-null    object
 6   Date               929 non-null    object
 7   Time               928 non-null    object
 8   Stadium            929 non-null    object
 9   City               907 non-null    object
 10  Attendance         928 non-null    object
 11  Referee            928 non-null    object
 12  Referee_City       887 non-null    object
dtypes: object(13)
memory usage: 94.5+ KB


In [138]:
fifa.describe()

Unnamed: 0,Home_Team,Score,Away_Team,Penalties,First_Team_Goals,Second_Team_Goals,Date,Time,Stadium,City,Attendance,Referee,Referee_City
count,929,929,929,929,722,602,929,928,929,907,928,928,887
unique,81,69,87,14,721,602,365,125,209,169,720,410,88
top,Brazil,1–0,Mexico,NO,Houseman 20',Mbappé 80' (pen.) 81' 118' (pen.),27 May 1934,16:00,Estadio Azteca,Mexico City,"Attendance: 45,000",Ravshan Irmatov,Italy
freq,82,100,41,894,2,1,8,80,19,23,13,11,51


In [139]:
fifa[fifa['Time'].isnull()]

Unnamed: 0,Home_Team,Score,Away_Team,Penalties,First_Team_Goals,Second_Team_Goals,Date,Time,Stadium,City,Attendance,Referee,Referee_City
37,Sweden,w/o,Austria,NO,,,5 June 1938,,Stade Gerland,Lyon,,,


In [140]:
fifa['First_Team_Goals'].isnull().sum()

np.int64(207)

In [141]:
fifa['Second_Team_Goals'].isnull().sum()

np.int64(327)

In [142]:
fifa.head()

Unnamed: 0,Home_Team,Score,Away_Team,Penalties,First_Team_Goals,Second_Team_Goals,Date,Time,Stadium,City,Attendance,Referee,Referee_City
0,France,4–1,Mexico,NO,L. Laurent 19'; Langiller 40'; Maschinot 43...,Carreño 70',13 July 1930,15:00 UYT (UTC−03:30),Estadio Pocitos,Montevideo,"Attendance: 4,444",Uruguay,
1,Argentina,1–0,France,NO,Monti 81',,15 July 1930,16:00 UYT (UTC−03:30),Estadio Parque Central,Montevideo,"Attendance: 23,409",Almeida Rêgo,Brazil
2,Chile,3–0,Mexico,NO,"Vidal 3', 65'; M. Rosas 52' (o.g.); o.g.",,16 July 1930,14:45 UYT (UTC−03:30),Estadio Parque Central,Montevideo,"Attendance: 9,249",Henri Christophe,Belgium
3,Chile,1–0,France,NO,Subiabre 67',,19 July 1930,12:50 UYT (UTC−03:30),Estadio Centenario,Montevideo,"Attendance: 2,000",Uruguay,
4,Argentina,6–3,Mexico,NO,"Stábile 8', 17', 80'; Zumelzú 12', 55'; Vara...","M. Rosas 42' (pen.), 65'; pen. 75'; Gayón",19 July 1930,15:00 UYT (UTC−03:30),Estadio Centenario,Montevideo,"Attendance: 42,100",Ulises Saucedo,Bolivia


In [143]:
fifa.tail()

Unnamed: 0,Home_Team,Score,Away_Team,Penalties,First_Team_Goals,Second_Team_Goals,Date,Time,Stadium,City,Attendance,Referee,Referee_City
924,England,1–2,France,NO,Kane 54' (pen.),Tchouaméni 17'; Giroud 78',10 December 2022 (2022-12-10),22:00,Al Bayt Stadium,Al Khor,"Attendance: 68,895",Wilton Sampaio,Brazil
925,Argentina,3–0,Croatia,NO,Messi 34' (pen.); Álvarez 39' 69',,13 December 2022 (2022-12-13),22:00,Lusail Stadium,Lusail,"Attendance: 88,966",Daniele Orsato,Italy
926,France,2–0,Morocco,NO,T. Hernandez 5'; Kolo Muani 79',,14 December 2022 (2022-12-14),22:00,Al Bayt Stadium,Al Khor,"Attendance: 68,294",César Arturo Ramos,Mexico
927,Croatia,2–1,Morocco,NO,Gvardiol 7'; Oršić 42',Dari 9',17 December 2022 (2022-12-17),18:00,Khalifa International Stadium,Al Rayyan,"Attendance: 44,137",Abdulrahman Al-Jassim,Qatar
928,Argentina,3–3 (a.e.t.),France,4–2,Messi 23' (pen.) 108'; Di María 36',Mbappé 80' (pen.) 81' 118' (pen.),18 December 2022 (2022-12-18),18:00,Lusail Stadium,Lusail,"Attendance: 88,966",Szymon Marciniak,Poland


In [144]:
fifa.sample(5)

Unnamed: 0,Home_Team,Score,Away_Team,Penalties,First_Team_Goals,Second_Team_Goals,Date,Time,Stadium,City,Attendance,Referee,Referee_City
310,Peru,0–0,Cameroon,NO,,,15 June 1982,17:15 CEST,Estadio de Riazor,A Coruña,"Attendance: 11,000",Franz Wöhrer,Austria
804,Uruguay,1–0,Saudi Arabia,NO,Suárez 23',,20 June 2018 (2018-06-20),18:00 MSK (UTC+3),Rostov Arena,Rostov-on-Don,"Attendance: 42,678[146]",Clément Turpin,France
570,Germany,8–0,Saudi Arabia,NO,"Klose 20', 25', 70'; Ballack 40'; Jancker 4...",,1 June 2002,20:30 JST (UTC+9),Sapporo Dome,Sapporo,"Attendance: 32,218",Ubaldo Aquino,Paraguay
243,Scotland,1–1,Yugoslavia,NO,Jordan 88',Karasi 81',22 June 1974,16:00,Waldstadion,Frankfurt,"Attendance: 56,000",Alfonso González Archundía,Mexico
366,Argentina,2–0,Bulgaria,NO,Valdano 4'; Burruchaga 77',,10 June 1986,12:00 CST,Estadio Olímpico Universitario,Mexico City,"Attendance: 65,000",Berny Ulloa Morera,Costa Rica


### Dirty Data vs Messy Data

#### Dirty Data

- Home_Team
    1. Need to strip the spaces in the name __Consistency__
    2. Need to change all the letter into smaller cases __Consistency__

- Attendance
    1. The values contain non-numeric characters (e.g., "Attendance: 4,444").
        __Validity__

- First_Team_Goals & Second_Team_Goals
    1. The format includes textual representations of goals, such as "L. Laurent 19'" and mixed data types. __Accuracy__

- Date
    1. Inconsistent formats like "8 June 1958" and "15 July 1930". Need to remove one of the date.__Consistency__
    2. Also need to change the type to datetime. __Accuracy__

- Referee_City
    1. Missing data in several rows.
       __Completeness__

- Penalties
    1. There are only two values: "YES" and "NO". It can be simplified to binary (1 or 0). __Consistency__

- Score
    1. The scores are represented in a non-standardized format, e.g., "4–1". __Consistency__
    2. Need to change the type to int __Validity__

- Time
    1. Need to remove extra written text. __Validity__
    2. Need to change the column type datetime __Consistency__

#### Messy Data

- First_Team_Goal
    1. Need to make one more column in which it will show the player name who goal and at what time for Home team

- Second_Team_Goal 
    1. Need to make one more column in which it will show the player name who goal and at what time for away team

- Score 
    1. Need to make the two different row. One for Home teams score and away teams score team. Then one more for the winner team.


### Define, code, test

In [145]:
# Home_Team: Strip spaces and convert to lowercase
fifa['Home_Team'] = fifa['Home_Team'].str.strip().str.lower()

In [146]:
fifa.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 929 entries, 0 to 928
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Home_Team          929 non-null    object
 1   Score              929 non-null    object
 2   Away_Team          929 non-null    object
 3   Penalties          929 non-null    object
 4   First_Team_Goals   722 non-null    object
 5   Second_Team_Goals  602 non-null    object
 6   Date               929 non-null    object
 7   Time               928 non-null    object
 8   Stadium            929 non-null    object
 9   City               907 non-null    object
 10  Attendance         928 non-null    object
 11  Referee            928 non-null    object
 12  Referee_City       887 non-null    object
dtypes: object(13)
memory usage: 94.5+ KB


In [147]:
fifa['First_Team_Goals'].fillna(0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  fifa['First_Team_Goals'].fillna(0, inplace=True)


In [148]:
fifa['Second_Team_Goals'].fillna(0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  fifa['Second_Team_Goals'].fillna(0, inplace=True)


In [149]:
fifa['City'].fillna('Unknown', inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  fifa['City'].fillna('Unknown', inplace=True)


In [150]:
fifa['Referee_City'].fillna('Unknown', inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  fifa['Referee_City'].fillna('Unknown', inplace=True)


In [151]:
fifa.head()

Unnamed: 0,Home_Team,Score,Away_Team,Penalties,First_Team_Goals,Second_Team_Goals,Date,Time,Stadium,City,Attendance,Referee,Referee_City
0,france,4–1,Mexico,NO,L. Laurent 19'; Langiller 40'; Maschinot 43...,Carreño 70',13 July 1930,15:00 UYT (UTC−03:30),Estadio Pocitos,Montevideo,"Attendance: 4,444",Uruguay,Unknown
1,argentina,1–0,France,NO,Monti 81',0,15 July 1930,16:00 UYT (UTC−03:30),Estadio Parque Central,Montevideo,"Attendance: 23,409",Almeida Rêgo,Brazil
2,chile,3–0,Mexico,NO,"Vidal 3', 65'; M. Rosas 52' (o.g.); o.g.",0,16 July 1930,14:45 UYT (UTC−03:30),Estadio Parque Central,Montevideo,"Attendance: 9,249",Henri Christophe,Belgium
3,chile,1–0,France,NO,Subiabre 67',0,19 July 1930,12:50 UYT (UTC−03:30),Estadio Centenario,Montevideo,"Attendance: 2,000",Uruguay,Unknown
4,argentina,6–3,Mexico,NO,"Stábile 8', 17', 80'; Zumelzú 12', 55'; Vara...","M. Rosas 42' (pen.), 65'; pen. 75'; Gayón",19 July 1930,15:00 UYT (UTC−03:30),Estadio Centenario,Montevideo,"Attendance: 42,100",Ulises Saucedo,Bolivia


In [152]:
import re
def clean_attendance(value):
    if pd.isnull(value):  # Check for null values to avoid processing them
        return None
    numeric_value = re.sub(r'[^0-9]', '', str(value))
    return int(numeric_value) if numeric_value else None

fifa['Attendance'] = fifa['Attendance'].apply(clean_attendance)


In [153]:
fifa.head()

Unnamed: 0,Home_Team,Score,Away_Team,Penalties,First_Team_Goals,Second_Team_Goals,Date,Time,Stadium,City,Attendance,Referee,Referee_City
0,france,4–1,Mexico,NO,L. Laurent 19'; Langiller 40'; Maschinot 43...,Carreño 70',13 July 1930,15:00 UYT (UTC−03:30),Estadio Pocitos,Montevideo,4444.0,Uruguay,Unknown
1,argentina,1–0,France,NO,Monti 81',0,15 July 1930,16:00 UYT (UTC−03:30),Estadio Parque Central,Montevideo,23409.0,Almeida Rêgo,Brazil
2,chile,3–0,Mexico,NO,"Vidal 3', 65'; M. Rosas 52' (o.g.); o.g.",0,16 July 1930,14:45 UYT (UTC−03:30),Estadio Parque Central,Montevideo,9249.0,Henri Christophe,Belgium
3,chile,1–0,France,NO,Subiabre 67',0,19 July 1930,12:50 UYT (UTC−03:30),Estadio Centenario,Montevideo,2000.0,Uruguay,Unknown
4,argentina,6–3,Mexico,NO,"Stábile 8', 17', 80'; Zumelzú 12', 55'; Vara...","M. Rosas 42' (pen.), 65'; pen. 75'; Gayón",19 July 1930,15:00 UYT (UTC−03:30),Estadio Centenario,Montevideo,42100.0,Ulises Saucedo,Bolivia


In [154]:
import re

# Function to clean attendance values by removing non-numeric characters
def clean_attendance(value):
    numeric_value = re.sub(r'[^0-9]', '', str(value))
    return int(numeric_value) if numeric_value else None

fifa['Attendance'] = fifa['Attendance'].apply(clean_attendance)

In [155]:
fifa.head()

Unnamed: 0,Home_Team,Score,Away_Team,Penalties,First_Team_Goals,Second_Team_Goals,Date,Time,Stadium,City,Attendance,Referee,Referee_City
0,france,4–1,Mexico,NO,L. Laurent 19'; Langiller 40'; Maschinot 43...,Carreño 70',13 July 1930,15:00 UYT (UTC−03:30),Estadio Pocitos,Montevideo,44440.0,Uruguay,Unknown
1,argentina,1–0,France,NO,Monti 81',0,15 July 1930,16:00 UYT (UTC−03:30),Estadio Parque Central,Montevideo,234090.0,Almeida Rêgo,Brazil
2,chile,3–0,Mexico,NO,"Vidal 3', 65'; M. Rosas 52' (o.g.); o.g.",0,16 July 1930,14:45 UYT (UTC−03:30),Estadio Parque Central,Montevideo,92490.0,Henri Christophe,Belgium
3,chile,1–0,France,NO,Subiabre 67',0,19 July 1930,12:50 UYT (UTC−03:30),Estadio Centenario,Montevideo,20000.0,Uruguay,Unknown
4,argentina,6–3,Mexico,NO,"Stábile 8', 17', 80'; Zumelzú 12', 55'; Vara...","M. Rosas 42' (pen.), 65'; pen. 75'; Gayón",19 July 1930,15:00 UYT (UTC−03:30),Estadio Centenario,Montevideo,421000.0,Ulises Saucedo,Bolivia


In [156]:
fifa['Date'] = fifa['Date'].str.strip().str.split("(" ).str.get(0)

In [157]:
fifa['Date'] = pd.DatetimeIndex(fifa['Date'])

In [158]:
fifa.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 929 entries, 0 to 928
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Home_Team          929 non-null    object        
 1   Score              929 non-null    object        
 2   Away_Team          929 non-null    object        
 3   Penalties          929 non-null    object        
 4   First_Team_Goals   929 non-null    object        
 5   Second_Team_Goals  929 non-null    object        
 6   Date               929 non-null    datetime64[ns]
 7   Time               928 non-null    object        
 8   Stadium            929 non-null    object        
 9   City               929 non-null    object        
 10  Attendance         928 non-null    float64       
 11  Referee            928 non-null    object        
 12  Referee_City       929 non-null    object        
dtypes: datetime64[ns](1), float64(1), object(11)
memory usage: 94.5+ 

In [159]:
# Penalties: Simplify "YES" and "NO" to binary values (1 for YES, 0 for NO)
fifa['Penalties'] = fifa['Penalties'].apply(lambda x: 1 if x == 'YES' else 0)


In [160]:
fifa.head()

Unnamed: 0,Home_Team,Score,Away_Team,Penalties,First_Team_Goals,Second_Team_Goals,Date,Time,Stadium,City,Attendance,Referee,Referee_City
0,france,4–1,Mexico,0,L. Laurent 19'; Langiller 40'; Maschinot 43...,Carreño 70',1930-07-13,15:00 UYT (UTC−03:30),Estadio Pocitos,Montevideo,44440.0,Uruguay,Unknown
1,argentina,1–0,France,0,Monti 81',0,1930-07-15,16:00 UYT (UTC−03:30),Estadio Parque Central,Montevideo,234090.0,Almeida Rêgo,Brazil
2,chile,3–0,Mexico,0,"Vidal 3', 65'; M. Rosas 52' (o.g.); o.g.",0,1930-07-16,14:45 UYT (UTC−03:30),Estadio Parque Central,Montevideo,92490.0,Henri Christophe,Belgium
3,chile,1–0,France,0,Subiabre 67',0,1930-07-19,12:50 UYT (UTC−03:30),Estadio Centenario,Montevideo,20000.0,Uruguay,Unknown
4,argentina,6–3,Mexico,0,"Stábile 8', 17', 80'; Zumelzú 12', 55'; Vara...","M. Rosas 42' (pen.), 65'; pen. 75'; Gayón",1930-07-19,15:00 UYT (UTC−03:30),Estadio Centenario,Montevideo,421000.0,Ulises Saucedo,Bolivia


In [161]:
fifa['Time'] = fifa['Time'].astype('string')

In [162]:
fifa.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 929 entries, 0 to 928
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Home_Team          929 non-null    object        
 1   Score              929 non-null    object        
 2   Away_Team          929 non-null    object        
 3   Penalties          929 non-null    int64         
 4   First_Team_Goals   929 non-null    object        
 5   Second_Team_Goals  929 non-null    object        
 6   Date               929 non-null    datetime64[ns]
 7   Time               928 non-null    string        
 8   Stadium            929 non-null    object        
 9   City               929 non-null    object        
 10  Attendance         928 non-null    float64       
 11  Referee            928 non-null    object        
 12  Referee_City       929 non-null    object        
dtypes: datetime64[ns](1), float64(1), int64(1), object(9), string(1)


In [163]:
fifa['Time'].astype('string')

0      15:00 UYT (UTC−03:30)
1      16:00 UYT (UTC−03:30)
2      14:45 UYT (UTC−03:30)
3      12:50 UYT (UTC−03:30)
4      15:00 UYT (UTC−03:30)
               ...          
924                    22:00
925                    22:00
926                    22:00
927                    18:00
928                    18:00
Name: Time, Length: 929, dtype: string

In [164]:
fifa.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 929 entries, 0 to 928
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Home_Team          929 non-null    object        
 1   Score              929 non-null    object        
 2   Away_Team          929 non-null    object        
 3   Penalties          929 non-null    int64         
 4   First_Team_Goals   929 non-null    object        
 5   Second_Team_Goals  929 non-null    object        
 6   Date               929 non-null    datetime64[ns]
 7   Time               928 non-null    string        
 8   Stadium            929 non-null    object        
 9   City               929 non-null    object        
 10  Attendance         928 non-null    float64       
 11  Referee            928 non-null    object        
 12  Referee_City       929 non-null    object        
dtypes: datetime64[ns](1), float64(1), int64(1), object(9), string(1)


In [168]:
fifa['Time'] = fifa['Time'].str.split(" ").str.get(0)

In [173]:
def clean_time(value):
    if pd.isnull(value):  
        return np.nan
   
    time_part = value.split(' ')[0]
    return time_part 

fifa['Start_Time'] = fifa['Time'].apply(clean_time)

fifa['Start_Time'] = pd.to_datetime(fifa['Start_Time'], format='%H:%M', errors='coerce').dt.time


In [175]:
fifa['Time'] = fifa['Start_Time']

In [176]:
fifa.drop(columns=['Start_Time','Cleaned_Time' ], inplace=True)

In [177]:
fifa.head()

Unnamed: 0,Home_Team,Score,Away_Team,Penalties,First_Team_Goals,Second_Team_Goals,Date,Time,Stadium,City,Attendance,Referee,Referee_City
0,france,4–1,Mexico,0,L. Laurent 19'; Langiller 40'; Maschinot 43...,Carreño 70',1930-07-13,15:00:00,Estadio Pocitos,Montevideo,44440.0,Uruguay,Unknown
1,argentina,1–0,France,0,Monti 81',0,1930-07-15,16:00:00,Estadio Parque Central,Montevideo,234090.0,Almeida Rêgo,Brazil
2,chile,3–0,Mexico,0,"Vidal 3', 65'; M. Rosas 52' (o.g.); o.g.",0,1930-07-16,14:45:00,Estadio Parque Central,Montevideo,92490.0,Henri Christophe,Belgium
3,chile,1–0,France,0,Subiabre 67',0,1930-07-19,12:50:00,Estadio Centenario,Montevideo,20000.0,Uruguay,Unknown
4,argentina,6–3,Mexico,0,"Stábile 8', 17', 80'; Zumelzú 12', 55'; Vara...","M. Rosas 42' (pen.), 65'; pen. 75'; Gayón",1930-07-19,15:00:00,Estadio Centenario,Montevideo,421000.0,Ulises Saucedo,Bolivia


In [178]:
def extract_goals(goal_column):
    player_names = []
    goal_times = []
    for goals in goal_column:
        if pd.isna(goals):
            continue
        goal_entries = re.split(r'; |, ', goals)
        for entry in goal_entries:
            match = re.match(r'(.+?)\s(\d+)\'', entry)
            if match:
                player_names.append(match.group(1).strip())
                goal_times.append(match.group(2).strip())
    return player_names, goal_times

expanded_rows = []

for index, row in fifa.iterrows():
    first_team_players, first_team_times = extract_goals([row['First_Team_Goals']])
    second_team_players, second_team_times = extract_goals([row['Second_Team_Goals']])
    
    all_players = first_team_players + second_team_players
    all_goal_times = first_team_times + second_team_times
    
    for player, goal_time in zip(all_players, all_goal_times):
        new_row = row.copy()
        new_row['Player_Name'] = player
        new_row['Goal_Time'] = goal_time
        expanded_rows.append(new_row)

expanded_fifa = pd.DataFrame(expanded_rows)

TypeError: expected string or bytes-like object, got 'int'

In [179]:
expanded_fifa

Unnamed: 0,Home_Team,Score,Away_Team,Penalties,First_Team_Goals,Second_Team_Goals,Date,Time,Stadium,City,Attendance,Referee,Referee_City,Players_Goal,Player_Name,Goal_Time
0,france,4–1,Mexico,0,L. Laurent 19'; Langiller 40'; Maschinot 43...,Carreño 70',1930-07-13,0 days 15:00:00,Estadio Pocitos,Montevideo,44440.0,Uruguay,,L. Laurent 19'; Langiller 40'; Maschinot 43...,L. Laurent,19
0,france,4–1,Mexico,0,L. Laurent 19'; Langiller 40'; Maschinot 43...,Carreño 70',1930-07-13,0 days 15:00:00,Estadio Pocitos,Montevideo,44440.0,Uruguay,,L. Laurent 19'; Langiller 40'; Maschinot 43...,Langiller,40
0,france,4–1,Mexico,0,L. Laurent 19'; Langiller 40'; Maschinot 43...,Carreño 70',1930-07-13,0 days 15:00:00,Estadio Pocitos,Montevideo,44440.0,Uruguay,,L. Laurent 19'; Langiller 40'; Maschinot 43...,Maschinot,43
0,france,4–1,Mexico,0,L. Laurent 19'; Langiller 40'; Maschinot 43...,Carreño 70',1930-07-13,0 days 15:00:00,Estadio Pocitos,Montevideo,44440.0,Uruguay,,L. Laurent 19'; Langiller 40'; Maschinot 43...,Carreño,70
1,argentina,1–0,France,0,Monti 81',,1930-07-15,0 days 16:00:00,Estadio Parque Central,Montevideo,234090.0,Almeida Rêgo,Brazil,,Monti,81
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
864,france,4–2,Croatia,0,Mandžukić 18' (o.g.); Griezmann 38' (pen.); Po...,Perišić 28'; Mandžukić 69',2018-07-15,0 days 18:00:00,Luzhniki Stadium,Moscow,780110.0,Néstor Pitana,Argentina,Mandžukić 18' (o.g.); Griezmann 38' (pen.); Po...,Griezmann,38
864,france,4–2,Croatia,0,Mandžukić 18' (o.g.); Griezmann 38' (pen.); Po...,Perišić 28'; Mandžukić 69',2018-07-15,0 days 18:00:00,Luzhniki Stadium,Moscow,780110.0,Néstor Pitana,Argentina,Mandžukić 18' (o.g.); Griezmann 38' (pen.); Po...,Pogba,59
864,france,4–2,Croatia,0,Mandžukić 18' (o.g.); Griezmann 38' (pen.); Po...,Perišić 28'; Mandžukić 69',2018-07-15,0 days 18:00:00,Luzhniki Stadium,Moscow,780110.0,Néstor Pitana,Argentina,Mandžukić 18' (o.g.); Griezmann 38' (pen.); Po...,Mbappé,65
864,france,4–2,Croatia,0,Mandžukić 18' (o.g.); Griezmann 38' (pen.); Po...,Perišić 28'; Mandžukić 69',2018-07-15,0 days 18:00:00,Luzhniki Stadium,Moscow,780110.0,Néstor Pitana,Argentina,Mandžukić 18' (o.g.); Griezmann 38' (pen.); Po...,Perišić,28


In [42]:
expanded_fifa.sort_values([])

Unnamed: 0,Home_Team,Score,Away_Team,Penalties,First_Team_Goals,Second_Team_Goals,Date,Time,Stadium,City,Attendance,Referee,Referee_City,Players_Goal,Player_Name,Goal_Time
158,hungary,6–1,Bulgaria,0,"Albert 1', 6', 53'; Tichy 8', 70'; Solymosi ...",Sokolov 64'; [14],1962-06-03,0 days 15:00:00,Estadio El Teniente,Rancagua,74420.0,Juan Gardeazábal Garay,Spain,"Albert 1', 6', 53'; Tichy 8', 70'; Solymosi ...",Albert,1
327,england,3–1,France,0,"Robson 1', 67'; Mariner 83'",Soler 24',1982-06-16,0 days 17:15:00,San Mamés,Bilbao,441720.0,António Garrido,Portugal,"Robson 1', 67'; Mariner 83'Soler 24'",Robson,1
607,south korea,2–3,Turkey,0,Lee Eul-yong 9'; Song Chong-gug 90+3',"Şükür 1'; İlhan 13', 32'",2002-06-29,0 days 20:00:00,Daegu World Cup Stadium,Daegu,634830.0,Saad Mane,Kuwait,Lee Eul-yong 9'; Song Chong-gug 90+3'Şükür ...,Şükür,1
33,germany,3–2,Austria,0,"Lehner 1', 42'; Conen 27'",Horvath 28'; Sesta 54',1934-06-07,0 days 18:00:00,Stadio Giorgio Ascarelli,Naples,70000.0,Albino Carraro,Italy,"Lehner 1', 42'; Conen 27'Horvath 28'; Sesta...",Lehner,1
774,ghana,1–2,United States,0,A. Ayew 82',Dempsey 1'; Brooks 86',2014-06-16,0 days 19:00:00,Arena das Dunas,Natal,397600.0,Jonas Eriksson,Sweden,A. Ayew 82'Dempsey 1'; Brooks 86',Dempsey,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34,italy,2–1 (a.e.t.),Czechoslovakia,0,Orsi 81'; Schiavio 95',Puč 71',1934-06-10,0 days 15:30:00,Stadio Nazionale PNF,Rome,550000.0,Ivan Eklind,Sweden,Orsi 81'; Schiavio 95'Puč 71',Schiavio,95
230,italy,4–3 (a.e.t.),West Germany,0,Boninsegna 8'; Burgnich 98'; Riva 104'; Riv...,"Schnellinger 90'; Müller 95', 110'",1970-06-17,0 days 16:00:00,Estadio Azteca,Mexico City,1024440.0,Arturo Yamasaki,Peru,Boninsegna 8'; Burgnich 98'; Riva 104'; Riv...,Müller,95
358,west germany,3–3 (a.e.t.),France,0,Littbarski 17'; Rummenigge 102'; Fischer 108',Platini 26' (pen.); pen. 92'; Trésor 98'; G...,1982-07-08,0 days 21:00:00,Ramón Sánchez Pizjuán Stadium,Seville,700000.0,Charles Corver,Netherlands,Littbarski 17'; Rummenigge 102'; Fischer 10...,Trésor,98
658,argentina,2–1 (a.e.t.),Mexico,0,Crespo 10'; Rodríguez 98',Márquez 6',2006-06-24,0 days 21:00:00,Zentralstadion,Leipzig,430000.0,Massimo Busacca,Switzerland,Crespo 10'; Rodríguez 98'Márquez 6',Rodríguez,98


In [43]:
fifa2 = expanded_fifa.copy()

In [44]:
fifa2.reset_index()

Unnamed: 0,index,Home_Team,Score,Away_Team,Penalties,First_Team_Goals,Second_Team_Goals,Date,Time,Stadium,City,Attendance,Referee,Referee_City,Players_Goal,Player_Name,Goal_Time
0,0,france,4–1,Mexico,0,L. Laurent 19'; Langiller 40'; Maschinot 43...,Carreño 70',1930-07-13,0 days 15:00:00,Estadio Pocitos,Montevideo,44440.0,Uruguay,,L. Laurent 19'; Langiller 40'; Maschinot 43...,L. Laurent,19
1,0,france,4–1,Mexico,0,L. Laurent 19'; Langiller 40'; Maschinot 43...,Carreño 70',1930-07-13,0 days 15:00:00,Estadio Pocitos,Montevideo,44440.0,Uruguay,,L. Laurent 19'; Langiller 40'; Maschinot 43...,Langiller,40
2,0,france,4–1,Mexico,0,L. Laurent 19'; Langiller 40'; Maschinot 43...,Carreño 70',1930-07-13,0 days 15:00:00,Estadio Pocitos,Montevideo,44440.0,Uruguay,,L. Laurent 19'; Langiller 40'; Maschinot 43...,Maschinot,43
3,0,france,4–1,Mexico,0,L. Laurent 19'; Langiller 40'; Maschinot 43...,Carreño 70',1930-07-13,0 days 15:00:00,Estadio Pocitos,Montevideo,44440.0,Uruguay,,L. Laurent 19'; Langiller 40'; Maschinot 43...,Carreño,70
4,1,argentina,1–0,France,0,Monti 81',,1930-07-15,0 days 16:00:00,Estadio Parque Central,Montevideo,234090.0,Almeida Rêgo,Brazil,,Monti,81
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2027,864,france,4–2,Croatia,0,Mandžukić 18' (o.g.); Griezmann 38' (pen.); Po...,Perišić 28'; Mandžukić 69',2018-07-15,0 days 18:00:00,Luzhniki Stadium,Moscow,780110.0,Néstor Pitana,Argentina,Mandžukić 18' (o.g.); Griezmann 38' (pen.); Po...,Griezmann,38
2028,864,france,4–2,Croatia,0,Mandžukić 18' (o.g.); Griezmann 38' (pen.); Po...,Perišić 28'; Mandžukić 69',2018-07-15,0 days 18:00:00,Luzhniki Stadium,Moscow,780110.0,Néstor Pitana,Argentina,Mandžukić 18' (o.g.); Griezmann 38' (pen.); Po...,Pogba,59
2029,864,france,4–2,Croatia,0,Mandžukić 18' (o.g.); Griezmann 38' (pen.); Po...,Perišić 28'; Mandžukić 69',2018-07-15,0 days 18:00:00,Luzhniki Stadium,Moscow,780110.0,Néstor Pitana,Argentina,Mandžukić 18' (o.g.); Griezmann 38' (pen.); Po...,Mbappé,65
2030,864,france,4–2,Croatia,0,Mandžukić 18' (o.g.); Griezmann 38' (pen.); Po...,Perišić 28'; Mandžukić 69',2018-07-15,0 days 18:00:00,Luzhniki Stadium,Moscow,780110.0,Néstor Pitana,Argentina,Mandžukić 18' (o.g.); Griezmann 38' (pen.); Po...,Perišić,28


In [45]:
fifa.tail()

Unnamed: 0,Home_Team,Score,Away_Team,Penalties,First_Team_Goals,Second_Team_Goals,Date,Time,Stadium,City,Attendance,Referee,Referee_City,Players_Goal
860,russia,2–2 (a.e.t.),Croatia,0,Cheryshev 31'; Fernandes 115',Kramarić 39'; Vida 101',2018-07-07,0 days 21:00:00,Fisht Olympic Stadium,Sochi,442872020.0,Sandro Ricci,Brazil,Cheryshev 31'; Fernandes 115'Kramarić 39'; Vid...
861,france,1–0,Belgium,0,Umtiti 51',,2018-07-10,0 days 21:00:00,Krestovsky Stadium,Saint Petersburg,642862030.0,Andrés Cunha,Uruguay,
862,croatia,2–1 (a.e.t.),England,0,Perišić 68'; Mandžukić 109',Trippier 5',2018-07-11,0 days 21:00:00,Luzhniki Stadium,Moscow,780112040.0,Cüneyt Çakır,Turkey,Perišić 68'; Mandžukić 109'Trippier 5'
863,belgium,2–0,England,0,Meunier 4'; E. Hazard 82',,2018-07-14,0 days 17:00:00,Krestovsky Stadium,Saint Petersburg,644062050.0,Alireza Faghani,Iran,
864,france,4–2,Croatia,0,Mandžukić 18' (o.g.); Griezmann 38' (pen.); Po...,Perišić 28'; Mandžukić 69',2018-07-15,0 days 18:00:00,Luzhniki Stadium,Moscow,780110.0,Néstor Pitana,Argentina,Mandžukić 18' (o.g.); Griezmann 38' (pen.); Po...
