# Scraping FIFA country codes from Wikipedia

One of the goals of the project is to create a dashboard that summarizes the information retrieved and curated from the DFB archives. To support that dashboard, we download the FIFA country codes from Wikipedia.

In [1]:
# imports
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [2]:
# German Wikipedia page with the list of FIFA members
url_FIFAcodes = "https://de.wikipedia.org/wiki/Liste_der_FIFA-Mitglieder"

# Empty frame that declares the column names of the members table
df = pd.DataFrame(
    columns=[
        "Beitritt",
        "Staat",
        "FIFA code",
        "Verband",
        "Gründung",
        "Konföderation",
    ]
)

In [4]:
# Download the page. requests applies no timeout by default, so one is set
# explicitly to keep the call from hanging indefinitely.
response = requests.get(url_FIFAcodes, timeout=30)
# Raise on an HTTP error status instead of silently parsing an error page
response.raise_for_status()
data = response.text

# Parse the HTML with the parser from the standard library
soup = BeautifulSoup(data, 'html.parser')

# First <table> element of the page (None when the page has no table)
table = soup.find('table')

In [5]:
# Collect one record per data row of the members table.
# The records are gathered in a plain list and converted to a frame once at
# the end. This avoids growing the frame row by row and means that running
# this cell again replaces the data instead of appending duplicate rows.
rows = []
for row in table.tbody.find_all('tr'):
    # Rows without <td> cells (e.g. header rows) yield an empty result here
    columns = row.find_all('td')

    # Skip rows that cannot hold all six fields instead of raising IndexError
    if len(columns) < 6:
        continue

    # The country cell may hold several links: take the last one unless its
    # text is at most two characters long, in which case the one before it
    # is taken.
    links = columns[1].find_all('a')
    if links and len(links[-1].text) > 2:
        staat = links[-1].text
    elif len(links) >= 2:
        staat = links[-2].text
    else:
        # No suitable link in the cell: fall back to its plain text
        staat = columns[1].text.strip()

    rows.append([
        columns[0].text.strip(),  # Beitritt
        staat,                    # Staat
        columns[2].text.strip(),  # FIFA code
        columns[3].text.strip(),  # Verband
        columns[4].text.strip(),  # Gründung
        columns[5].text.strip(),  # Konföderation
    ])

# Rebuild the frame with the column names declared for `df`
df = pd.DataFrame(rows, columns=df.columns)

In [6]:
# Display only the country name, FIFA code and confederation columns
df[['Staat', 'FIFA code', 'Konföderation']]

Unnamed: 0,Staat,FIFA code,Konföderation
0,Afghanistan,AFG,AFC
1,Ägypten,EGY,CAF
2,Albanien,ALB,UEFA
3,Algerien,ALG,CAF
4,Amerikanisch-Samoa,ASA,OFC
...,...,...,...
206,Vereinigte Staaten,USA,CONCACAF
207,Vietnam,VIE,AFC
208,Wales,WAL,UEFA
209,Zentralafrikanische Republik,CTA,CAF


In [8]:
# Mapping from a country name to an alternative, shorter spelling.
# NOTE(review): presumably meant for renaming values of the 'Staat' column;
# it is not applied in the visible cells — confirm where it is used.
change_county = {
    "Bosnien und Herzegowina": "Bosnien-Herzegowina",
    "VR China": "China",
    "Demokratische Republik Kongo": "DR Kongo",
    "Republik Kongo": "Kongo",
}

In [9]:
# Write the complete members table to CSV, leaving out the index column.
# NOTE(review): the destination is an absolute path below a specific mount
# point — it has to be adjusted when running on another machine.
df.to_csv('/mnt/287A29DF7A29AA90/PythonProjects/projects_repo/BuLi_scorers/data/FIFA_codes.csv', index=False)

In [10]:
# English Wikipedia page comparing alphabetic country codes
url_FIFA_ISOcodes = "https://en.wikipedia.org/wiki/Comparison_of_alphabetic_country_codes"

# Empty frame that declares the column names of the code comparison table
df_fifa_iso = pd.DataFrame(
    columns=[
        "Country",
        "IOC code",
        "FIFA code",
        "ISO code",
    ]
)

In [12]:
# Download the page. requests applies no timeout by default, so one is set
# explicitly to keep the call from hanging indefinitely.
response_countries = requests.get(url_FIFA_ISOcodes, timeout=30)
# Raise on an HTTP error status instead of silently parsing an error page
response_countries.raise_for_status()
data_countries = response_countries.text

# Parse the HTML with the parser from the standard library
soup_countries = BeautifulSoup(data_countries, 'html.parser')

# First <table> element of the page (None when the page has no table)
table_countries = soup_countries.find('table')

In [13]:
# Collect one record per data row of the code comparison table.
# The records are gathered in a plain list and converted to a frame once at
# the end instead of growing the frame row by row.
code_rows = []
for row in table_countries.tbody.find_all('tr'):
    # Rows without <td> cells (e.g. header rows) yield an empty result here
    columns = row.find_all('td')

    # Skip rows that cannot hold the country cell and the three code cells
    # instead of raising IndexError
    if len(columns) < 5:
        continue

    # The country name is taken from the title of the first link in the
    # cell; fall back to the plain cell text when the link or its title is
    # missing.
    link = columns[1].find('a')
    if link is not None and link.has_attr('title'):
        country = link['title']
    else:
        country = columns[1].text.strip()

    # A code cell that contains a link is recorded as "no code" (empty
    # string); otherwise its stripped text is the code.
    ioc_code, fifa_code, iso_code = (
        '' if cell.find_all('a') else cell.text.strip()
        for cell in columns[2:5]
    )

    code_rows.append([country, ioc_code, fifa_code, iso_code])

df_fifa_iso = pd.DataFrame(
    code_rows,
    columns=['Country', 'IOC code', 'FIFA code', 'ISO code'],
)

In [14]:
# Inspect part of the table; .loc slices by index label and includes both end labels
df_fifa_iso.loc[61:90]

Unnamed: 0,Country,IOC code,FIFA code,ISO code
61,Denmark,DEN,DEN,DNK
62,Djibouti,DJI,DJI,DJI
63,Dominica,DMA,DMA,DMA
64,Dominican Republic,DOM,DOM,DOM
65,Ecuador,ECU,ECU,ECU
66,Egypt,EGY,EGY,EGY
67,El Salvador,ESA,SLV,SLV
68,England,,ENG,
69,Equatorial Guinea,GEQ,EQG,GNQ
70,Eritrea,ERI,ERI,ERI


In [16]:
# Keep only the entries whose 'FIFA code' value is not the empty string
has_fifa_code = df_fifa_iso['FIFA code'] != ''
df_FIFA = df_fifa_iso[has_fifa_code]
df_FIFA

Unnamed: 0,Country,IOC code,FIFA code,ISO code
0,Afghanistan,AFG,AFG,AFG
2,Albania,ALB,ALB,ALB
3,Algeria,ALG,ALG,DZA
4,American Samoa,ASA,ASA,ASM
5,Andorra,AND,AND,AND
...,...,...,...,...
247,Vietnam,VIE,VIE,VNM
248,Wales,,WAL,
251,Yemen,YEM,YEM,YEM
252,Zambia,ZAM,ZAM,ZMB


In [18]:
# Write the entries that have a FIFA code to CSV, leaving out the index column.
# NOTE(review): the destination is an absolute path below a specific mount
# point — it has to be adjusted when running on another machine.
df_FIFA.to_csv('/mnt/287A29DF7A29AA90/PythonProjects/projects_repo/BuLi_scorers/data/FIFA_ISO_codes.csv', index=False)