# Scraping FIFA codes for their respective countries from Wikipedia


In [34]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [35]:
# Wikipedia page that is downloaded and parsed for its country-code tables.
url = "https://en.wikipedia.org/wiki/List_of_FIFA_country_codes"

In [36]:
# Download the page HTML. The timeout keeps the cell from hanging forever
# on a stalled connection, and raise_for_status() aborts on a 4xx/5xx reply
# instead of letting an error page be parsed as if it were the article.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.text

In [37]:
# Parse the downloaded HTML using the html5lib parser backend.
soup = BeautifulSoup(data, "html5lib")

In [63]:
# Every <table> element that carries the "wikitable" CSS class.
tables = soup.find_all("table", attrs={"class": "wikitable"})

In [64]:
len(tables)

10

In [65]:
# Start with an empty two-column frame for the FIFA member country codes.
fifa_county_codes = pd.DataFrame(columns=["Country", "Codes"])

In [66]:
# Collect (country, code) pairs from the first four "wikitable" tables.
# Rows are gathered in a plain list and converted to a DataFrame once:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and it
# copied the whole frame on every call.
fifa_rows = []
for table in tables[:4]:
    for row in table.tbody.find_all("tr"):
        cells = row.find_all("td")
        # Rows without any <td> cell (e.g. header rows) carry no data.
        if not cells:
            continue
        fifa_rows.append(
            {"Country": cells[0].text.strip(), "Codes": cells[1].text.strip()}
        )

fifa_county_codes = pd.DataFrame(fifa_rows, columns=["Country", "Codes"])

fifa_county_codes

Unnamed: 0,Country,Codes
0,Afghanistan,AFG
1,Albania,ALB
2,Algeria,ALG
3,American Samoa,ASA
4,Andorra,AND
...,...,...
206,Vietnam,VIE
207,Wales,WAL
208,Yemen,YEM
209,Zambia,ZAM


In [68]:
# Save the table as CSV in the working directory. With pandas' default
# index=True the row index is written as an unnamed first column.
fifa_county_codes.to_csv("Fifa_Country_codes.csv")

In [70]:
!ls

Fifa_Country_codes.csv	sample_data


In [71]:
# Colab-only: `files.download` asks the browser to download the named file
# from the notebook's working directory.
from google.colab import files
files.download("Fifa_Country_codes.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### **Non-FIFA member codes**

In [None]:
tables[4]

In [89]:
# Empty three-column frame for the non-FIFA member codes. The former
# `Non_fifa_county_codes = []` line was a dead store (overwritten at once).
Non_fifa_county_codes = pd.DataFrame(columns=["Country", "Codes", "Confederation"])

In [90]:
# Collect (country, code, confederation) triples from tables[4].
# Rows are gathered in a list and converted to a DataFrame once:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and it
# copied the whole frame on every call.
non_fifa_rows = []
for row in tables[4].tbody.find_all("tr"):
    cells = row.find_all("td")
    # Rows without any <td> cell (e.g. header rows) carry no data.
    if not cells:
        continue
    non_fifa_rows.append(
        {
            "Country": cells[0].text.strip(),
            "Codes": cells[1].text.strip(),
            "Confederation": cells[2].text.strip(),
        }
    )

Non_fifa_county_codes = pd.DataFrame(
    non_fifa_rows, columns=["Country", "Codes", "Confederation"]
)

Non_fifa_county_codes

Unnamed: 0,Country,Codes,Confederation
0,Bonaire,BOE,CONCACAF
1,French Guiana,GUF,CONCACAF
2,Great Britain,GBR,–
3,Guadeloupe,GLP,CONCACAF
4,Kiribati,KIR,OFC
5,Martinique,MTQ,CONCACAF
6,Niue,NIU,OFC
7,Northern Mariana Islands,NMI,AFC
8,Réunion,REU,CAF
9,Saint Martin,SMN,CONCACAF


In [91]:
!ls

Fifa_Country_codes.csv	sample_data


In [94]:
# Save the table as CSV in the working directory. With pandas' default
# index=True the row index is written as an unnamed first column.
Non_fifa_county_codes.to_csv("Non_fifa_county_codes.csv")

In [95]:
!ls

Fifa_Country_codes.csv	Non_fifa_county_codes.csv
Non_fifa_county_codes	sample_data


In [96]:
# Browser download via Colab's `files` helper (imported in an earlier cell).
files.download("Non_fifa_county_codes.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### **Irregular codes**
The following codes refer to countries or dependent areas that are currently not affiliated with FIFA. Even though they are members or associate members of their regional confederations, these codes are not regularly used in communications of FIFA.

In [None]:
tables[5]

In [98]:
# Empty two-column frame for the irregular codes. The former
# `Irregular_codes = []` line was a dead store (overwritten at once).
Irregular_codes = pd.DataFrame(columns=["Country", "Codes"])

In [99]:
# Collect (country, code) pairs from tables[5].
# Rows are gathered in a list and converted to a DataFrame once:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and it
# copied the whole frame on every call.
irregular_rows = []
for row in tables[5].tbody.find_all("tr"):
    cells = row.find_all("td")
    # Rows without any <td> cell (e.g. header rows) carry no data.
    if not cells:
        continue
    irregular_rows.append(
        {"Country": cells[0].text.strip(), "Codes": cells[1].text.strip()}
    )

Irregular_codes = pd.DataFrame(irregular_rows, columns=["Country", "Codes"])

Irregular_codes

Unnamed: 0,Country,Codes
0,Åland Islands,AXL
1,Artsakh,NKR
2,Basque Country,BSQ
3,Catalonia,CAT
4,F.S. Micronesia,FSM
5,Galicia,GAL
6,Greenland,GRL
7,Monaco,MON
8,Palau,PLW
9,Sardinia,SRD


In [100]:
# Save the table as CSV in the working directory. With pandas' default
# index=True the row index is written as an unnamed first column.
Irregular_codes.to_csv("Irregular_codes.csv")

In [101]:
# Browser download via Colab's `files` helper (imported in an earlier cell).
files.download("Irregular_codes.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### **Obsolete country codes**
The following codes are obsolete because a country has ceased to exist, changed its name, changed its code, or has become part of another country.

In [None]:
for i in range(6,9):
  print(tables[i])

In [104]:
# Empty two-column frame for the obsolete codes. The former
# `Obsolete_country_codes = []` line was a dead store (overwritten at once).
Obsolete_country_codes = pd.DataFrame(columns=["Country", "Codes"])

In [105]:
# Collect (country, code) pairs from tables[6], tables[7] and tables[8].
# Rows are gathered in a list and converted to a DataFrame once:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and it
# copied the whole frame on every call.
obsolete_rows = []
for table in tables[6:9]:
    for row in table.tbody.find_all("tr"):
        cells = row.find_all("td")
        # Rows without any <td> cell (e.g. header rows) carry no data.
        if not cells:
            continue
        obsolete_rows.append(
            {"Country": cells[0].text.strip(), "Codes": cells[1].text.strip()}
        )

Obsolete_country_codes = pd.DataFrame(obsolete_rows, columns=["Country", "Codes"])
Obsolete_country_codes

Unnamed: 0,Country,Codes
0,British Guiana,BGU
1,British India,BIN
2,Bohemia,BOH
3,Burma,BUR
4,Central African Republic,CAF
5,Ceylon,CEY
6,CIS,CIS
7,Czechoslovakia,TCH
8,Dahomey,DAH
9,Dutch East Indies,INH


In [106]:
# Save the table as CSV in the working directory. With pandas' default
# index=True the row index is written as an unnamed first column.
Obsolete_country_codes.to_csv("Obsolete_country_codes.csv")

In [108]:
# Browser download via Colab's `files` helper (imported in an earlier cell).
files.download("Obsolete_country_codes.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### **FIFA, IOC and ISO differences**
For most countries, FIFA codes are the same as both the International Olympic Committee country codes used for the Olympic Games and the ISO 3166 country codes. When these two are different, FIFA usually chooses one of the two, and for a few countries FIFA uses a distinct code.

In [None]:
tables[9]

In [110]:
# Empty four-column frame for the FIFA / IOC / ISO code comparison. The
# former `FIFA_IOC_ISO_differences = []` line was a dead store (overwritten
# at once); a stray tab inside the column list was also normalised.
FIFA_IOC_ISO_differences = pd.DataFrame(
    columns=["Country", "FIFA_Code", "IOC_Code", "ISO_Code"]
)

In [111]:
# Collect (country, FIFA code, IOC code, ISO code) rows from tables[9].
# Rows are gathered in a list and converted to a DataFrame once:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and it
# copied the whole frame on every call.
difference_rows = []
for row in tables[9].tbody.find_all("tr"):
    cells = row.find_all("td")
    # Rows without any <td> cell (e.g. header rows) carry no data.
    if not cells:
        continue
    difference_rows.append(
        {
            "Country": cells[0].text.strip(),
            "FIFA_Code": cells[1].text.strip(),
            "IOC_Code": cells[2].text.strip(),
            "ISO_Code": cells[3].text.strip(),
        }
    )

FIFA_IOC_ISO_differences = pd.DataFrame(
    difference_rows, columns=["Country", "FIFA_Code", "IOC_Code", "ISO_Code"]
)
FIFA_IOC_ISO_differences

Unnamed: 0,Country,FIFA_Code,IOC_Code,ISO_Code
0,Algeria,ALG,ALG,DZA
1,United Arab Emirates,UAE,UAE,ARE
2,American Samoa,ASA,ASA,ASM
3,Angola,ANG,ANG,AGO
4,Antigua and Barbuda,ATG,ANT,ATG
...,...,...,...,...
83,U.S. Virgin Islands,VIR,ISV,VIR
84,Vietnam,VIE,VIE,VNM
85,Vanuatu,VAN,VAN,VUT
86,Zambia,ZAM,ZAM,ZMB


In [113]:
# Save the table as CSV in the working directory. With pandas' default
# index=True the row index is written as an unnamed first column.
FIFA_IOC_ISO_differences.to_csv("FIFA_IOC_ISO_differences.csv")

In [114]:
!ls

Fifa_Country_codes.csv	      Non_fifa_county_codes	  sample_data
FIFA_IOC_ISO_differences.csv  Non_fifa_county_codes.csv
Irregular_codes.csv	      Obsolete_country_codes.csv


In [115]:
# Browser download via Colab's `files` helper (imported in an earlier cell).
files.download("FIFA_IOC_ISO_differences.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# **Completed!**

