- Study project of web scraping by gathering data about colonialism using Wikipedia articles
- Gathered data will be used for a map plotting study project

In [1]:
from table_scraping import *

# "Countries" dataframe

In [2]:
# Using the list of UN members as list of countries in the world
url = 'https://en.wikipedia.org/wiki/Member_states_of_the_United_Nations'
# url = 'https://en.wikipedia.org/wiki/List_of_sovereign_states_by_date_of_formation'

# Header for this page: <th scope="col"> | Cells in first column: <th scope="row">
data = get_dataframe(get_tables(url)[0], ['td', 'th'], {'scope' : 'col'})
data['Independence'] = np.nan
data['Independence From'] = np.nan
data['Main Colonial Power'] = np.nan

#
data = data.drop(['Date of admission', 'Original member', 'See also'], axis=1)
data = data.rename(columns={"Member state": 'Country'})
data


Unnamed: 0,Country,Independence,Independence From,Main Colonial Power
0,Afghanistan,,,
1,Albania,,,
2,Algeria,,,
3,Andorra,,,
4,Angola,,,
...,...,...,...,...
188,Bolivarian Republic of Venezuela,,,
189,Viet Nam,,,
190,Yemen,,,
191,Zambia,,,


In [3]:
# Renaming some countries
# Plurinational State of Bolivia
data.loc[data['Country'].str.contains(
    "Bolivia"), 'Country'] = 'Bolivia'  
# Brunei Darussalam
data.loc[data['Country'].str.contains(  
    "Brunei"), 'Country'] = 'Brunei'
# Congo
data.iloc[38, 0] = 'Republic of Congo'
# Democratic People's Republic of Korea
data.loc[data['Country'].str.contains(  
    "Democratic People's"), 'Country'] = 'North Korea'
# Islamic Republic of Iran
data.loc[data['Country'].str.contains(  
    "Iran"), 'Country'] = 'Iran'
# Lao People's Democratic Republic
data.loc[data['Country'].str.contains(  
    "Lao "), 'Country'] = 'Laos'
# Republic of Korea
data.loc[data['Country'].str.contains(  
    "Republic of Korea"), 'Country'] = 'South Korea'
# Republic of Moldova
data.loc[data['Country'].str.contains(  
    "Moldova"), 'Country'] = 'Moldova'
# Russian Federation
data.loc[data['Country'].str.contains(  
    "Russian"), 'Country'] = 'Russia'
# Syrian Arab Republic
data.loc[data['Country'].str.contains(  
    "Syrian"), 'Country'] = 'Syria'
# United Kingdom of Great Britain and Northern Ireland
data.loc[data['Country'].str.contains(  
    "United Kingdom"), 'Country'] = 'United Kingdom'
# United Republic of Tanzania
data.loc[data['Country'].str.contains(  
    "Tanzania"), 'Country'] = 'Tanzania'
# United States of America
data.loc[data['Country'].str.contains(
    "United States"), 'Country'] = 'United States'
# Bolivarian Republic of Venezuela
data.loc[data['Country'].str.contains(  
    "Venezuela"), 'Country'] = 'Venezuela'
# Viet Nam
data.loc[data['Country'].str.contains(
    "Viet"), 'Country'] = 'Vietnam'

data


Unnamed: 0,Country,Independence,Independence From,Main Colonial Power
0,Afghanistan,,,
1,Albania,,,
2,Algeria,,,
3,Andorra,,,
4,Angola,,,
...,...,...,...,...
188,Venezuela,,,
189,Vietnam,,,
190,Yemen,,,
191,Zambia,,,


# Africa

In [4]:
url = 'https://en.wikipedia.org/wiki/Decolonisation_of_Africa'

In [5]:
# Gets table containing the dates and colonial powers of african countries
df = get_dataframe(get_tables(url)[6])
df

Unnamed: 0,Country,Date of acquisition of sovereignty,Acquisition of sovereignty
0,Algeria,3 July 1962,French recognition of Algerian referendum on i...
1,Angola,11 November 1975,Independence from Portugal
2,Benin,1 August 1960,Independence from France
3,Botswana,30 September 1966,Independence from the United Kingdom
4,Burkina Faso,5 August 1960,Independence from France
...,...,...,...
58,Uganda,1 March 1962,Self-government granted
59,Uganda,9 October 1962,Independence from the United Kingdom
60,Zambia,24 October 1964,Independence from the United Kingdom
61,Zimbabwe,11 November 1965,Unilateral declaration of independence by Sout...


In [6]:
# Rename columns to fit with the 'data' dataframe
df = df.rename(columns={"Date of acquisition of sovereignty": 'Independence',
                       'Acquisition of sovereignty': 'Independence From'})

# Independence from date to just year
df['Independence'] = df['Independence'].str.split(expand=True)[2]

# Removes duplicates
df = df.drop_duplicates(
    ['Country'], keep='last').reset_index(drop=True)

# Get some colonial powers
df['Independence From'] = df['Independence From'].str.replace(
    "Independence From|independence from | independence from|the| declared| recognized", "", case=False, regex=True)

df['Independence From'] = df['Independence From'].str.strip()

# Corrections
# Algeria, Madagascar, Morocco
df.iloc[[0, 29, 34], 2] = 'France'

# Egypt, South Africa, Sudan, Tanzania, Zimbabwe
df.iloc[[14, 45, 47, 48, 53], 2] = 'United Kingdom'

# Ethiopia
df.iloc[18, [1, 2]] = 'Not Colonized'

# Liberia (Colonized by "American Colonization Society")
df.iloc[27]['Independence From'] = 'United States'

# Libya, Somalia
df.iloc[[28, 44], 2] = 'Italy'

# Namibia
df.iloc[36]['Independence From'] = 'South Africa'

# South Sudan
df.iloc[46]['Independence From'] = 'Sudan'

# Dates
# Libya (Declared: 1947 | Kingdom established: 1951)
# df.iloc[28]['Independence'] = 1947

# Malawi (Dominion: 1964 | Republic: 1966)
df.iloc[30]['Independence'] = 1966

# Mauritius (Elizabeth II as head of state: 1968 - 1992 | Republic: 1992)
# df.iloc[33]['Independence'] = 1992

# Morocco (End of the French Protectorate announced: 1955 | Declaration: 1956)
df.iloc[34]['Independence'] = 1956

# Sierra Leone (Dominion: 1961 | Republic: 1971)
df.iloc[43]['Independence'] = 1971

# South Africa (Statute of Westminster: 1931 | Republic: 1961)
df.iloc[45]['Independence'] = 1961

# Tanzania (Tanganyika: 1961 | Zanzibar: 1963 | Merger: 1964)
# df.iloc[48]['Independence'] = 1964

# Uganda (Dominion: 1962 | Republic: 1963)
df.iloc[51]['Independence'] = 1963

# Names
# Democratic Republic of Congo -> Democratic Republic of the Congo
df.iloc[11, 0] = "Democratic Republic of the Congo"

# Ivory Coast -> Côte d'Ivoire
df.iloc[24, 0] = "Côte d'Ivoire"

df


Unnamed: 0,Country,Independence,Independence From
0,Algeria,1962,France
1,Angola,1975,Portugal
2,Benin,1960,France
3,Botswana,1966,United Kingdom
4,Burkina Faso,1960,France
5,Burundi,1962,Belgium
6,Cabo Verde,1975,Portugal
7,Cameroon,1960,France
8,Central African Republic,1960,France
9,Chad,1960,France


In [7]:
df.iloc[-1, 0]

'\xa0Zimbabwe'

In [8]:
# Cleaning column 'Country'
df['Country'] = df['Country'].str.replace('\xa0', '')
df.iloc[-1, 0]


'Zimbabwe'

In [9]:
# Filling the main dataframe with collected data
map_fill(data, df, 'Country', ['Independence', 'Independence From'])

data


Unnamed: 0,Country,Independence,Independence From,Main Colonial Power
0,Afghanistan,,,
1,Albania,,,
2,Algeria,1962,France,
3,Andorra,,,
4,Angola,1975,Portugal,
...,...,...,...,...
188,Venezuela,,,
189,Vietnam,,,
190,Yemen,,,
191,Zambia,1964,United Kingdom,


## Main Colonial Power

In [10]:
# Gets table containing the dates and colonial powers of african countries
df = get_dataframe(get_tables(url)[0])[['Country', 'Colonial power']]
df = df.rename(columns={'Colonial power': 'Main Colonial Power'})

df


Unnamed: 0,Country,Main Colonial Power
0,Liberia,United States
1,South Africa,United Kingdom
2,Egypt,United Kingdom
3,Ethiopian Empire,Kingdom of Italy United Kingdom
4,Eritrea,Italy
5,Emirate of Cyrenaica,United Kingdom
6,United Kingdom of Libya,United Kingdom French Fourth Republic Emirate...
7,Libya,Italy United Kingdom
8,Sudan,United Kingdom[q] Republic of Egypt
9,South Sudan,United Kingdom[q] Republic of Egypt


In [11]:
df.iloc[-1, 0]


'\xa0Namibia'

In [12]:
# Cleaning column 'Country'
df['Country'] = df['Country'].str.replace('\xa0', '')
df.iloc[-1, 0]


'Namibia'

In [13]:
# Dropping unrecognized countries and countries that no longer exist
df = df.drop([5, 6, 21, 39, 57]).reset_index(drop=True)


In [14]:
# Corrections
df.iloc[3]['Country'] = 'Ethiopia'
df.iloc[3]['Main Colonial Power'] = 'Not Colonized'

df.iloc[30]['Country'] = 'Tanzania'
df.iloc[38]['Country'] = 'Gambia'
df.iloc[39]['Country'] = 'Zimbabwe'
df.iloc[47]['Country'] = 'Cabo Verde'

# Libya and Somalia
df.iloc[[5, 18], 1] = 'Italy'

# Sudan, South Sudan, and Namibia
df.iloc[[6, 7, 53], 1] = 'United Kingdom'

# Tunisia and Morocco
df.iloc[[8, 9], 1] = 'France'

# Cameroon, Burundi, and Ruanda
df.iloc[[12, 31, 32], 1] = 'Germany'

df


Unnamed: 0,Country,Main Colonial Power
0,Liberia,United States
1,South Africa,United Kingdom
2,Egypt,United Kingdom
3,Ethiopia,Not Colonized
4,Eritrea,Italy
5,Libya,Italy
6,Sudan,United Kingdom
7,South Sudan,United Kingdom
8,Tunisia,France
9,Morocco,France


In [15]:
# Filling main dataframe
map_fill(data, df, 'Country', ['Main Colonial Power'])
data

Unnamed: 0,Country,Independence,Independence From,Main Colonial Power
0,Afghanistan,,,
1,Albania,,,
2,Algeria,1962,France,France
3,Andorra,,,
4,Angola,1975,Portugal,Portugal
...,...,...,...,...
188,Venezuela,,,
189,Vietnam,,,
190,Yemen,,,
191,Zambia,1964,United Kingdom,United Kingdom


# America

In [16]:
url = 'https://en.wikipedia.org/wiki/Decolonization_of_the_Americas'


In [17]:
# Gets table containing the dates and colonial powers of american countries
df = get_dataframe(get_tables(url)[0])[
    ['Country', 'Colonial power', 'Independence date']]
df['Independence From'] = np.nan
df =  df.sort_values('Country').reset_index(drop=True)
df


Unnamed: 0,Country,Colonial power,Independence date,Independence From
0,Antigua and Barbuda,United Kingdom,"November 1, 1981",
1,Argentina,Spanish Empire,"May 25, 1810; July 9, 1816",
2,Bahamas,United Kingdom,"July 10, 1973",
3,Barbados,United Kingdom,"November 30, 1966",
4,Belize,United Kingdom,"September 21, 1981",
5,Bolivia,Spanish Empire,"August 6, 1825",
6,Brazil,Portuguese Empire,"August 29, 1825",
7,Canada,United Kingdom,"July 1, 1867",
8,Chile,Spanish Empire,"February 12, 1818",
9,Colombiaas part of Gran Colombia,Spanish Empire,"August 7, 1819",


In [18]:
df.iloc[0, 0]


'\xa0Antigua and Barbuda'

In [19]:
# Cleaning column 'Country'
df['Country'] = df['Country'].str.replace('\xa0', '')
df.iloc[0, 0]


'Antigua and Barbuda'

In [20]:
# Rename columns to fit with the 'data' dataframe
df = df.rename(columns={"Independence date": 'Independence',
                                  'Colonial power': 'Main Colonial Power'})

# Independence from date to just year
df['Independence'] = df['Independence'].str.split(expand=True)[2]
df['Independence'] = df['Independence'].str.replace(';', '')

# Dropping second Dominican Republic independence
df = df.drop(13).reset_index(drop=True)

df


Unnamed: 0,Country,Main Colonial Power,Independence,Independence From
0,Antigua and Barbuda,United Kingdom,1981,
1,Argentina,Spanish Empire,1810,
2,Bahamas,United Kingdom,1973,
3,Barbados,United Kingdom,1966,
4,Belize,United Kingdom,1981,
5,Bolivia,Spanish Empire,1825,
6,Brazil,Portuguese Empire,1825,
7,Canada,United Kingdom,1867,
8,Chile,Spanish Empire,1818,
9,Colombiaas part of Gran Colombia,Spanish Empire,1819,


In [21]:
# Obtaining the correct name for some countries 
df['Country'] = df['Country'].str.replace(
    "as part of | Federal Republic of Central America|Gran Colombia", "", case=False, regex=True)

# Get some colonial powers
df['Main Colonial Power'] = df['Main Colonial Power'].str.replace(
    " Empire|Empire ", "", case=False, regex=True)

df['Main Colonial Power'] = df['Main Colonial Power'].str.strip()

df['Main Colonial Power'] = df['Main Colonial Power'].str.replace(
    "British", "United Kingdom", case=False)
df['Main Colonial Power'] = df['Main Colonial Power'].str.replace(
    "Spanish", "Spain", case=False)
df['Main Colonial Power'] = df['Main Colonial Power'].str.replace(
    "Portuguese", "Portugal", case=False)


In [22]:
# Corrections
df['Independence From'] = df['Main Colonial Power']

# Argentina (Declared)
df.iloc[1, 2] = 1816

# Bolivia (Declared: 1825 | Recognized: 1847)
# df.iloc[5, 2] = 1825

# Brazil (Declared: 1822 | Recognized: 1825)
df.iloc[6, 2] = 1822

# Canada (Confederation: 1867 | Westminster: 1931 | Patriation: 1982)
df.iloc[7, 2] = 1982

# Chile (Declared: 1818 | Recognized: 1844)
# df.iloc[8, 2] = 1818

# Colombia (Declared: 1810 | Recognized: 1819)
df.iloc[9, 2] = 1810

# Cuba
df.iloc[11, 2] = 1902
df.iloc[11, 3] = 'United States'

# Dominican Republic (4 times)
# From Spain in 1821 | From Haiti in 1844
# From Spain in 1865 | From USA in 1924
# df.iloc[13, 3] = 'Haiti'
# df.iloc[13, 3] = 'United States'

# Haiti
# Occupied by USA from 1915 to 1934
df.iloc[19, 1] = 'France'
df.iloc[19, 3] = 'France'

# Panama (From Spain: 1821 | From Colombia: 1903)
df.iloc[24, 2] = 1903
df.iloc[24, 3] = 'Colombia'

# Uruguai
df.iloc[33, 1] = 'Spain'
df.iloc[33, 3] = 'Brazil'

df


Unnamed: 0,Country,Main Colonial Power,Independence,Independence From
0,Antigua and Barbuda,United Kingdom,1981,United Kingdom
1,Argentina,Spain,1816,Spain
2,Bahamas,United Kingdom,1973,United Kingdom
3,Barbados,United Kingdom,1966,United Kingdom
4,Belize,United Kingdom,1981,United Kingdom
5,Bolivia,Spain,1825,Spain
6,Brazil,Portugal,1822,Portugal
7,Canada,United Kingdom,1982,United Kingdom
8,Chile,Spain,1818,Spain
9,Colombia,Spain,1810,Spain


In [23]:
map_fill(data, df, 'Country', ['Independence', 'Independence From', 'Main Colonial Power'])

data


Unnamed: 0,Country,Independence,Independence From,Main Colonial Power
0,Afghanistan,,,
1,Albania,,,
2,Algeria,1962,France,France
3,Andorra,,,
4,Angola,1975,Portugal,Portugal
...,...,...,...,...
188,Venezuela,1821,Spain,Spain
189,Vietnam,,,
190,Yemen,,,
191,Zambia,1964,United Kingdom,United Kingdom


# Asia

In [24]:
url = 'https://en.wikipedia.org/wiki/Decolonisation_of_Asia'


In [25]:
# Gets table containing the dates and colonial powers of asian countries
df = get_dataframe(get_tables(url)[0])

# Rename columns to fit with the 'data' dataframe
df['Main Colonial Power'] = None
df = df.rename(columns={"Date of acquisition of sovereignty": 'Independence',
                        'Acquisition of sovereignty': 'Independence From'})
# 
df['Independence'] = df['Independence'].str.split(expand=True)[2]
df.iloc[7, 0] = 'China'

# Remove duplicates
df = df.drop_duplicates(['Country'], keep='last').reset_index(drop = True)

df


Unnamed: 0,Country,Independence,Independence From,Main Colonial Power
0,Afghanistan,,Treaty of Rawalpindi ends British control of f...,
1,Bahrain,1971,End of treaties with the United Kingdom,
2,Bangladesh,1971,Independence from Pakistan declared,
3,Bhutan,,Ugyen Wangchuck ends a period of civil war and...,
4,Brunei,1984,Brunei regains its independence after an agree...,
5,Cambodia,1989,Becomes free from Vietnamese occupation; it ge...,
6,China,,"In 221 BC, Qin Shi Huang conquered the various...",
7,India,1947,Independence from the British Empire,
8,Indonesia,1949,Independence from the Kingdom of the Netherlan...,
9,Iran,,After the fall of Assyria between 616 BC and 6...,


In [26]:
df.iloc[0, 0]


'\xa0Afghanistan'

In [27]:
# Cleaning column 'Country'
df['Country'] = df['Country'].str.replace('\xa0', '')
df.iloc[0, 0]


'Afghanistan'

In [28]:
# Get some colonial powers
df['Independence From'] = df['Independence From'].str.replace(
    "Independence From|independence from | independence from|the| declared| recognized|End of treaties with ",
    "", case=False, regex=True)

df['Independence From'] = df['Independence From'].str.strip()

df.loc[df['Independence From'].str.contains(
    "British|United Kingdom"), ['Independence From', 'Main Colonial Power']] = 'United Kingdom'

df.loc[df['Independence From'].str.contains(
    "Soviet"), 'Main Colonial Power'] = 'Russia'

df.loc[df['Independence From'].str.contains(
    "Netherlands"), ['Independence From', 'Main Colonial Power']] = 'Netherlands'


In [29]:
# Corrections
# Afghanistan
df.iloc[0, 1] = 1919

# Bangladesh
df.iloc[2, 3] = 'United Kingdom'

# Bhutan, Japan, China, Iran, Nepal, Saudi Arabia, Thailand
df.iloc[[3, 6, 9, 12, 22, 28, 33], [1, 2, 3]] = 'Not Colonized'

# Cambodia
df.iloc[5, 1] = 1953

# Kuwait
df.iloc[14, 1] = 1961

# Malaysia
df.iloc[18, 1] = 1963

# Mongolia
df.iloc[20, [2, 3]] = 'China'

# Oman
df.iloc[23, 1] = 1970

# Philippines
df.iloc[26, 2] = 'United States'
df.iloc[26, 3] = 'Spain'

# Qatar
df.iloc[27, 1] = 1971

# Singapore
df.iloc[29, 1] = 1965
df.iloc[29, 2] = 'Malaysia'
df.iloc[29, 3] = 'United Kingdom'

# Syria
df.iloc[31, 1] = 1946

# Timor-Leste (From Portugal: 1975 | From Indonesia: 2002)
df.iloc[34, 2] = 'Indonesia'
df.iloc[34, 3] = 'Portugal'

# Vietnam
df.iloc[38, 1] = 1945

# Yemen (North: 1918 | South: 1967 | Unification: 1990)
# df.iloc[39, 1] = 1990

#
# Iraq, Kuwait, Malaysia, Oman, Palestine, Qatar
df.iloc[[10, 14, 18, 23, 25, 27], [2, 3]] = 'United Kingdom'

# Cambodia, Syria, Vietnam
df.iloc[[5, 31, 38], [2, 3]] = 'France'

df['Main Colonial Power'] = df['Main Colonial Power'].fillna(df['Independence From'])


In [30]:
df


Unnamed: 0,Country,Independence,Independence From,Main Colonial Power
0,Afghanistan,1919,United Kingdom,United Kingdom
1,Bahrain,1971,United Kingdom,United Kingdom
2,Bangladesh,1971,Pakistan,United Kingdom
3,Bhutan,Not Colonized,Not Colonized,Not Colonized
4,Brunei,1984,United Kingdom,United Kingdom
5,Cambodia,1953,France,France
6,China,Not Colonized,Not Colonized,Not Colonized
7,India,1947,United Kingdom,United Kingdom
8,Indonesia,1949,Kingdom of Nerlands following ir unilateral P...,Kingdom of Nerlands following ir unilateral P...
9,Iran,Not Colonized,Not Colonized,Not Colonized


In [31]:
map_fill(data, df, 'Country', ['Independence',
         'Independence From', 'Main Colonial Power'])

data


Unnamed: 0,Country,Independence,Independence From,Main Colonial Power
0,Afghanistan,1919,United Kingdom,United Kingdom
1,Albania,,,
2,Algeria,1962,France,France
3,Andorra,,,
4,Angola,1975,Portugal,Portugal
...,...,...,...,...
188,Venezuela,1821,Spain,Spain
189,Vietnam,1945,France,France
190,Yemen,1967,United Kingdom,United Kingdom
191,Zambia,1964,United Kingdom,United Kingdom


# Oceania

In [32]:
url = 'https://en.wikipedia.org/wiki/Decolonisation_of_Oceania'

In [33]:
# # Another possible table
# df = get_dataframe(get_tables(url)[0], ['td', 'th'], {'scope': 'col'})[['Country', 'Colonial power', 'Independence date']]

# df['Independence From'] = None
# df = df.rename(columns={'Independence date': 'Independence',
#                         'Colonial power': 'Main Colonial Power'})

# df['Independence'] = df['Independence'].str.split(expand=True)[2]

# df


In [34]:
# Gets table
df = get_dataframe(get_tables(url)[1])[['Country', 'Date of acquisition of sovereignty', 'Acquisition of sovereignty']]

# Rename columns to fit with the 'data' dataframe
df['Main Colonial Power'] = None
df = df.rename(columns={"Date of acquisition of sovereignty": 'Independence',
                        'Acquisition of sovereignty': 'Independence From'})

df['Independence'] = df['Independence'].str.split(expand=True)[2]

# Remove duplicates
df = df.drop_duplicates(['Country'], keep='last').reset_index(drop=True)

df


Unnamed: 0,Country,Independence,Independence From,Main Colonial Power
0,Australia,1986,Australia Act 1986 – Remaining legal ties betw...,
1,Fiji,1970,Independence from the United Kingdom,
2,Kiribati,1979,Independence from the United Kingdom,
3,Marshall Islands,1986,Compact of Free Association with the United St...,
4,Federated States of Micronesia,1986,Compact of Free Association with the United St...,
5,Nauru,1968,"Independence from UN Trusteeship (Australian, ...",
6,New Zealand,1967,Governor-General becomes a New Zealand appoint...,
7,Palau,1994,Emerged from United Nations trusteeship (admin...,
8,Papua New Guinea,1975,Independence from Australia,
9,Samoa,1962,Independence from New Zealand,


In [35]:
df.iloc[0, 0]


'\xa0Australia'

In [36]:
# Cleaning column 'Country'
df['Country'] = df['Country'].str.replace('\xa0', '')
df.iloc[0, 0]


'Australia'

In [37]:
# Get some colonial powers
df['Independence From'] = df['Independence From'].str.replace(
    "Independence From|the | declared| recognized| recognised|End of treaties with ",
    "", case=False, regex=True)

df['Independence From'] = df['Independence From'].str.strip()

df.loc[df['Independence From'].str.contains(
    "British|UK|United Kingdom"), ['Independence From', 'Main Colonial Power']] = 'United Kingdom'


In [38]:
# Corrections
# Marshall Islands, Federated States of Micronesia, Palau
df.iloc[[3, 4, 7], 2] = 'United States'
df.iloc[[3, 4, 7], 3] = 'Spain'

# New Zealand (Westminster: 1947)
df.iloc[6, 1] = 1947
df.iloc[6, [2, 3]] = 'United Kingdom'

# Papua New Guinea, Samoa
df.iloc[[8, 9], 3] = 'Germany'

In [39]:
df

Unnamed: 0,Country,Independence,Independence From,Main Colonial Power
0,Australia,1986,United Kingdom,United Kingdom
1,Fiji,1970,United Kingdom,United Kingdom
2,Kiribati,1979,United Kingdom,United Kingdom
3,Marshall Islands,1986,United States,Spain
4,Federated States of Micronesia,1986,United States,Spain
5,Nauru,1968,United Kingdom,United Kingdom
6,New Zealand,1947,United Kingdom,United Kingdom
7,Palau,1994,United States,Spain
8,Papua New Guinea,1975,Australia,Germany
9,Samoa,1962,New Zealand,Germany


In [40]:
map_fill(data, df, 'Country', ['Independence',
         'Independence From', 'Main Colonial Power'])

data


Unnamed: 0,Country,Independence,Independence From,Main Colonial Power
0,Afghanistan,1919,United Kingdom,United Kingdom
1,Albania,,,
2,Algeria,1962,France,France
3,Andorra,,,
4,Angola,1975,Portugal,Portugal
...,...,...,...,...
188,Venezuela,1821,Spain,Spain
189,Vietnam,1945,France,France
190,Yemen,1967,United Kingdom,United Kingdom
191,Zambia,1964,United Kingdom,United Kingdom


# Last Corrections

In [46]:
data[data['Independence'].isnull()]

Unnamed: 0,Country,Independence,Independence From,Main Colonial Power
1,Albania,,,
3,Andorra,,,
9,Austria,,,
15,Belarus,,,
16,Belgium,,,
21,Bosnia and Herzegovina,,,
25,Bulgaria,,,
41,Croatia,,,
44,Czechia,,,
47,Denmark,,,


In [42]:
# Armenia, Azerbaijan, Kazakhstan
data.iloc[[7, 10, 87], 1] = 1991
data.iloc[[7, 10, 87], 2] = 'Soviet Union'
data.iloc[[7, 10, 87], 3] = 'Russia'

# Cyprus
data.iloc[43, 1] = 1960

# Malta
data.iloc[106, 1] = 1964

#
# Cyprus, Malta
data.iloc[[43, 106], [2, 3]] = 'United Kingdom'


In [45]:
# Koreas
data.loc[data['Country'].str.contains(
    "Korea"), ['Independence', 'Independence From', 'Main Colonial Power']] = 'Not Colonized'


In [None]:
# data.to_csv('data.csv')