### Load the Coordinates and Elevation CSV files

In [158]:
import pandas as pd

# Read the cleaned country-coordinates table from disk into a DataFrame
df_coordinates = pd.read_csv(filepath_or_buffer="clean-coordinates-data.csv")

# Preview the first two rows to confirm the columns loaded as expected
df_coordinates.head(n=2)

Unnamed: 0,country,coordinates,country-code,continent
0,Belarus,"53.5318804783, 28.033566395",BLR,Europe
1,Germany,"51.1063634863, 10.3814938434",DEU,Europe


In [159]:
# Read the cleaned per-country elevation table from disk into a DataFrame
df_elevation = pd.read_csv(filepath_or_buffer="clean-elevation-data.csv")

# Preview the first two rows to confirm the columns loaded as expected
df_elevation.head(n=2)


Unnamed: 0,country,Numeric-Elevation
0,Afghanistan,1884.0
1,Albania,708.0


### Dataset Validation

In [160]:
# Verify that the country names in the two DataFrames line up.

# Flag every coordinates row by whether its country name also appears in the
# elevation data; rows without a matching name are marked False.
df_coordinates["in_elevation"] = df_coordinates["country"].isin(
    df_elevation["country"]
)

# Show only the countries that have no match in the elevation data
df_coordinates.loc[~df_coordinates["in_elevation"]]

Unnamed: 0,country,coordinates,country-code,continent,in_elevation
10,San Marino,"43.9364661946, 12.4422225588",SMR,Europe,False
15,Monaco,"43.7398461874, 7.39897432483",MCO,Europe,False
17,Czechia,"49.7324482055, 15.3141031033",CZE,Europe,False
18,Liechtenstein,"47.1382064295, 9.54090028864",LIE,Europe,False
24,Malta,"35.921026987, 14.4037540314",MLT,Europe,False
39,Republic of Serbia,"44.2213759212, 20.790402471",SRB,Europe,False
42,Vatican,"41.9033124692, 12.4534177294",VAT,Europe,False
44,Macedonia,"41.5950049267, 21.6838304101",MKD,Europe,False



**In the coordinates dataset, rename the following countries:**
1.  Czechia as Czech Republic.
2.  Republic of Serbia as Serbia.
3.  Macedonia as North Macedonia.
4.  Vatican as Vatican City.

**In the elevation dataset, for each microstate we will impute the average elevation of the country it lies within:**
- San Marino is a microstate within Italy. It is not present in the elevation dataset. 
- Monaco is a microstate within France.
- Vatican City is a microstate within Italy.
- Liechtenstein is a microstate within Switzerland.
  
**Malta is an island nation in the Mediterranean Sea with an average elevation of 127 meters. We will add this value to the elevation dataset.**


#### Renaming Czechia, Republic of Serbia, Macedonia, Vatican in Coordinates dataset

In [161]:
# Create a function to rename countries in a DataFrame


def rename_countries(df, rename_dict):
    """
    Renames countries in the DataFrame based on a provided dictionary.

    The input DataFrame is left untouched: the renaming is applied to a copy,
    so re-running a cell that calls this function always starts from the same
    input and earlier displayed frames do not silently change.

    Parameters:
    df (pd.DataFrame): The DataFrame with country data. Must contain a
                       "country" column.
    rename_dict (dict): A dictionary where keys are current country names and
                        values are new country names. Names that do not occur
                        in the "country" column are ignored.

    Returns:
    pd.DataFrame: A new DataFrame with updated country names.

    Raises:
    KeyError: If the DataFrame has no "country" column.
    """
    # Work on a copy so the caller's DataFrame is never mutated in place.
    renamed = df.copy()

    if rename_dict:
        # Exact-value replacement of every key with its mapped name in one
        # vectorised pass, instead of one boolean mask per dictionary entry.
        renamed["country"] = renamed["country"].replace(rename_dict)

    return renamed


# Coordinates-file name -> name used by the elevation file
rename_dict = {
    "Czechia": "Czech Republic",
    "Republic of Serbia": "Serbia",
    "Macedonia": "North Macedonia",
    "Vatican": "Vatican City",
}

df_coordinates = rename_countries(df_coordinates, rename_dict)

# Check results: print the row for each new name, in the order listed above
for renamed_country in rename_dict.values():
    print(df_coordinates[df_coordinates["country"] == renamed_country])

           country                   coordinates country-code continent  \
17  Czech Republic  49.7324482055, 15.3141031033          CZE    Europe   

    in_elevation  
17         False  
   country                  coordinates country-code continent  in_elevation
39  Serbia  44.2213759212, 20.790402471          SRB    Europe         False
            country                   coordinates country-code continent  \
44  North Macedonia  41.5950049267, 21.6838304101          MKD    Europe   

    in_elevation  
44         False  
         country                   coordinates country-code continent  \
42  Vatican City  41.9033124692, 12.4534177294          VAT    Europe   

    in_elevation  
42         False  


#### Microstates Imputation using their respective countries' average elevation

In [162]:
# Create a function to impute elevation data for a given country


def add_country_with_another_elevation(df, source_country, target_country):
    """
    Adds a new row to the DataFrame for the target country with the elevation of the source country.

    The input DataFrame is never modified; a new DataFrame is returned when a
    row is added. The call is idempotent: if the target country is already
    present, no second row is appended, so re-running the calling cell cannot
    create duplicate countries.

    Parameters:
    df (pd.DataFrame): The DataFrame to extend. Must contain the columns
                       "country" and "Numeric-Elevation".
    source_country (str): The name of the country whose elevation is to be copied.
    target_country (str): The name of the new country to be added.

    Returns:
    pd.DataFrame: A new DataFrame with the target row appended and the index
                  renumbered, or the original DataFrame unchanged when the
                  source country is missing or the target already exists
                  (a message is printed in both cases).
    """
    # Elevation values of every row matching the source country
    source_elevations = df.loc[df["country"] == source_country, "Numeric-Elevation"]

    if source_elevations.empty:
        print(f"{source_country} not found in the DataFrame.")
        return df

    # Guard against duplicates, e.g. when the calling cell is executed twice.
    if df["country"].eq(target_country).any():
        print(f"{target_country} already present in the DataFrame; no row added.")
        return df

    # Single-row frame for the target; the first matching source row is used.
    target_country_df = pd.DataFrame(
        {
            "country": [target_country],
            "Numeric-Elevation": [source_elevations.iloc[0]],
        }
    )

    # Append the new row and renumber the index
    return pd.concat([df, target_country_df], ignore_index=True)

In [163]:
# Impute elevation data for San Marino, Monaco, Vatican City, and Liechtenstein
# by copying the value of the country each microstate lies within.
for host_country, microstate in [
    ("Italy", "San Marino"),
    ("France", "Monaco"),
    ("Italy", "Vatican City"),
    ("Switzerland", "Liechtenstein"),
]:
    df_elevation = add_country_with_another_elevation(
        df_elevation, host_country, microstate
    )

# Check results: print the row of each microstate
for microstate in ("San Marino", "Monaco", "Vatican City", "Liechtenstein"):
    print(df_elevation[df_elevation["country"] == microstate])

        country  Numeric-Elevation
173  San Marino              538.0
    country  Numeric-Elevation
174  Monaco              375.0
          country  Numeric-Elevation
175  Vatican City              538.0
           country  Numeric-Elevation
176  Liechtenstein             1350.0


### Add the country "Malta" with average elevation of 127 meters to the elevation dataset.

In [164]:
# Create a function to add a new country with a given elevation


def add_country_with_elevation(df, country, elevation):
    """
    Adds a new row to the DataFrame for a specified country with a given elevation.

    The input DataFrame is never modified; a new DataFrame is returned when a
    row is added. The call is idempotent: if the country is already present,
    no second row is appended, so re-running the calling cell cannot create
    duplicate countries.

    Parameters:
    df (pd.DataFrame): The DataFrame to extend. Must contain a "country"
                       column; the elevation is stored under
                       "Numeric-Elevation".
    country (str): The name of the country to be added.
    elevation (float): The elevation value for the country.

    Returns:
    pd.DataFrame: A new DataFrame with the country row appended and the index
                  renumbered, or the original DataFrame unchanged when the
                  country already exists (a message is printed in that case).
    """
    # Guard against duplicates, e.g. when the calling cell is executed twice.
    if df["country"].eq(country).any():
        print(f"{country} already present in the DataFrame; no row added.")
        return df

    # Single-row frame for the new country
    new_country_df = pd.DataFrame(
        {"country": [country], "Numeric-Elevation": [elevation]}
    )

    # Append the new row and renumber the index
    return pd.concat([df, new_country_df], ignore_index=True)


# Malta is absent from the elevation data: insert it with its 127 m average
df_elevation = add_country_with_elevation(
    df_elevation, country="Malta", elevation=127.0
)
df_elevation.loc[df_elevation["country"] == "Malta"]

Unnamed: 0,country,Numeric-Elevation
177,Malta,127.0


In [165]:
# Inner-join coordinates and elevations on the shared 'country' column, then
# tidy the result in a single chain:
#   1. discard the helper 'in_elevation' flag added during validation,
#   2. rename 'Numeric-Elevation' to 'elevation',
#   3. order the columns as country, coordinates, elevation, continent,
#      country-code.
merged = (
    df_coordinates.merge(df_elevation, how="inner", on="country")
    .drop(columns="in_elevation")
    .rename(columns={"Numeric-Elevation": "elevation"})
    .loc[:, ["country", "coordinates", "elevation", "continent", "country-code"]]
)

merged

Unnamed: 0,country,coordinates,elevation,continent,country-code
0,Belarus,"53.5318804783, 28.033566395",170.0,Europe,BLR
1,Germany,"51.1063634863, 10.3814938434",263.0,Europe,DEU
2,Denmark,"62.0740272545, -6.87055477617",34.0,Europe,DN1
3,Portugal,"39.5871468639, -8.59083758901",372.0,Europe,PRT
4,Andorra,"42.541326525, 1.56075624107",1996.0,Europe,AND
5,Albania,"41.1424820604, 20.053819197",708.0,Europe,ALB
6,Poland,"52.1247718189, 19.3943221398",173.0,Europe,POL
7,Bosnia and Herzegovina,"44.1717340565, 17.7734080203",500.0,Europe,BIH
8,United Kingdom,"49.2210642111, -2.12743081446",162.0,Europe,GB1
9,Lithuania,"55.3270236412, 23.8882490157",110.0,Europe,LTU


In [166]:
# Write the merged table to CSV, leaving out the DataFrame index column
merged.to_csv(path_or_buf="4-final-geolocation-data.csv", index=False)