# Finding coordinates for each country #

### The OpenWeather API requires latitude and longitude coordinates in order to retrieve weather data ###
'countries.csv' (from the Google Dataset Publishing Language data) was used to provide these coordinates. However, in the first merge attempt, multiple rows received no coordinates because of small name mismatches such as missing accents or rephrased names. This notebook shows how fuzzy matching was used to obtain coordinates for country names that are spelled differently but refer to the same place.

In [3]:
import pandas as pd 

from pathlib import Path

from thefuzz import process

from collections import Counter, defaultdict # For finding duplicates in the country name mapping without key error

In [4]:
# Folder holding every input file used in this notebook.
data_dir = Path("project_data")

# Tourism arrivals per country together with the Go / No Go recommendation.
csv_file_path = Path(data_dir, "full_sorted_tourism_with_recommendation.csv")
dest_df = pd.read_csv(csv_file_path, header=0)

In [5]:
# Checking the data: 198 rows; Country, Mean Total Arrivals and Recommendation have no missing values
# (only the yearly columns 2019-2022 contain gaps).

dest_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 198 entries, 0 to 197
Data columns (total 11 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Unnamed: 0           198 non-null    int64  
 1   Country              198 non-null    object 
 2   Arrivals             198 non-null    object 
 3   Units                198 non-null    object 
 4   2018                 198 non-null    float64
 5   2019                 195 non-null    float64
 6   2020                 189 non-null    float64
 7   2021                 181 non-null    float64
 8   2022                 148 non-null    float64
 9   Mean Total Arrivals  198 non-null    float64
 10  Recommendation       198 non-null    object 
dtypes: float64(6), int64(1), object(4)
memory usage: 17.1+ KB


In [6]:
# Previewing the first rows of the tourism data to see how each column is populated

dest_df.head(25)

Unnamed: 0.1,Unnamed: 0,Country,Arrivals,Units,2018,2019,2020,2021,2022,Mean Total Arrivals,Recommendation
0,2,Albania,total arrivals,thousands,5927.0,6406.0,2658.0,5689.0,7543.8,5644.76,Go
1,8,Algeria,total arrivals,thousands,2657.0,2371.0,591.0,125.0,1398.0,1428.4,Go
2,14,American Samoa,total arrivals,thousands,51.8,58.6,0.9,,,37.1,No Go
3,20,Andorra,total arrivals,thousands,8328.0,8235.0,5207.0,5422.0,8426.7,7123.74,Go
4,26,Angola,total arrivals,thousands,218.0,218.0,64.0,64.0,130.0,138.8,No Go
5,32,Anguilla,total arrivals,thousands,87.0,166.0,41.1,28.7,95.8,83.72,No Go
6,38,Antigua And Barbuda,total arrivals,thousands,1064.0,1035.0,276.0,250.0,642.4,653.48,Go
7,44,Argentina,total arrivals,thousands,10394.0,11131.0,3096.0,460.0,7503.0,6516.8,Go
8,50,Armenia,total arrivals,thousands,1652.0,1894.0,375.0,870.0,1666.0,1291.4,Go
9,56,Aruba,total arrivals,thousands,1897.0,1951.0,623.0,943.0,1711.0,1425.0,Go


In [7]:
# Dataset with latitude and longitude coordinates for each country.
countries_path = Path(data_dir, "countries.csv")
co_df = pd.read_csv(countries_path, header=0)

In [8]:
# Checking data: 245 rows and no missing country names (`name`), but `latitude`/`longitude` are
# missing for one row and the two-letter `country` code is null for one row.
# NOTE(review): the null code may be Namibia's "NA" being parsed as a missing value by read_csv — TODO confirm.

co_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 245 entries, 0 to 244
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   country    244 non-null    object 
 1   latitude   244 non-null    float64
 2   longitude  244 non-null    float64
 3   name       245 non-null    object 
dtypes: float64(2), object(2)
memory usage: 7.8+ KB


In [9]:
# Looking at which country has no coordinates

co_df[co_df["latitude"].isna()]

# The only row without coordinates is U.S. Minor Outlying Islands, which makes sense
# because it covers multiple separate places rather than a single location.

Unnamed: 0,country,latitude,longitude,name
226,UM,,,U.S. Minor Outlying Islands


In [10]:
# Previewing the first rows of the coordinates data (country code, latitude, longitude, name)

co_df.head(25)

Unnamed: 0,country,latitude,longitude,name
0,AD,42.546245,1.601554,Andorra
1,AE,23.424076,53.847818,United Arab Emirates
2,AF,33.93911,67.709953,Afghanistan
3,AG,17.060816,-61.796428,Antigua and Barbuda
4,AI,18.220554,-63.068615,Anguilla
5,AL,41.153332,20.168331,Albania
6,AM,40.069099,45.038189,Armenia
7,AN,12.226079,-69.060087,Netherlands Antilles
8,AO,-11.202692,17.873887,Angola
9,AQ,-75.250973,-0.071389,Antarctica


In [11]:
# In the first merge attempt 36 rows had no values to merge, 6 of them only because "And"/"Of" were not capitalised.
# So converting the country names to Title Case, to match the names in the destinations df before merging.

co_df["name"]=co_df["name"].str.title() 

In [12]:
# Renaming columns so the two dataframes can be merged: the two-letter code becomes "code"
# and the country name column gets the same label ("Country") as in dest_df.
co_df = co_df.rename(columns={"country": "code", "name": "Country"})

In [13]:
# Checking the renamed columns and the Title Case country names
co_df.head(30)

Unnamed: 0,code,latitude,longitude,Country
0,AD,42.546245,1.601554,Andorra
1,AE,23.424076,53.847818,United Arab Emirates
2,AF,33.93911,67.709953,Afghanistan
3,AG,17.060816,-61.796428,Antigua And Barbuda
4,AI,18.220554,-63.068615,Anguilla
5,AL,41.153332,20.168331,Albania
6,AM,40.069099,45.038189,Armenia
7,AN,12.226079,-69.060087,Netherlands Antilles
8,AO,-11.202692,17.873887,Angola
9,AQ,-75.250973,-0.071389,Antarctica


In [14]:
# The first merge attempt left multiple rows without values because of small mismatches
# (missing accents, rephrased names). Fuzzy matching is used to catch the country names
# which are in fact the same place, so collect the distinct names from both datasets.

dest_names = list(dest_df["Country"].unique())  # names used in the tourism data
co_names = list(co_df["Country"].unique())      # names used in the coordinates data

In [15]:
dest_names # 198 distinct destination country names

['Albania',
 'Algeria',
 'American Samoa',
 'Andorra',
 'Angola',
 'Anguilla',
 'Antigua And Barbuda',
 'Argentina',
 'Armenia',
 'Aruba',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Bahamas',
 'Bahrain',
 'Bangladesh',
 'Barbados',
 'Belarus',
 'Belgium',
 'Belize',
 'Benin',
 'Bermuda',
 'Bhutan',
 'Bolivia, Plurinational State Of',
 'Bosnia And Herzegovina',
 'Botswana',
 'Brazil',
 'British Virgin Islands',
 'Brunei Darussalam',
 'Bulgaria',
 'Burkina Faso',
 'Cabo Verde',
 'Cambodia',
 'Cameroon',
 'Canada',
 'Cayman Islands',
 'Central African Republic',
 'Chad',
 'Chile',
 'China',
 'Colombia',
 'Comoros',
 'Congo',
 'Congo, Democratic Republic Of The',
 'Cook Islands',
 'Costa Rica',
 'Cote D´Ivoire',
 'Croatia',
 'Cuba',
 'Curaçao',
 'Cyprus',
 'Czech Republic (Czechia)',
 'Denmark',
 'Djibouti',
 'Dominica',
 'Dominican Republic',
 'Ecuador',
 'Egypt',
 'El Salvador',
 'Estonia',
 'Eswatini',
 'Ethiopia',
 'Fiji',
 'Finland',
 'France',
 'French Polynesia',
 'Gambia',
 'Georgi

In [16]:
co_names # 245 country names; the extras that are not in dest_df don't need merge values, so co names are mapped onto dest names

['Andorra',
 'United Arab Emirates',
 'Afghanistan',
 'Antigua And Barbuda',
 'Anguilla',
 'Albania',
 'Armenia',
 'Netherlands Antilles',
 'Angola',
 'Antarctica',
 'Argentina',
 'American Samoa',
 'Austria',
 'Australia',
 'Aruba',
 'Azerbaijan',
 'Bosnia And Herzegovina',
 'Barbados',
 'Bangladesh',
 'Belgium',
 'Burkina Faso',
 'Bulgaria',
 'Bahrain',
 'Burundi',
 'Benin',
 'Bermuda',
 'Brunei',
 'Bolivia',
 'Brazil',
 'Bahamas',
 'Bhutan',
 'Bouvet Island',
 'Botswana',
 'Belarus',
 'Belize',
 'Canada',
 'Cocos [Keeling] Islands',
 'Congo [Drc]',
 'Central African Republic',
 'Congo [Republic]',
 'Switzerland',
 "Côte D'Ivoire",
 'Cook Islands',
 'Chile',
 'Cameroon',
 'China',
 'Colombia',
 'Costa Rica',
 'Cuba',
 'Cape Verde',
 'Christmas Island',
 'Cyprus',
 'Czech Republic',
 'Germany',
 'Djibouti',
 'Denmark',
 'Dominica',
 'Dominican Republic',
 'Algeria',
 'Ecuador',
 'Estonia',
 'Egypt',
 'Western Sahara',
 'Eritrea',
 'Spain',
 'Ethiopia',
 'Finland',
 'Fiji',
 'Falkland 

In [17]:
# Using the fuzzy matching process to find, for every name in the coordinates data,
# the closest destination name together with its similarity score.

def _closest_destination(name):
    """Return the best fuzzy match for `name` among dest_names as {"match", "score"}."""
    best_name, best_score = process.extractOne(name, dest_names)
    return {"match": best_name, "score": best_score}


fuzzy_mapping1 = {name: _closest_destination(name) for name in co_df["Country"].unique()}

In [18]:
# Manually investigating non-exact matches: list every coordinates-file name whose best
# destination match scored below 100, together with that match and its score.
# Single quotes are used inside the f-string so the cell also runs on Python < 3.12
# (reusing the outer quote character inside an f-string is only legal from 3.12 on).

for co_name, info in fuzzy_mapping1.items():
    if info["score"] < 100:
        print(f"{co_name} -- {info['match']} (Score: {info['score']})")

Afghanistan -- Ghana (Score: 72)
Netherlands Antilles -- Netherlands (Score: 90)
Antarctica -- Qatar (Score: 72)
Burundi -- Brunei Darussalam (Score: 69)
Brunei -- Brunei Darussalam (Score: 90)
Bolivia -- Bolivia, Plurinational State Of (Score: 90)
Bouvet Island -- Poland (Score: 72)
Cocos [Keeling] Islands -- Cayman Islands (Score: 86)
Congo [Drc] -- Congo (Score: 90)
Congo [Republic] -- Congo (Score: 90)
Côte D'Ivoire -- Cote D´Ivoire (Score: 92)
Cape Verde -- Cabo Verde (Score: 80)
Christmas Island -- Poland (Score: 72)
Czech Republic -- Czech Republic (Czechia) (Score: 90)
Western Sahara -- Aruba (Score: 68)
Eritrea -- Sierra Leone (Score: 62)
Falkland Islands [Islas Malvinas] -- Cayman Islands (Score: 86)
Micronesia -- Micronesia, Federated States Of (Score: 90)
Faroe Islands -- British Virgin Islands (Score: 86)
Gabon -- Bosnia And Herzegovina (Score: 60)
French Guiana -- Guyana (Score: 75)
Guernsey -- Germany (Score: 67)
Gibraltar -- Malta (Score: 72)
Greenland -- Grenada (Score

In [19]:
# Not all of the non-exact matches above are relevant: only the destination names
# that never received a perfect (score 100) match need a closer look.

matched_dest_names = set()
for info in fuzzy_mapping1.values():
    if info["score"] == 100:
        matched_dest_names.add(info["match"])

# Destination names, in their original order, that no coordinates name matched perfectly.
unmatched_dest_names = list(filter(lambda name: name not in matched_dest_names, dest_names))

In [20]:
# Destination names for which no coordinates-file name scored 100
unmatched_dest_names

['Bolivia, Plurinational State Of',
 'Brunei Darussalam',
 'Cabo Verde',
 'Congo',
 'Congo, Democratic Republic Of The',
 'Cote D´Ivoire',
 'Curaçao',
 'Czech Republic (Czechia)',
 'Eswatini',
 'Hong Kong, China',
 'Iran, Islamic Republic Of',
 'Korea, Republic Of',
 'Lao People´S Democratic Republic',
 'Macao, China',
 'Micronesia, Federated States Of',
 'Moldova, Republic Of',
 'Myanmar',
 'North Macedonia',
 'Reunion',
 'Russian Federation',
 'Sao Tome And Principe',
 'Sint Maarten (Dutch Part)',
 'State Of Palestine',
 'Syrian Arab Republic',
 'Taiwan Province Of China',
 'Tanzania, United Republic Of',
 'Türkiye',
 'United States Of America',
 'United States Virgin Islands',
 'Viet Nam']

In [21]:
# Manually comparing this list with the <100 score matches, we can see that many countries have the correct fuzzy match.
# This covers: "Bolivia, Plurinational State Of", "Brunei Darussalam", "Cabo Verde", "Cote D´Ivoire", "Czech Republic (Czechia)",
# "Hong Kong, China", "Iran, Islamic Republic Of", "Micronesia, Federated States Of", "Moldova, Republic Of", "Myanmar", "North Macedonia",
# "Reunion", "Russian Federation", "Sao Tome And Principe", "Syrian Arab Republic", "Taiwan Province Of China", "Tanzania, United Republic Of",
# "United States Of America" and "Viet Nam"

# Issues are: Congo (Republic and DRC must be kept as two separate places), Curacao, Eswatini (called Swaziland
# in the coordinates data), Hong Kong, Korea (want South Korea, as North Korea is a no go), Lao, Macao,
# Sint Maarten, State Of Palestine, US Virgin Islands, and Turkey, which should map to Türkiye, not Turks and Caicos.

# Manually inspecting the countries df to find the names used there for the problem cases above.


def _names_containing(fragment):
    """Return the coordinates-file names that contain `fragment`, ignoring case."""
    return [n for n in co_names if fragment in n.lower()]


cn = _names_containing("con")
swa = _names_containing("swa")
hong = _names_containing("hong")
korea = _names_containing("korea")
lao = _names_containing("lao")
mac = _names_containing("mac")
pal = _names_containing("palest")
tai = _names_containing("taiwan")
vn = _names_containing("virgin")
cura = _names_containing("cura")
maa = _names_containing("maarten")

print(cn, swa, hong, korea, lao, mac, pal, tai, vn, cura, maa)

# Those with available matches are added to the manual overrides dictionary below.

['Congo [Drc]', 'Congo [Republic]'] ['Botswana', 'Swaziland'] ['Hong Kong'] ['North Korea', 'South Korea'] ['Laos'] ['Macedonia [Fyrom]', 'Macau'] ['Palestinian Territories'] ['Taiwan'] ['British Virgin Islands', 'U.S. Virgin Islands'] [] []


In [22]:
# Hand-checked mapping from a name in the coordinates data (key) to the matching name in the
# destinations data (value). "Nowhere" is a placeholder for names that should not be matched
# to any destination.
manual_overrides = {
    "Congo [Republic]" : "Congo",
    "Congo [Drc]" : "Congo, Democratic Republic Of The",
    "Turkey" : "Türkiye",
    "Swaziland" : "Eswatini", 
    "Hong Kong" : "Hong Kong, China",
    "South Korea" : "Korea, Republic Of",
    "North Korea" : "Nowhere",
    "Laos" : "Lao People´S Democratic Republic",
    "Macau" : "Macao, China",
    "Palestinian Territories" : "State Of Palestine",
    "U.S. Virgin Islands" : "United States Virgin Islands" 
}

# No manual override for Sint Maarten and Curacao, as the searches for "maarten" and "cura" in the
# countries df returned nothing.
# Their coordinates were therefore looked up manually and are added after the merge:
# CW Curacao lat: 12.1695 long: -68.990021
# MF Sint Maarten (Dutch Part) lat: 18.0528 long: -63.0425
# NOTE(review): "MF" is the ISO code of the French part (Saint Martin); the Dutch part is "SX" —
# confirm that the coordinates refer to the intended side of the island.

In [23]:
# Mapping each country name in co_df to a destination name: the manual override when one
# exists, otherwise the best fuzzy match.

final_mapping = {
    co_name: (
        manual_overrides[co_name]
        if co_name in manual_overrides
        else fuzzy_mapping1[co_name]["match"]
    )
    for co_name in co_df["Country"].unique()
}

In [24]:
# Previewing the destination name each of the first 30 coordinates rows would be mapped to
co_df["Country"].map(final_mapping).head(30)

0                             Andorra
1                United Arab Emirates
2                               Ghana
3                 Antigua And Barbuda
4                            Anguilla
5                             Albania
6                             Armenia
7                         Netherlands
8                              Angola
9                               Qatar
10                          Argentina
11                     American Samoa
12                            Austria
13                          Australia
14                              Aruba
15                         Azerbaijan
16             Bosnia And Herzegovina
17                           Barbados
18                         Bangladesh
19                            Belgium
20                       Burkina Faso
21                           Bulgaria
22                            Bahrain
23                  Brunei Darussalam
24                              Benin
25                            Bermuda
26          

In [25]:
# Noticed another problem here: Brunei appears twice and so would have two entries after merging.
# This happens a few times, e.g. Algeria and Liberia both fuzzy matched to Algeria, so the final mapping sent them both there.
# Need to reverse engineer this so that the extra matches are sent "nowhere", to avoid giving multiple coordinates to one country.

# Count how many coordinates-file names were mapped to each destination name.
dest_counts = Counter(final_mapping.values())

# Keep the destinations that were matched more than once.
duplicates = set()
for dest in dest_counts:
    if dest_counts[dest] > 1:
        duplicates.add(dest)

duplicates

{'Algeria',
 'Antigua And Barbuda',
 'Armenia',
 'Aruba',
 'Australia',
 'Bahrain',
 'Bolivia, Plurinational State Of',
 'Bosnia And Herzegovina',
 'British Virgin Islands',
 'Brunei Darussalam',
 'Cayman Islands',
 'Cook Islands',
 'French Polynesia',
 'Georgia',
 'Germany',
 'Ghana',
 'Grenada',
 'Guinea-Bissau',
 'Guyana',
 'India',
 'Iran, Islamic Republic Of',
 'Italy',
 'Mali',
 'Malta',
 'Mauritius',
 'Nepal',
 'Netherlands',
 'New Zealand',
 'Northern Mariana Islands',
 'Poland',
 'Qatar',
 'Saint Kitts And Nevis',
 'Saint Lucia',
 'Saint Vincent And The Grenadines',
 'Seychelles',
 'Sierra Leone',
 'Solomon Islands',
 'Sweden',
 'Tajikistan',
 'Togo',
 'Ukraine'}

In [26]:
# To keep only the score-100 fuzzy match for the duplicated destinations we need the score of
# every (coordinates name, matched destination) pair; manually overridden names are left out.

match_scores = {}
for country, data in fuzzy_mapping1.items():
    if country in manual_overrides:
        continue
    match_scores[(country, data["match"])] = data["score"]

# For each destination, list the coordinates-file names that were mapped to it.

reverse_mapping = defaultdict(list)
for co_name in final_mapping:
    reverse_mapping[final_mapping[co_name]].append(co_name)

reverse_mapping

defaultdict(list,
            {'Andorra': ['Andorra'],
             'United Arab Emirates': ['United Arab Emirates'],
             'Ghana': ['Afghanistan', 'Ghana', 'Gaza Strip'],
             'Antigua And Barbuda': ['Antigua And Barbuda',
              'Heard Island And Mcdonald Islands'],
             'Anguilla': ['Anguilla'],
             'Albania': ['Albania'],
             'Armenia': ['Armenia', 'Turkmenistan'],
             'Netherlands': ['Netherlands Antilles', 'Netherlands'],
             'Angola': ['Angola'],
             'Qatar': ['Antarctica', 'Qatar'],
             'Argentina': ['Argentina'],
             'American Samoa': ['American Samoa'],
             'Austria': ['Austria'],
             'Australia': ['Australia', 'Libya'],
             'Aruba': ['Aruba', 'Western Sahara'],
             'Azerbaijan': ['Azerbaijan'],
             'Bosnia And Herzegovina': ['Bosnia And Herzegovina', 'Gabon'],
             'Barbados': ['Barbados'],
             'Bangladesh': ['Bangladesh'

In [27]:
# Reassign fuzzy duplicates only — preserving manual overrides.
#
# For every destination that several coordinates-file names were mapped to, keep at most one
# name and send the others to "Nowhere" (the same placeholder used in manual_overrides), so a
# destination does not receive more than one set of coordinates in the merge.
#
# Rules per duplicated destination:
#   * If a manual override points at the destination, the override wins and every fuzzy match
#     for that destination is sent to "Nowhere". (Previously a fuzzy match could survive next
#     to the override, leaving two coordinate rows for one destination.)
#   * Otherwise keep the fuzzy match with the highest score and drop the rest. A score of 100
#     is the maximum, so an exact match is always preferred; ties go to the name listed first.
#   * Manual overrides themselves are never modified.

for dup in duplicates:
    matched_countries = reverse_mapping[dup]  # names currently mapped to this destination

    manual_matches = [c for c in matched_countries if c in manual_overrides]
    fuzzy_matches = [c for c in matched_countries if c not in manual_overrides]

    if manual_matches:
        # A hand-checked mapping exists, so no fuzzy match may share this destination.
        correct = None
    elif len(fuzzy_matches) > 1:
        # max() returns the first element with the highest score, which matches
        # "first exact match, else first best score".
        correct = max(fuzzy_matches, key=lambda c: match_scores.get((c, dup), 0))
    else:
        # Nothing to resolve for this destination.
        continue

    for country in fuzzy_matches:
        if country != correct:
            final_mapping[country] = "Nowhere"

In [28]:
# Translate every coordinates-file name into its destination name and store the result as a
# new column, so co_df can be joined to dest_df on matching names.

destination_names = co_df["Country"].map(final_mapping)
co_df["Destination Country"] = destination_names

In [29]:
# The datasets can now be merged: every destination row is kept (left join) and receives the
# latitude/longitude of the coordinates row whose "Destination Country" equals its "Country".

coordinate_columns = co_df[["Destination Country", "latitude", "longitude"]]
merge_df = dest_df.merge(
    coordinate_columns,
    how="left",
    left_on="Country",
    right_on="Destination Country",
)

merge_df

Unnamed: 0.1,Unnamed: 0,Country,Arrivals,Units,2018,2019,2020,2021,2022,Mean Total Arrivals,Recommendation,Destination Country,latitude,longitude
0,2,Albania,total arrivals,thousands,5927.0,6406.0,2658.0,5689.0,7543.8,5644.76,Go,Albania,41.153332,20.168331
1,8,Algeria,total arrivals,thousands,2657.0,2371.0,591.0,125.0,1398.0,1428.40,Go,Algeria,28.033886,1.659626
2,14,American Samoa,total arrivals,thousands,51.8,58.6,0.9,,,37.10,No Go,American Samoa,-14.270972,-170.132217
3,20,Andorra,total arrivals,thousands,8328.0,8235.0,5207.0,5422.0,8426.7,7123.74,Go,Andorra,42.546245,1.601554
4,26,Angola,total arrivals,thousands,218.0,218.0,64.0,64.0,130.0,138.80,No Go,Angola,-11.202692,17.873887
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193,1160,Uzbekistan,total arrivals,thousands,5346.0,6749.0,1504.0,1881.0,5233.0,4142.60,Go,Uzbekistan,41.377491,64.585262
194,1166,Vanuatu,total arrivals,thousands,350.2,256.0,82.4,,65.0,188.40,No Go,Vanuatu,-15.376706,166.959158
195,1172,Viet Nam,total arrivals,thousands,15498.0,18009.0,3837.0,157.0,3661.0,8232.40,Go,Viet Nam,14.058324,108.277199
196,1178,Zambia,total arrivals,thousands,1072.0,1266.0,502.0,554.0,,848.50,Go,Zambia,-13.133897,27.849332


In [30]:
# Checking same number of rows in original dest_df and new merge_df

# Row counts of the original and merged frames, one per line; equal row counts mean the merge
# did not duplicate any destination.
print(dest_df.shape, merge_df.shape, sep="\n")

# No duplicates

(198, 11)
(198, 14)


In [31]:
# Checking for destinations that did not receive coordinates from the merge

merge_df[merge_df["latitude"].isna()]

Unnamed: 0.1,Unnamed: 0,Country,Arrivals,Units,2018,2019,2020,2021,2022,Mean Total Arrivals,Recommendation,Destination Country,latitude,longitude
49,296,Curaçao,total arrivals,thousands,1210.0,1293.0,436.0,418.0,1034.2,878.24,Go,,,
162,974,Sint Maarten (Dutch Part),total arrivals,thousands,1775.0,1952.0,542.0,482.0,1217.0,1193.6,Go,,,


In [32]:
# Manually inserting the (latitude, longitude) values for the final two destinations,
# which have no entry in the coordinates data.

manual_coordinates = {
    "Curaçao": (12.1695, -68.990021),
    "Sint Maarten (Dutch Part)": (18.0528, -63.0425),
}
for country_name, lat_long in manual_coordinates.items():
    merge_df.loc[merge_df["Country"] == country_name, ["latitude", "longitude"]] = lat_long

In [33]:
merge_df[merge_df["latitude"].isna()] # Checking the gaps are filled in: no rows should be returned

Unnamed: 0.1,Unnamed: 0,Country,Arrivals,Units,2018,2019,2020,2021,2022,Mean Total Arrivals,Recommendation,Destination Country,latitude,longitude


In [34]:
# Going forward only the tourism columns plus latitude and longitude are needed, so the helper
# column "Destination Country" is left out.

cols = [
    "Unnamed: 0", "Country", "Arrivals", "Units",
    "2018", "2019", "2020", "2021", "2022",
    "Mean Total Arrivals", "Recommendation",
    "latitude", "longitude",
]
full_recs_coords_df = merge_df.loc[:, cols]

full_recs_coords_df.sample(30) # Checking looks right

Unnamed: 0.1,Unnamed: 0,Country,Arrivals,Units,2018,2019,2020,2021,2022,Mean Total Arrivals,Recommendation,latitude,longitude
26,158,Brazil,total arrivals,thousands,6621.0,6353.0,2146.0,746.0,3630.0,3899.2,Go,-14.235004,-51.92528
119,716,Montenegro,total arrivals,thousands,2077.0,2510.0,351.0,1554.0,2036.0,1705.6,Go,42.708678,19.37439
143,860,Poland,total arrivals,thousands,85946.0,88515.0,51076.0,51026.0,71814.0,69675.4,Go,51.919438,19.145136
78,470,Honduras,total arrivals,thousands,2323.0,2315.0,669.0,826.0,1911.0,1608.8,No Go,15.199999,-86.241905
13,80,Bahamas,total arrivals,thousands,6622.0,7250.0,1794.5,2101.0,7000.7,4953.64,No Go,25.03428,-77.39628
167,1004,Spain,total arrivals,thousands,124456.0,126170.0,36410.0,51631.0,104968.0,88727.0,No Go,40.463667,-3.74922
40,242,Colombia,total arrivals,thousands,4398.0,4531.0,1387.0,2146.0,4686.0,3429.6,Go,4.570868,-74.297333
177,1064,Thailand,total arrivals,thousands,38178.0,39916.0,6725.0,511.0,,21332.5,Go,15.870032,100.992541
131,788,Nigeria,total arrivals,thousands,5254.0,5361.0,1209.0,1246.0,1271.0,2868.2,No Go,9.081999,8.675277
48,290,Cuba,total arrivals,thousands,4712.0,4276.0,1086.0,356.4,1614.1,2408.9,Go,21.521757,-77.781167


In [35]:
# Double checking that all 198 rows are present and that Country, Mean Total Arrivals,
# Recommendation, latitude and longitude have no missing values.

full_recs_coords_df.info() 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 198 entries, 0 to 197
Data columns (total 13 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Unnamed: 0           198 non-null    int64  
 1   Country              198 non-null    object 
 2   Arrivals             198 non-null    object 
 3   Units                198 non-null    object 
 4   2018                 198 non-null    float64
 5   2019                 195 non-null    float64
 6   2020                 189 non-null    float64
 7   2021                 181 non-null    float64
 8   2022                 148 non-null    float64
 9   Mean Total Arrivals  198 non-null    float64
 10  Recommendation       198 non-null    object 
 11  latitude             198 non-null    float64
 12  longitude            198 non-null    float64
dtypes: float64(8), int64(1), object(4)
memory usage: 20.2+ KB


In [12]:
# Saving as csv file ready for weather data to be added (currently commented out; uncomment the two lines below to write the file).

# complete_path = data_dir / "full_recommendations_with_coordinates.csv"

# full_recs_coords_df.to_csv(complete_path, index=False)