In [2]:
import csv
import time
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut


In [54]:
filename = 'mdg_admpop_adm2_2018.csv'
madagascar_pop = 30_437_261 # Total population in 2022 (https://www.worldometers.info/world-population/madagascar-population/)

In [55]:
# Read csv file 
data = list(csv.reader(open(filename, 'r')))

# Remove column 6 until 14
for i in range(len(data)):
    data[i] = data[i][:6] + data[i][15:]


# Display the first 5 rows of the csv file
for i in range(5):
    print(data[i])


['\ufeffADM0_PCODE', 'ADM0_EN', 'ADM1_PCODE', 'ADM1_EN', 'ADM2_PCODE', 'ADM2_EN', 'T_TL']
['MG', 'Madagascar', 'MG11', 'Analamanga', 'MG11101001A', '1er Arrondissement', '310355']
['MG', 'Madagascar', 'MG11', 'Analamanga', 'MG11101002A', '2e Arrondissement', '239056']
['MG', 'Madagascar', 'MG11', 'Analamanga', 'MG11101003A', '3e Arrondissement', '167055']
['MG', 'Madagascar', 'MG11', 'Analamanga', 'MG11101004A', '4e Arrondissement', '318488']


In [56]:
# Sum the population of all regions at the last column
total_population = 0
for row in data[1:]:
    total_population += int(row[-1])

print(f'Total population: {total_population}')
print(f'Madagascar population: {madagascar_pop}')
print(f'Percentage of Madagascar population: {total_population/madagascar_pop*100:.2f}%')

Total population: 26696214
Madagascar population: 30437261
Percentage of Madagascar population: 87.71%


In [57]:
# Scale the population of all regions to the population of Madagascar
new_population = 0
for row in data[1:]:
    row[-1] = int(int(row[-1]) / total_population * madagascar_pop)
    new_population += row[-1]

# Display the first 5 rows of the csv file
for i in range(5):
    print(data[i])

print(f'Total population: {new_population}')
print(f'Madagascar population: {madagascar_pop}')
print(f'Percentage of Madagascar population: {new_population/madagascar_pop*100:.2f}%')

['\ufeffADM0_PCODE', 'ADM0_EN', 'ADM1_PCODE', 'ADM1_EN', 'ADM2_PCODE', 'ADM2_EN', 'T_TL']
['MG', 'Madagascar', 'MG11', 'Analamanga', 'MG11101001A', '1er Arrondissement', 353846]
['MG', 'Madagascar', 'MG11', 'Analamanga', 'MG11101002A', '2e Arrondissement', 272555]
['MG', 'Madagascar', 'MG11', 'Analamanga', 'MG11101003A', '3e Arrondissement', 190465]
['MG', 'Madagascar', 'MG11', 'Analamanga', 'MG11101004A', '4e Arrondissement', 363118]
Total population: 30437196
Madagascar population: 30437261
Percentage of Madagascar population: 100.00%


In [58]:
# Write the new data to a new csv file
with open('mdg_admpop_adm2_2018_scaled.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerows(data)

In [None]:
# Add headers for latitude and longitude
data[0].extend(["Latitude", "Longitude"])  # Assuming first row is headers

# Initialize Nominatim geocoder
geolocator = Nominatim(user_agent="Lukraakman@gmail.com")

# Function to get latitude and longitude
def get_lat_lon(place, country="Madagascar"):
    try:
        location = geolocator.geocode(f"{place}, {country}", timeout=10)
        if location:
            return [location.latitude, location.longitude]
    except GeocoderTimedOut:
        return [None, None]
    return [None, None]

# Process each row (skipping header)
for row in data[1:]:
    adm2_name = row[5]  # Assuming ADM2_EN is the 6th column (index 5)
    lat, lon = get_lat_lon(adm2_name)
    row.extend([lat, lon])
    time.sleep(2)  # To prevent rate-limiting issues

# Save the updated list back to CSV
with open("file_with_coordinates.csv", "w", encoding="utf-8", newline="") as file:
    writer = csv.writer(file)
    writer.writerows(data)

print("CSV updated with latitude and longitude!")

CSV updated with latitude and longitude!


Manually check csv-file and fill in missing data

In [11]:
# Load manually updated CSV
data = list(csv.reader(open("file_with_coordinates++.csv", "r")))
for i in range(5):
    print(data[i])

# Correctly format the data
for i in range(len(data)):
    data[i] = [data[i][5]] + [data[i][3]] + [data[i][1]] + data[i][7:9] + ['', ''] + [data[i][6]]
data[0] = ['#name', 'region', 'country', 'latitude', 'longitude', 'location_type', 'conflict_data', 'population']

for i in range(5):
    print(data[i])

# Save the updated list back to CSV
with open("locations.csv", "w", encoding="utf-8", newline="") as file:
    writer = csv.writer(file)
    writer.writerows(data)

['\ufeffADM0_PCODE', 'ADM0_EN', 'ADM1_PCODE', 'ADM1_EN', 'ADM2_PCODE', 'ADM2_EN', 'T_TL', 'Latitude', 'Longitude']
['MG', 'Madagascar', 'MG11', 'Analamanga', 'MG11101001A', '1er Arrondissement', '353846', '-18.906071549181178', ' 47.512099521071725']
['MG', 'Madagascar', 'MG11', 'Analamanga', 'MG11101002A', '2e Arrondissement', '272555', '-18.92606704738931', ' 47.5498013821388']
['MG', 'Madagascar', 'MG11', 'Analamanga', 'MG11101003A', '3e Arrondissement', '190465', '-18.895760608313903', ' 47.52891895256092']
['MG', 'Madagascar', 'MG11', 'Analamanga', 'MG11101004A', '4e Arrondissement', '363118', '-18.934043032141478', ' 47.51311939047174']
['#name', 'region', 'country', 'latitude', 'longitude', 'location_type', 'conflict_data', 'population']
['1er Arrondissement', 'Analamanga', 'Madagascar', '-18.906071549181178', ' 47.512099521071725', '', '', '353846']
['2e Arrondissement', 'Analamanga', 'Madagascar', '-18.92606704738931', ' 47.5498013821388', '', '', '272555']
['3e Arrondissement