In [1]:
#
# This program reads a set of files with geographic and demographic data.
# The input data is organized with the following fields:
# - ISOALPHA,
# - COUNTRYNM,
# - NAME1,
# - NAME2,
# - CENTROID_X,
# - CENTROID_Y,
# - TOTAL_A_KM,
# - UN_2000_E,
# - UN_2005_E,
# - UN_2010_E,
# - UN_2015_E,
# - UN_2020_E
# For certain countries (ISOALPHA) at the second administrative level (NAME1) the script:
# - calculates the area-weighted geographic centroid,
# - calculates the population-weighted centroids (one per year, from UN_2000_E to UN_2020_E).
# The results are saved to a common file.
#

In [2]:
#
# Version log
# 
# R0 (20200428)
# First trials.
#


In [3]:
# Import modules
import csv
import os
from math import sin, cos, asin, acos, atan2, hypot
from math import radians as rads, degrees as degs


In [4]:
# IO Names.
# Directories:
# The data directory can be set through the POP_CENTROID_DIR environment variable
# (e.g. './IO Data/'); when it is not set, the original local path is used.
RootDir = os.environ.get('POP_CENTROID_DIR',
                         'D:/0 DOWN/00 PY RG/Maps/POP_CENTROID/IO Data/')
# File names are built as RootDir + name, so RootDir must end with a separator:
if not RootDir.endswith(('/', '\\')):
    RootDir += '/'

# Files:
FileNameO = RootDir + 'CEN R2.csv'


In [41]:
# Functions.
def f_lon(LON, X):
    '''
    Corrects LON, degrees.

    asin only returns angles within [-90, +90]. When the X component is
    negative the angle is mirrored to 180 - |LON|, keeping the sign of LON,
    so that the result covers [-180, +180].

    LON : longitude as obtained from asin, degrees.
    X   : X component of the position vector.
    Returns the corrected longitude, degrees.
    '''
    lon = LON
    if X < 0:
        # The sign is taken with a comparison instead of abs(lon) / lon,
        # which raised ZeroDivisionError for lon == 0 (antimeridian).
        mirrored = 180. - abs(lon)
        lon = -mirrored if lon < 0 else mirrored
    return lon

def f_LON_LAT(X, Y, Z):
    '''
    Returns LON and LAT (in this order) based on X, Y, Z, degrees.

    X, Y, Z : components of a position vector. The vector does not need to
              have unit length (a weighted mean of unit vectors is shorter
              than 1), because only its direction is used.
    Returns (lon, lat) with lon within [-180, +180] and lat within [-90, +90].
    The null vector has no direction; (0., 0.) is returned for it.
    '''
    if (X == 0 and Y == 0 and Z == 0):
        return 0., 0.
    # atan2 is independent of the vector length and resolves the quadrant,
    # unlike asin(Z) and asin(Y / cos(lat)), which are only valid for unit
    # vectors and fail at the poles and on the antimeridian.
    lon_d = degs(atan2(Y, X))
    lat_d = degs(atan2(Z, hypot(X, Y)))
    return lon_d, lat_d


In [6]:
# Fields to retain.
# The fields of interest and their position in the database are:
# - ISOALPHA   : 0
# - NAME1      : 2
# - CENTROID_X : 4
# - CENTROID_Y : 5
# - TOTAL_A_KM : 6
# - UN_2000_E  : 7
# - UN_2005_E  : 8
# - UN_2010_E  : 9
# - UN_2015_E  : 10
# - UN_2020_E  : 11


In [58]:
# Read the data and operate with it.
# Position of the fields of interest within each record:
I_ISO, I_NAME1 = 0, 2
I_LON, I_LAT = 4, 5                # CENTROID_X, CENTROID_Y
I_AREA = 6                         # TOTAL_A_KM
I_POP_FIRST, I_POP_LAST = 7, 11    # UN_2000_E ... UN_2020_E

# Weighting fields, in output order: area first, then one population field per year.
l_weight = [I_AREA] + list(range(I_POP_FIRST, I_POP_LAST + 1))

# Container of the results, with header:
l_pop = []
l_pop.append(['ISO3', 'NAME1', 'AREA', 'LON', 'LAT', 
              'UN_2000_E', 'LON', 'LAT', 
              'UN_2005_E', 'LON', 'LAT',
              'UN_2010_E', 'LON', 'LAT', 
              'UN_2015_E', 'LON', 'LAT',
              'UN_2020_E', 'LON', 'LAT'])

# List of files:
l_file = [x for x in os.listdir(RootDir) if '_clean' in x]

# List of countries:
l_ctry = ['CHN', 'RUS', 'USA', 'AUS', 'CAN', 'BRA']

# Loop over files:
for FileName in l_file:

    # Read the data, no manipulation yet:
    l_data = []
    FileNameI = RootDir + FileName
    # newline = '' lets the csv module handle line endings itself.
    with open(FileNameI, 'r', encoding = 'utf-8', newline = '') as file_i:
        csv_reader = csv.reader(file_i, delimiter = ',', quotechar = '"')
        for l_rec in csv_reader:
            l_data.append(l_rec)

    print(str(len(l_data)) + ' records correctly read from ', FileName)

    # Group the records of the selected countries by admin unit in a single
    # pass, instead of rescanning the whole file for every unit and field.
    d_rows = {country: {} for country in l_ctry}
    for i_rec, l_rec in enumerate(l_data, start = 1):
        # Blank lines are read as empty records; the header and the other
        # countries are not of interest.
        if not l_rec or l_rec[I_ISO] not in d_rows:
            continue
        if len(l_rec) <= I_POP_LAST:
            print('Error at record no. ', i_rec, 'FileName = ', FileName)
            print (l_rec)
            raise IndexError('Record with fewer than ' + str(I_POP_LAST + 1) + ' fields.')
        d_rows[l_rec[I_ISO]].setdefault(l_rec[I_NAME1], []).append(l_rec)

    # Loop over each country in this file:
    for country in l_ctry:
        # Loop over the admin units of this country, in order of first
        # appearance in the file:
        for admin, l_rows in d_rows[country].items():
            # Auxiliary container:
            l_aux = [country, admin]

            # Unit vectors of the record centroids. They are the same for every
            # weighting field, so they are computed once, and only if needed.
            l_unit = None

            # Area-weighted centroid first, then one population-weighted
            # centroid for each year with data:
            for i_weight in l_weight:
                # float() ignores surrounding whitespace such as a trailing newline.
                l_w = [float(x[i_weight]) for x in l_rows]
                TOTAL = sum(l_w)
                if TOTAL > 0:
                    if l_unit is None:
                        l_unit = []
                        for x in l_rows:
                            lon_r = rads(float(x[I_LON]))
                            lat_r = rads(float(x[I_LAT]))
                            l_unit.append((cos(lat_r) * cos(lon_r),
                                           cos(lat_r) * sin(lon_r),
                                           sin(lat_r)))
                    X = sum([u[0] * w for u, w in zip(l_unit, l_w)]) / TOTAL
                    Y = sum([u[1] * w for u, w in zip(l_unit, l_w)]) / TOTAL
                    Z = sum([u[2] * w for u, w in zip(l_unit, l_w)]) / TOTAL
                else:
                    # No weight at all: null vector, reported as LON = LAT = 0.
                    X, Y, Z = 0, 0, 0
                LON, LAT = f_LON_LAT(X, Y, Z)

                # Save data:
                l_aux.append(TOTAL)
                l_aux.append(LON)
                l_aux.append(LAT)

            l_pop.append(l_aux)

    # File completed:
    print(str(len(l_data)) + ' records correctly manipulated.')

print('All records correctly manipulated.')


11036153 records correctly read from  USA_clean.csv
11036153 records correctly manipulated.
All records correctly manipulated.


In [59]:
# Save the centroid table (header plus one row per admin unit) to FileNameO.
# The encoding is set explicitly to UTF-8, the same used to read the input
# files, so that non-ASCII names do not depend on the platform default.
with open(FileNameO, 'w', newline = '', encoding = 'utf-8') as f:
    writer = csv.writer(f)
    writer.writerows(l_pop)
