In [1]:
# UrbanClassifier Toolkit -- 1 Degree (Approx. 110 km)

In [2]:
# Import Libraries

In [3]:
import json
import math
import os

import gdal
import numpy as np
import pandas as pd

In [4]:
# Open the TIFF file and read its first raster band into a NumPy array (a matrix indexed as [line][pixel] by the code below)
# NOTE: the matrix can be very large

In [5]:
# Open the population raster. gdal.Open() reports failure by returning None
# (unless GDAL exceptions have been enabled), so check before using the dataset;
# otherwise the failure only shows up later as an AttributeError on None.
raster_path = 'gpw_v4_population_count_rev11_2020_1_deg.tif'
src = gdal.Open(raster_path)
if src is None:
    raise FileNotFoundError('Could not open raster file: ' + raster_path)

# Band 1 as a NumPy array; the rest of the notebook indexes it as myarray[line][pixel].
myarray = np.array(src.GetRasterBand(1).ReadAsArray())

In [6]:
# Dump the raster matrix as plain text to the file "1_deg" (relative path).
np.savetxt(fname="1_deg", X=myarray)

In [7]:
# GeoTrans takes the Header Data from the Raster File to make proper Calculations

In [8]:
# Geotransform of the opened raster. world2Pixel() reads the upper-left X / Y
# from indexes 0 and 3 and the pixel width / height from indexes 1 and 5.
geoTrans = src.GetGeoTransform()

In [9]:
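# geoTrans is a 6-tuple laid out as (upper-left X, pixel width, rotation, upper-left Y, rotation, pixel height),
# e.g. (-180.0, 0.00833333333333333, 0.0, 89.99999999999991, 0.0, -0.00833333333333333)
# NOTE(review): a pixel size of 0.00833... is 30 arc-seconds, not 1 degree, so these example values may have been
# copied from a different raster than the 1_deg file opened above -- confirm against the actual geoTrans output.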

In [10]:
# Coordinates Conversion Function : 

In [11]:
def world2Pixel(geoMatrix, x, y):
    """Convert a geospatial coordinate to a raster (pixel, line) index.

    Parameters
    ----------
    geoMatrix : sequence of 6 numbers
        Geotransform as returned by gdal's GetGeoTransform(). Index 0 / 3 are
        the upper-left X / Y and index 1 / 5 the pixel width / height. The
        rotation terms (indexes 2 and 4) are not used.
    x, y : float
        Coordinate in the raster's units (x is compared against the
        upper-left X, y against the upper-left Y).

    Returns
    -------
    tuple of int
        (pixel, line), i.e. (column, row). The values are NOT range-checked:
        they are negative for coordinates left of / above the upper-left
        corner and may exceed the raster size on the other side.
    """
    ulX = geoMatrix[0]
    ulY = geoMatrix[3]
    xDist = geoMatrix[1]
    yDist = geoMatrix[5]

    # Use floor rather than int(): int() truncates toward zero, which would
    # map coordinates up to one pixel outside the upper/left edge onto
    # column/row 0 instead of reporting them as out of range (-1).
    pixel = int(math.floor((x - ulX) / xDist))
    line = int(math.floor((ulY - y) / abs(yDist)))

    return (pixel, line)

In [12]:
def population(arr, x, y):
    """Look up the raster value at a geospatial coordinate.

    Parameters
    ----------
    arr : 2-D indexable (indexed as arr[line][pixel])
        Raster matrix to read from.
    x, y : float
        Coordinate to look up; converted to an index with
        world2Pixel(geoTrans, x, y) using the module-level geoTrans.

    Returns
    -------
    The value stored at arr[line][pixel], or the string 'Error' when the
    coordinate cannot be converted or falls outside the matrix.
    """
    try:
        # Convert once and unpack, instead of running the conversion twice.
        pixel, line = world2Pixel(geoTrans, x, y)

        # A negative index would not fail: it silently wraps around and
        # returns a value from the opposite edge of the raster.
        if pixel < 0 or line < 0:
            return 'Error'

        return arr[line][pixel]

    # Only the failures bad input can cause (missing / non-numeric / NaN /
    # infinite coordinates, index past the edge). Programming errors such as
    # an undefined geoTrans are no longer hidden behind 'Error'.
    except (TypeError, ValueError, IndexError, ArithmeticError):
        return 'Error'

In [13]:
# EXAMPLES :

In [14]:
# Example lookup; x is the longitude and y the latitude.
population(arr=myarray, x=-74.0059728, y=40.7127753)

7761219.0

In [15]:
# Testing Stories

In [16]:
def readFile(filepath):
    """Load a JSON file and return its top-level items as a list.

    Parameters
    ----------
    filepath : str
        Path of the JSON file to read.

    Returns
    -------
    list
        list() of the parsed document: the elements of a top-level JSON
        array, or the keys if the top level is a JSON object.
    """
    # Decode explicitly as UTF-8 instead of the platform default encoding,
    # so non-ASCII names are read the same way on every system.
    with open(filepath, 'r', encoding='utf-8') as f:
        stories = json.load(f)

    return list(stories)

In [17]:
# Load the publication records used by the cells below.
publications = readFile(filepath='publications.json')

In [18]:
def extractCoordinates(stories):
    """Reduce each input record to the fields used for classification.

    Parameters
    ----------
    stories : iterable of dict
        Records that provide the keys 'name', 'longitude', 'latitude',
        'city', 'state' and 'country'.

    Returns
    -------
    list of dict
        One new dict per record with the keys 'title', 'longitude',
        'latitude', 'city', 'state', 'country' (in that order).

    Raises
    ------
    KeyError
        If a record lacks one of the required keys.
    """
    # (output key, key in the incoming record).
    # Publications carry their display name under 'name' (stories use
    # 'title'), hence the single rename. The city / state / country fields
    # are for publications and must be separated out for stories.
    field_map = (
        ('title', 'name'),
        ('longitude', 'longitude'),
        ('latitude', 'latitude'),
        ('city', 'city'),
        ('state', 'state'),
        ('country', 'country'),
    )

    return [
        {out_key: story[src_key] for out_key, src_key in field_map}
        for story in stories
    ]

In [19]:
# Reduce the publication records to the fields used for classification.
coordinates = extractCoordinates(stories=publications)

In [20]:
# Example -> Coordinate Object Format

In [21]:
# Display the first extracted record as a sample of the format.
coordinates[0]

{'title': 'Diplomat East Africa',
 'longitude': 37.906193,
 'latitude': -0.023559,
 'city': '',
 'state': '',
 'country': 'Kenya'}

In [22]:
def test(stories, bound, arr):
    
    coordinates = extractCoordinates(stories)
    
    
    for coordinate in coordinates:
                
            urban = False
            x = coordinate['longitude']
            y = coordinate['latitude']
            total = population(arr, x, y)
        
            if (total == 'Error' ):
            
                coordinate['urban'] = 'Error'
                coordinate['bound'] = str(bound) + ' People'
                coordinate['distance'] = 'Approximately 110 Km'
                coordinate['total'] = 'Error'
        
        
        
            else:
        
                if (total >= bound):
                    urban = True

                coordinate['urban'] = urban
                coordinate['bound'] = str(bound) + ' People'
                coordinate['distance'] = 'Approximately 110 Km'
                coordinate['total'] = str(population(arr, x, y))
            
        
    return coordinates

In [23]:
# NOTE(review): this rebinds the name `test` from the function defined above
# to the list it returns. After this cell has run, `test(...)` is no longer
# callable, so extract() (which calls test(...)) fails and re-running this
# cell fails too. Later cells use `test` as the result list, so a rename has
# to be applied to all of them together.
test = test(publications, 50000, myarray)

In [24]:
# Display the first classified record (`test` is the result list here, not the function).
test[0]

{'title': 'Diplomat East Africa',
 'longitude': 37.906193,
 'latitude': -0.023559,
 'city': '',
 'state': '',
 'country': 'Kenya',
 'urban': True,
 'bound': '50000 People',
 'distance': 'Approximately 110 Km',
 'total': '3265681.5'}

In [25]:
def extract(stories, bound, arr):
    """Classify the records and write the result to a JSON file.

    The output path is 'Tests/1_Degree_Population_bound=<bound>.json',
    relative to the current working directory. The 'Tests' directory is
    created when it does not exist yet.

    Parameters
    ----------
    stories, bound, arr
        Passed unchanged to test(stories, bound, arr).

    Returns
    -------
    None

    Notes
    -----
    This relies on the module-level name `test` being the classification
    function. The notebook cell `test = test(...)` rebinds that name to a
    list, after which this call raises TypeError.
    """
    test_set = test(stories, bound, arr)

    out_path = 'Tests/1_Degree_Population_bound=' + str(bound) + '.json'

    # open(..., 'w') does not create missing parent directories.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    with open(out_path, 'w') as json_file:
        json.dump(test_set, json_file)

    return

In [26]:
# Example -> 
# extract(stories, 50000, myarray)

In [27]:
# Convert to DataFrame

In [28]:
def toDataFrame(test_set):
    """Build a DataFrame with one row per classified record.

    Parameters
    ----------
    test_set : list of dict
        Records as returned by test(); keys are matched to the column names.

    Returns
    -------
    pandas.DataFrame
        Columns 'title', 'longitude', 'latitude', 'city', 'state', 'country',
        'urban', 'bound', 'distance', 'total'; rows are indexed 0..n-1 in
        input order. Keys missing from a record become NaN.
    """
    columns = ['title', 'longitude', 'latitude', 'city', 'state', 'country',
               'urban', 'bound', 'distance', 'total']

    # Build the frame in one step. Looking up each row position with
    # test_set.index(record) returns the FIRST equal record, so duplicate
    # records overwrote a single row (and the lookup made the loop quadratic).
    return pd.DataFrame(list(test_set), columns=columns)

In [29]:
# Example -> 
# df = toDataFrame(test)

In [30]:
def toCSV(test_set, filename):
    """Write the classified records to CSV files and report the class shares.

    Four files are written, each named from the `filename` prefix:
    '<filename>_all.csv', '<filename>_classification=urban.csv',
    '<filename>_classification=not_urban.csv' and
    '<filename>_classification=error.csv'.

    Parameters
    ----------
    test_set : list of dict
        Classified records, converted with toDataFrame().
    filename : str
        Path prefix for the output files.

    Returns
    -------
    tuple of float
        (urban %, not-urban %, error %) of all records; (0.0, 0.0, 0.0)
        when there are no records.
    """
    df = toDataFrame(test_set)

    # 'urban' holds True / False or the string 'Error', so compare by value
    # explicitly rather than using the column as a boolean mask.
    urban = df.loc[df["urban"] == True]
    not_urban = df.loc[df["urban"] == False]
    error = df.loc[df["urban"] == "Error"]

    df.to_csv(filename + "_all.csv", encoding='utf-8', index=False)
    urban.to_csv(filename + "_classification=urban.csv", encoding='utf-8', index=False)
    not_urban.to_csv(filename + "_classification=not_urban.csv", encoding='utf-8', index=False)
    # The error file gets the error rows (it used to receive the not-urban rows).
    error.to_csv(filename + "_classification=error.csv", encoding='utf-8', index=False)

    total = len(df)
    if total == 0:
        # No records: avoid dividing by zero.
        return (0.0, 0.0, 0.0)

    urban_percentage = (len(urban) / total) * 100
    not_urban_percentage = (len(not_urban) / total) * 100
    error_percentage = (len(error) / total) * 100

    return (urban_percentage, not_urban_percentage, error_percentage)

In [88]:
# Write the CSV files under Tests/ and display (urban %, not-urban %, error %).
toCSV(test_set=test, filename="Tests/publications")

(94.31818181818183, 1.7045454545454544, 3.977272727272727)