In [33]:
# UrbanClassifier Toolkit -- 1 Degree (Approx. 110 km)

In [34]:
# Import Libraries

In [165]:
import json
import math
import os

import gdal
import numpy as np
import pandas as pd

In [166]:
# Open the TIFF file and read raster band 1 into a NumPy array (a 2-D matrix of population counts)
# NOTE: the matrix is very large

In [167]:
# Path of the population-count raster used throughout the notebook.
RASTER_PATH = 'gpw_v4_population_count_rev11_2020_1_deg.tif'

src = gdal.Open(RASTER_PATH)
if src is None:
    # Without gdal.UseExceptions(), gdal.Open reports failure by returning None;
    # fail here with a clear message instead of an AttributeError further down.
    raise RuntimeError('Could not open raster file: ' + RASTER_PATH)

# ReadAsArray already yields a NumPy array; asarray avoids a second copy of a very large matrix.
myarray = np.asarray(src.GetRasterBand(1).ReadAsArray())

In [168]:
# geoTrans holds the raster's geotransform (header data), which is needed to convert geographic coordinates into pixel indices

In [169]:
# Geotransform of the opened raster. world2Pixel reads it by position:
# [0] upper-left X, [1] pixel width, [2] rotation term, [3] upper-left Y, [4] rotation term, [5] pixel height.
# population() reads this module-level name directly.
geoTrans = src.GetGeoTransform()

In [170]:
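# geoTrans is stored as the following : (-180.0, 0.00833333333333333, 0.0, 89.99999999999991, 0.0, -0.00833333333333333)
# NOTE(review): a pixel size of 0.00833 degrees corresponds to a 30 arc-second grid; re-check these values against the 1-degree file opened above.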

In [171]:
# Coordinates Conversion Function : 

In [172]:
def world2Pixel(geoMatrix, x, y):
    """Convert a geospatial coordinate into a raster (pixel, line) index pair.

    Parameters
    ----------
    geoMatrix : sequence of 6 numbers
        GDAL geotransform as returned by ``GetGeoTransform()``:
        ``(ulX, xDist, rtnX, ulY, rtnY, yDist)``.
    x, y : float
        Coordinate in the raster's georeferenced units. Callers in this
        notebook pass longitude as ``x`` and latitude as ``y``.

    Returns
    -------
    tuple of int
        ``(pixel, line)``, i.e. (column, row). Values are truncated toward
        zero by ``int()`` and are NOT clamped to the raster extent, so the
        caller must bounds-check them before indexing.

    Raises
    ------
    ZeroDivisionError
        If the geotransform is degenerate (zero pixel size / singular matrix).
    """
    ulX = geoMatrix[0]
    xDist = geoMatrix[1]
    rtnX = geoMatrix[2]
    ulY = geoMatrix[3]
    rtnY = geoMatrix[4]
    yDist = geoMatrix[5]

    if rtnX == 0 and rtnY == 0:
        # No rotation: the axes are independent, so keep the simple per-axis
        # arithmetic (identical to the previous implementation).
        pixel = int((x - ulX) / xDist)
        line = int((ulY - y) / abs(yDist))
    else:
        # Rotated/sheared raster: invert the full affine transform
        #   X = ulX + pixel * xDist + line * rtnX
        #   Y = ulY + pixel * rtnY  + line * yDist
        # instead of silently ignoring the rotation terms.
        det = xDist * yDist - rtnX * rtnY
        dx = x - ulX
        dy = y - ulY
        pixel = int((yDist * dx - rtnX * dy) / det)
        line = int((xDist * dy - rtnY * dx) / det)

    return (pixel, line)

In [173]:
def population(arr, x, y, geoMatrix=None):
    """Look up the raster value at a geographic coordinate.

    Parameters
    ----------
    arr : 2-D indexable (e.g. NumPy array)
        Raster values, indexed as ``arr[line][pixel]``.
    x, y : float
        Coordinate to look up (callers pass longitude as ``x``, latitude as ``y``).
    geoMatrix : sequence of 6 numbers, optional
        Geotransform used for the conversion. Defaults to the module-level
        ``geoTrans`` so existing three-argument calls keep working.

    Returns
    -------
    The value stored in ``arr`` at the computed cell, or the string
    ``'Error'`` when the coordinate is invalid or falls outside the raster
    (callers compare against this sentinel).
    """
    if geoMatrix is None:
        geoMatrix = geoTrans

    try:
        # Convert once instead of calling world2Pixel separately for each index.
        pixel, line = world2Pixel(geoMatrix, x, y)

        # Negative indices would silently wrap around to the opposite edge of
        # the raster and return a value for the wrong location.
        if pixel < 0 or line < 0:
            return 'Error'

        return arr[line][pixel]

    except (ArithmeticError, LookupError, TypeError, ValueError):
        # Bad coordinate (None, NaN, inf, non-numeric) or index past the raster
        # edge. Unlike a bare `except:`, this does not swallow KeyboardInterrupt
        # or genuine programming errors such as NameError.
        return 'Error'

In [174]:
# EXAMPLES :

In [175]:
# Arguments are (array, x, y); test() passes longitude as x and latitude as y.
population(myarray, -74.0059728, 40.7127753)

7761219.0

In [176]:
# Testing Stories

In [177]:
def readFile(filepath):
    """Load a JSON document from ``filepath`` and return it as a list.

    The parsed top-level value is passed through ``list()``, so a JSON array
    comes back as a list of its elements and a JSON object comes back as a
    list of its keys.
    """
    with open(filepath, 'r') as handle:
        return list(json.load(handle))

In [194]:
# Load the publication records; the path is relative to the notebook's working directory.
publications = readFile('publications.json')

In [195]:
def extractCoordinates(stories):
    """Reduce story/publication records to the fields used for classification.

    Parameters
    ----------
    stories : iterable of dict
        Records with ``'longitude'`` and ``'latitude'`` keys. Publications
        carry their label under ``'name'`` and also have ``'city'``,
        ``'state'`` and ``'country'``; stories carry their label under
        ``'title'``.

    Returns
    -------
    list of dict
        One new dict per record with keys ``title``, ``longitude``,
        ``latitude``, ``city``, ``state`` and ``country``. Location fields
        missing from a record are filled with ``''``.

    Raises
    ------
    KeyError
        If a record has neither ``'name'`` nor ``'title'``, or lacks
        ``'longitude'`` / ``'latitude'``.
    """
    result = []

    for story in stories:

        obj = {
            # publications use 'name', stories use 'title'
            'title': story['name'] if 'name' in story else story['title'],
            'longitude': story['longitude'],
            'latitude': story['latitude'],

            # publication-only fields; default to '' so story records also work
            'city': story.get('city', ''),
            'state': story.get('state', ''),
            'country': story.get('country', ''),
        }

        result.append(obj)

    return result

In [196]:
# Use the records loaded above; `stories` is never defined in this notebook,
# so the previous call only worked with leftover kernel state.
coordinates = extractCoordinates(publications)

In [197]:
# Example -> Coordinate Object Format

In [198]:
# Display the first extracted record to show the object format.
coordinates[0]

{'title': 'Diplomat East Africa',
 'longitude': 37.906193,
 'latitude': -0.023559,
 'city': '',
 'state': '',
 'country': 'Kenya'}

In [199]:
def test(stories, bound, arr):
    """Classify each record as urban or not by the population at its location.

    Parameters
    ----------
    stories : iterable of dict
        Records accepted by ``extractCoordinates``.
    bound : number
        Population threshold; a location is urban when its raster value is
        greater than or equal to ``bound``.
    arr : 2-D indexable
        Population raster passed through to ``population``.

    Returns
    -------
    list of dict
        The extracted records, each extended with:
        ``urban`` (bool, or ``'Error'`` if the lookup failed),
        ``bound`` (``'<bound> People'``), ``radius`` (fixed label) and
        ``total`` (the raster value as a string, or ``'Error'``).
    """
    coordinates = extractCoordinates(stories)

    for coordinate in coordinates:

        x = coordinate['longitude']
        y = coordinate['latitude']

        # Single raster lookup per record; the value is reused for both the
        # threshold check and the reported total.
        total = population(arr, x, y)

        if total == 'Error':
            urban = 'Error'
            total_label = 'Error'
        else:
            urban = bool(total >= bound)
            total_label = str(total)

        # Keys are added in a fixed order so serialized output stays stable.
        coordinate['urban'] = urban
        coordinate['bound'] = str(bound) + ' People'
        coordinate['radius'] = '110 Km Squared'
        coordinate['total'] = total_label

    return coordinates

In [200]:
# NOTE(review): this rebinds the name `test` from the function to its result list.
# Later cells read `test` as that list, but any later call to test(...) -- including
# the one inside extract() -- raises TypeError until the `def test` cell is re-run.
test = test(publications, 50000, myarray)

In [201]:
# Display the first classified record (`test` is the result list at this point, not the function).
test[0]

{'title': 'Diplomat East Africa',
 'longitude': 37.906193,
 'latitude': -0.023559,
 'city': '',
 'state': '',
 'country': 'Kenya',
 'urban': True,
 'bound': '50000 People',
 'radius': '110 Km Squared',
 'total': '3265681.5'}

In [202]:
def extract(stories, bound, arr):
    """Classify the records and write the result to a JSON file under ``Tests/``.

    The output path is ``Tests/1_Degree_Population_bound=<bound>.json``; the
    directory is created if it does not exist yet.

    Parameters
    ----------
    stories : iterable of dict
        Records accepted by ``test``.
    bound : number
        Population threshold, also embedded in the output file name.
    arr : 2-D indexable
        Population raster.

    Returns
    -------
    None

    Notes
    -----
    ``test`` is looked up when this function runs. The notebook cell
    ``test = test(publications, 50000, myarray)`` rebinds that name to a list,
    after which this call raises TypeError until the ``def test`` cell is
    re-run.
    """
    test_set = test(stories, bound, arr)

    out_path = 'Tests/1_Degree_Population_bound=' + str(bound) + '.json'

    # open(..., 'w') does not create missing parent directories.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    with open(out_path, 'w') as json_file:
        json.dump(test_set, json_file)

In [203]:
# Example -> 
# extract(stories, 50000, myarray)

In [204]:
# Convert to DataFrame

In [205]:
def toDataFrame(test_set):
    """Build a DataFrame from classified records.

    Parameters
    ----------
    test_set : iterable of dict
        Records as produced by ``test``.

    Returns
    -------
    pandas.DataFrame
        One row per record, in input order, with the columns ``title``,
        ``longitude``, ``latitude``, ``city``, ``state``, ``country``,
        ``urban``, ``bound``, ``radius`` and ``total``. Keys outside this
        list are dropped; missing keys become NaN.
    """
    columns = ['title', 'longitude', 'latitude', 'city', 'state', 'country',
               'urban', 'bound', 'radius', 'total']

    # Build the frame in one step. Looking rows up with test_set.index(record)
    # was quadratic and returned the first match, so duplicate records
    # overwrote the same row and rows were lost.
    return pd.DataFrame(list(test_set), columns=columns)

In [209]:
# Example -> 
# df = toDataFrame(test)

In [207]:
def toCSV(test_set, filename):
    """Write classified records to ``filename`` as a UTF-8 CSV without the index column.

    The records are first converted with ``toDataFrame``. Returns None.
    """
    frame = toDataFrame(test_set)
    frame.to_csv(filename, index=False, encoding='utf-8')

In [208]:
# `test` is the list of classified records bound by `test = test(...)` above, not the function.
# NOTE(review): to_csv does not create directories, so Tests/ presumably has to exist already -- confirm.
toCSV(test, "Tests/publications.csv")