In [1]:
import pandas as pd
import psycopg2
import sys
import pandas
import math
from pandas.io import sql
import sqlalchemy
from sqlalchemy import create_engine
from datetime import datetime
import os
from configobj import ConfigObj
import contextlib

# Put the parent directory of the working directory on sys.path (once), so the
# project-local import that follows can be resolved -- presumably the
# `functions` package lives one level above this notebook; verify the layout.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
from functions.utils import database

In [2]:
## define paths and variables

# Configuration file parsed with ConfigObj; all settings below are read from it.
config_path = "/home/jovyan/shared/rural_planner_refactoring/config_files_test/config_co"
parser = ConfigObj(config_path)

# Top-level settings: base folder of the SQL templates and the country sub-folder.
sql_path = parser['sql_path']
country_folder = parser['country_folder']

# Every remaining setting comes from the 'clustering_params' section.
clustering_params = parser['clustering_params']

schema = clustering_params['schema']
output_table = clustering_params['output_table']

# Names of the tables substituted into the SQL templates.
table_transport = clustering_params['table_transport']
table_infrastructure = clustering_params['table_infrastructure']
table_settlements = clustering_params['table_settlements']
table_coverage = clustering_params['table_coverage']
table_franchises = clustering_params['table_franchises']
table_nodes_original = clustering_params['table_nodes_original']
table_nodes = clustering_params['table_nodes']
table_schools = clustering_params['table_schools']

# The radius is stored as text in the config file and used as an integer.
radius = int(clustering_params['max_coverage_radius'])

In [4]:
# Create node table

# Country-specific SQL template for this step.
query_path = '/'.join([sql_path, country_folder, 'clustering_create_node_table.sql'])

# Values substituted into the template's named placeholders.
node_table_params = {
    'schema': schema,
    'table_coverage': table_coverage,
    'table_settlements': table_settlements,
    'table_infrastructure': table_infrastructure,
    'table_nodes_original': table_nodes_original,
    'table_franchises': table_franchises,
    'table_transport': table_transport,
}

with open(query_path) as sql_file, database(parser) as db:
    query = sql_file.read()
    query_formatted = query.format(**node_table_params)
    db.execute(query_formatted)


In [5]:
# Create node table copy

# Shared (not country-specific) SQL template for this step.
query_path = '/'.join([sql_path, 'clustering_create_node_table_copy.sql'])

# Values substituted into the template's named placeholders.
node_copy_params = {
    'schema': schema,
    'table_nodes': table_nodes,
    'table_nodes_original': table_nodes_original,
}

with open(query_path) as sql_file, database(parser) as db:
    query = sql_file.read()
    query_formatted = query.format(**node_copy_params)
    db.execute(query_formatted)


In [22]:
# Generate clusters with tx

# Country-specific SQL template for the priority-1 tower clustering.
query_path = '/'.join([sql_path, country_folder, 'clustering_towers_priority_1.sql'])

# Values substituted into the template's named placeholders.
towers_p1_params = {
    'schema': schema,
    'table_nodes': table_nodes,
    'radius': radius,
    'table_infrastructure': table_infrastructure,
    'table_coverage': table_coverage,
}

with open(query_path) as sql_file, database(parser) as db:
    query = sql_file.read()
    query_formatted = query.format(**towers_p1_params)
    # Load the query result into a data frame used by the following cells.
    clusters_towers_p1 = pd.read_sql_query(query_formatted, db)
    

In [7]:
# Delete already assigned nodes from node_table

query_path = sql_path + '/' + 'clustering_delete_assigned_nodes.sql'

# Build the comma-separated id list passed to the SQL template as
# `excluded_ids`: every cluster centroid (quoted here) followed by that
# cluster's member nodes. 'nodes' is inserted verbatim, so it is assumed to
# already be a comma-separated list of quoted ids -- TODO confirm against
# the clustering query.
#
# Each row is inspected on its own. The previous version decided whether to
# add the LAST row's nodes by testing the row at the stale loop index (the
# second-to-last row), which could drop ids or emit an empty list element
# (", ,"); it also failed on one-row and empty results.
id_parts = []
for _, cluster in clusters_towers_p1.iterrows():
    id_parts.append('\'' + cluster['centroid'] + '\'')
    # Skip clusters without member nodes (empty string or NULL).
    if cluster['nodes']:
        id_parts.append(cluster['nodes'])
excluded_ids = ', '.join(id_parts)

# Nothing was clustered -> nothing to delete; skip the statement instead of
# formatting it with an empty id list.
if excluded_ids:
    with open(query_path) as file, database(parser) as db:
        query = file.read()
        query_formatted = query.format(schema = schema,
                                 table_nodes = table_nodes,
                                 excluded_ids = excluded_ids)
        db.execute(query_formatted)

In [8]:
# Clustering for towers with no tx

# Country-specific SQL template for the priority-2 tower clustering.
query_path = '/'.join([sql_path, country_folder, 'clustering_towers_priority_2.sql'])

# Values substituted into the template's named placeholders.
towers_p2_params = {
    'schema': schema,
    'table_nodes': table_nodes,
    'radius': radius,
    'table_infrastructure': table_infrastructure,
    'table_coverage': table_coverage,
}

with open(query_path) as sql_file, database(parser) as db:
    query = sql_file.read()
    query_formatted = query.format(**towers_p2_params)
    # Load the query result into a data frame used by the following cells.
    clusters_towers_p2 = pd.read_sql_query(query_formatted, db)
    

In [9]:
# Delete already assigned nodes from node_table

query_path = sql_path + '/' + 'clustering_delete_assigned_nodes.sql'

# Build the comma-separated id list passed to the SQL template as
# `excluded_ids`: every cluster centroid (quoted here) followed by that
# cluster's member nodes. 'nodes' is inserted verbatim, so it is assumed to
# already be a comma-separated list of quoted ids -- TODO confirm against
# the clustering query.
#
# Each row is inspected on its own. The previous version decided whether to
# add the LAST row's nodes by testing the row at the stale loop index (the
# second-to-last row), which could drop ids or emit an empty list element
# (", ,"); it also failed on one-row and empty results.
id_parts = []
for _, cluster in clusters_towers_p2.iterrows():
    id_parts.append('\'' + cluster['centroid'] + '\'')
    # Skip clusters without member nodes (empty string or NULL).
    if cluster['nodes']:
        id_parts.append(cluster['nodes'])
excluded_ids = ', '.join(id_parts)

# Nothing was clustered -> nothing to delete; skip the statement instead of
# formatting it with an empty id list.
if excluded_ids:
    with open(query_path) as file, database(parser) as db:
        query = file.read()
        query_formatted = query.format(schema = schema,
                                 table_nodes = table_nodes,
                                 excluded_ids = excluded_ids)
        db.execute(query_formatted)

In [10]:
# Delete nodes that are towers

# Country-specific SQL template for this step.
query_path = '/'.join([sql_path, country_folder, 'clustering_delete_unwanted_nodes.sql'])

# Values substituted into the template's named placeholders.
unwanted_nodes_params = {
    'schema': schema,
    'table_nodes': table_nodes,
}

with open(query_path) as sql_file, database(parser) as db:
    query = sql_file.read()
    query_formatted = query.format(**unwanted_nodes_params)
    db.execute(query_formatted)


In [11]:
#Clustering for SETTLEMENTS
#
# Repeatedly runs the settlements clustering query; after each run the nodes
# and centroid of the first returned row are deleted from the node table, and
# the query is run again until it returns no rows. Every returned frame is
# collected into `clusters_greenfield`.

query_path = sql_path + '/' + country_folder + '/' + 'clustering_settlements.sql'
del_query_path = sql_path + '/' + 'clustering_delete_assigned_nodes.sql'

with open(query_path) as file, open(del_query_path) as del_file, database(parser) as db:
    query = file.read()
    query_formatted = query.format(schema = schema,
                             table_nodes = table_nodes,
                             table_coverage = table_coverage,
                             table_franchises = table_franchises,
                             table_schools = table_schools,
                             radius = radius)

    # The delete template is formatted inside the loop, once per cluster.
    # (The earlier up-front formatting relied on an `excluded_ids` value left
    # over from a previous cell and its result was never used.)
    del_query = del_file.read()

    df_clusters_int = pd.read_sql_query(query_formatted, db)
    cluster_columns = df_clusters_int.columns

    # Collect the per-iteration frames and concatenate once after the loop:
    # DataFrame.append copied the whole accumulated frame on every iteration
    # and no longer exists in pandas >= 2.0.
    cluster_batches = []

    num_nodes = 0
    excluded_ids = ''

    #We cluster the rest of nodes iteratively
    i = 0
    while not df_clusters_int.empty:
        i = i + 1

        first_cluster = df_clusters_int.iloc[0]

        # 'nodes' is inserted verbatim (assumed to be a comma-separated list
        # of quoted ids -- TODO confirm); the centroid is quoted here. An
        # empty 'nodes' must not leave a leading comma in the id list.
        centroid_id = '\'' + first_cluster['centroid'] + '\''
        if first_cluster['nodes']:
            excluded_ids = first_cluster['nodes'] + ', ' + centroid_id
        else:
            excluded_ids = centroid_id

        del_query_formatted = del_query.format(schema = schema,
                                 table_nodes = table_nodes,
                                 excluded_ids = excluded_ids)

        db.execute(del_query_formatted)

        num_nodes = num_nodes + first_cluster['cluster_size']

        cluster_batches.append(df_clusters_int)

        # Re-run the clustering query on the reduced node table.
        df_clusters_int = pd.read_sql_query(query_formatted, con=db)

        if i % 10 == 0:
            print(str(datetime.now()))
            print("Iteration " + str(i) + " with " + str(num_nodes) + " nodes clustered.")
            print(df_clusters_int)

    # Keep the expected columns even when the very first query returned nothing.
    if cluster_batches:
        clusters_greenfield = pd.concat(cluster_batches, ignore_index=True)
    else:
        clusters_greenfield = pd.DataFrame(columns = cluster_columns)

In [247]:
# Create data frame with the unclustered nodes to include them as one-node clusters

# Shared (not country-specific) SQL template for this step.
query_path = '/'.join([sql_path, 'clustering_clusters_unclustered.sql'])

# Values substituted into the template's named placeholders.
unclustered_params = {
    'schema': schema,
    'table_nodes': table_nodes,
}

with open(query_path) as sql_file, database(parser) as db:
    query = sql_file.read()
    query_formatted = query.format(**unclustered_params)
    clusters_unclustered = pd.read_sql_query(query_formatted, db)


In [248]:
# Stack the four cluster sets (towers priority 1, towers priority 2,
# settlement clusters, unclustered single nodes) into one frame with a fresh
# 0..n-1 index. A single pd.concat replaces the chained DataFrame.append
# calls: append was removed in pandas 2.0, and each chained call copied the
# frame accumulated so far.
clusters = pd.concat(
    [clusters_towers_p1, clusters_towers_p2, clusters_greenfield, clusters_unclustered],
    ignore_index=True,
)

In [126]:
# Write all clusters to the output table, then run the add-geom SQL template

query_path = '/'.join([sql_path, 'clustering_add_geom_clusters.sql'])

# Columns stored with an explicit SQL integer type.
integer_columns = {
    column_name: sqlalchemy.types.Integer()
    for column_name in ('centroid_weight', 'cluster_weight', 'cluster_size')
}

# Values substituted into the template's named placeholders.
add_geom_params = {
    'schema': schema,
    'output_table': output_table,
    'table_nodes_original': table_nodes_original,
}

with open(query_path) as sql_file, database(parser) as db:
    # Replace any existing output table with the contents of `clusters`.
    clusters.to_sql(
        output_table,
        con=db,
        if_exists='replace',
        schema=schema,
        index=False,
        dtype=integer_columns,
    )
    query = sql_file.read()
    query_formatted = query.format(**add_geom_params)
    db.execute(query_formatted)


  (attype, name))


In [127]:
# Create links data frame

# Shared (not country-specific) SQL template for this step.
query_path = '/'.join([sql_path, 'clustering_create_clusters_links.sql'])

# Values substituted into the template's named placeholders.
links_params = {
    'schema': schema,
    'output_table': output_table,
    'table_nodes_original': table_nodes_original,
}

with open(query_path) as sql_file, database(parser) as db:
    query = sql_file.read()
    query_formatted = query.format(**links_params)
    db.execute(query_formatted)
