Importing libraries for further usage

In [1]:
# removing warnings
import warnings
warnings.filterwarnings('ignore')

# computational needs
import networkx as nx
import pandas as pd

# scraping for data
import urllib.request
from bs4 import BeautifulSoup 

# plotting
from matplotlib import pyplot as plt

# local files
import roads

# others
from unidecode import unidecode
import random, json
import numpy as np, numpy.random

useful methods for later

## Read City and Road data

In [2]:
def get_cities():
    
    def remove_duplicates(seq):
        seen = set()
        seen_add = seen.add
        return [x for x in seq if not (x in seen or seen_add(x))]

    # get city data from the two sources
    cities_area = pd.read_csv('geo_data/cities_area.csv',"\t")
    cities_popu = pd.read_csv('geo_data/cities_popu.csv',",")
    cities_subs = pd.read_csv('geo_data/cities_subs.csv')
    cities_empl = pd.read_csv("geo_data/cities_empl.csv",'\t')

    # get the cities where the main roads pass from in turkey
    translate = {}
    for e in range(len( cities_subs)):
        translate[ cities_subs.iloc[e]['city'] ] = cities_subs.iloc[e]['admin']

    # get main roads in Turkey
    main_roads = []
    for road in roads.get_cities_in_main_roads():
        main_roads.append( remove_duplicates([translate[i] for i in road]))

    main_cities = list(cities_subs[ cities_subs['city'] == cities_subs['admin']]['city'])
    all_cities = set(sum(main_roads, [])+ main_cities)

    # merge the two data
    cities = pd.merge( cities_subs[['city','lat','lng']], cities_area[['Name','Area(km²)']]
                        , how='outer', left_on='city', right_on='Name')
    cities['old'] = [unidecode(i)  if str(i)!= 'nan' else 'NaN' for i in list(cities['Name'])  ]
    new_cities = pd.merge( cities, cities_popu, how='outer',left_on='old', right_on='asciiname')
    cities = new_cities[ new_cities['city'].isin(all_cities)].reset_index()
    cities = cities[['city','lat','lng','Area(km²)','population']]
    cities_empl = cities_empl.rename(columns={ "İl adı Province name":"city",    "Oran Rate (%)":"Labour",
                                             'Oran Rate (%).1':"Unemployment", 'Oran Rate (%).2':"Employment"})
    cities_empl = cities_empl[['city','Labour','Unemployment','Employment']]
    cities = pd.merge( cities, cities_empl, how="outer")
    return cities

def create_graph():
    cities = get_cities()
    # Put everything in a graph
    G = nx.Graph()
    # create each city
    for city in list(cities['city']):
        # find position of city in map
        city_properties = cities[cities['city'] == city].iloc[0]
        # add the city to the graph
        G.add_node( city_properties['city'], 
                   pos  =    ( city_properties[ 'lat'], city_properties[ 'lng']), 
                   size = int( city_properties[ 'population']),
                   area = int( city_properties[ 'Area(km²)'].replace(',','')),
                   unem = int( city_properties[ 'Unemployment']))
    # put the edges connecting the cities
    # if found city in road, but not in data, fix manually
    for road in main_roads:
        # if any of the paths is not in the list, then mention it, fix it manually
        for each in road:
            if each not in list(cities['city']):
                print("City not found in list: ",each) # TODO: deal automatically

        # remind the cities that are present
        for i in range(len(road)-1):
            # add a connection between each two cities
            G.add_edge( road[i], road[i+1], weight=10)

    # connect the cities not in the list with the closest city in the list
    not_found = set( main_cities)- set(sum(main_roads, []))

    for each in not_found:
        city = cities[ cities['city'] == each].iloc[0]
        # order cities present in list by distance to this city
        found = cities[cities['city'].isin( all_cities)]
        found['distance'] = ((found['lat']-city['lat'])**2 + (found['lng']-city['lng'])**2)
        closest_city = found.sort_values('distance').iloc[1]['city'] # get the closest
        # add road connecting these cities
        G.add_edge( city['city'], closest_city, weight=10)
    return G

### Connect the cities

### Get the zone where the refugees enter, connect those cities

In [3]:
# suggest area from Adana to Van
max_lat = cities[cities['city'] ==  'Van']['lat'].iloc[0]
max_lng = cities[cities['city'] ==  'Van']['lng'].iloc[0]
min_lng = cities[cities['city'] =='Adana']['lng'].iloc[0]

# filter
syrian_zone = list(cities[ ( cities[ 'lng'] <= max_lng) & 
                           ( cities[ 'lng'] >= min_lng) & 
                           ( cities[ 'lat'] <= max_lat)]['city'])

# connect all cities close to Syria with the closest cities in this area
for each in syrian_zone:
    
    city = cities[ cities['city'] == each].iloc[0]
    
    # order cities present in list by distance to this city
    found = cities[cities['city'].isin( [n for n in syrian_zone if n != each])]
    found['distance'] = ((found['lat']-city['lat'])**2 + (found['lng']-city['lng'])**2)
    closest_cities = found.sort_values('distance') 
    
    # add road connecting these cities
    # keep the weight lower to suggest less people can move in between
    G.add_edge( city['city'], closest_cities.iloc[0]['city'], weight=100)

NameError: name 'cities' is not defined

In [None]:
# Picture the map
pos = {city:(long, lat) for (city, (lat,long)) in nx.get_node_attributes(G, 'pos').items()}
nx.draw(G, pos, with_labels=False, node_size=10, node_color='g', edge_color='b')