<a href="https://colab.research.google.com/github/PawinData/UC/blob/master/DistanceMatrix.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import pandas as pd
from geopy.distance import geodesic

try:
    from geopy.distance import vincenty  # deprecated in geopy 1.13, removed in 2.0
except ImportError:
    vincenty = geodesic  # keep the legacy name usable on geopy >= 2.0

from functions import shortest_path

In [0]:
# The ten Southern California counties covered by this notebook,
# kept in alphabetical order so every matrix below shares one row/column ordering.
COUNTIES = sorted([
    "San Luis Obispo", "Kern", "Santa Barbara", "Ventura", "Los Angeles",
    "Orange", "Riverside", "San Bernardino", "San Diego", "Imperial",
])

In [0]:
# Adjacency network: each county maps to the list of counties it is treated as bordering.
# Every edge is listed in both directions (if A names B, then B names A).
# NOTE(review): San Luis Obispo <-> Ventura is listed as adjacent -- confirm against a county map.
NETWORK = {
    "San Luis Obispo": ["Kern", "Santa Barbara", "Ventura"],
    "Kern": ["San Luis Obispo", "Santa Barbara", "Ventura", "Los Angeles", "San Bernardino"],
    "Santa Barbara": ["San Luis Obispo", "Kern", "Ventura"],
    "Ventura": ["San Luis Obispo", "Kern", "Santa Barbara", "Los Angeles"],
    "Los Angeles": ["Kern", "Ventura", "San Bernardino", "Orange"],
    "Orange": ["Los Angeles", "San Bernardino", "Riverside", "San Diego"],
    "Riverside": ["Orange", "San Bernardino", "San Diego", "Imperial"],
    "San Bernardino": ["Kern", "Los Angeles", "Orange", "Riverside"],
    "San Diego": ["Orange", "Riverside", "Imperial"],
    "Imperial": ["San Diego", "Riverside"],
}

In [58]:
# Build the 0/1 adjacency matrix from NETWORK.
# Each dict entry becomes one column: the column for county `src` holds, for every
# county `dst` (in COUNTIES order), 1 if `dst` is listed among src's neighbours, else 0.
adjacency_columns = {
    src: [1 if dst in NETWORK[src] else 0 for dst in COUNTIES]
    for src in COUNTIES
}

ADJACENCY = pd.DataFrame(adjacency_columns, index=COUNTIES)
ADJACENCY

Unnamed: 0,Imperial,Kern,Los Angeles,Orange,Riverside,San Bernardino,San Diego,San Luis Obispo,Santa Barbara,Ventura
Imperial,0,0,0,0,1,0,1,0,0,0
Kern,0,0,1,0,0,1,0,1,1,1
Los Angeles,0,1,0,1,0,1,0,0,0,1
Orange,0,0,1,0,1,1,1,0,0,0
Riverside,1,0,0,1,0,1,1,0,0,0
San Bernardino,0,1,1,1,1,0,0,0,0,0
San Diego,1,0,0,1,1,0,0,0,0,0
San Luis Obispo,0,1,0,0,0,0,0,0,1,1
Santa Barbara,0,1,0,0,0,0,0,1,0,1
Ventura,0,1,1,0,0,0,0,1,1,0


In [0]:
# write the adjacency matrix (row labels included) to disk as comma-separated text
ADJACENCY.to_csv(path_or_buf="AdjacencyMatrix.csv", sep=",")

In [62]:
# Build the hop-count distance matrix.
# distance = the number of times one has to cross county borders to walk from A to B,
# computed pair by pair with the project helper shortest_path(ADJACENCY, A, B).
hop_columns = {
    start: [shortest_path(ADJACENCY, start, end) for end in COUNTIES]
    for start in COUNTIES
}

DISTANCE = pd.DataFrame(hop_columns, index=COUNTIES)
DISTANCE

Unnamed: 0,Imperial,Kern,Los Angeles,Orange,Riverside,San Bernardino,San Diego,San Luis Obispo,Santa Barbara,Ventura
Imperial,0,3,3,2,1,2,1,4,4,4
Kern,3,0,1,2,2,1,3,1,1,1
Los Angeles,3,1,0,1,2,1,2,2,2,1
Orange,2,2,1,0,1,1,1,3,3,2
Riverside,1,2,2,1,0,1,1,3,3,3
San Bernardino,2,1,1,1,1,0,2,2,2,2
San Diego,1,3,2,1,1,2,0,4,4,3
San Luis Obispo,4,1,2,3,3,2,4,0,1,1
Santa Barbara,4,1,2,3,3,2,4,1,0,1
Ventura,4,1,1,2,3,2,3,1,1,0


In [0]:
# write the hop-count (shortest-network) distance matrix to disk as comma-separated text
DISTANCE.to_csv(path_or_buf="DistanceMatrix_shortestnetworks.csv", sep=",")

In [0]:
# Represent each county by its most populous city.
# Each value is the (latitude, longitude) of that city's city hall in decimal degrees,
# as provided by Google.
COORD = {
    "Imperial": (32.792814, -115.567149),
    "Kern": (35.373311, -119.020485),
    "Los Angeles": (34.053918, -118.242621),
    "Orange": (33.835108, -117.912468),
    "Riverside": (33.980751, -117.375525),
    "San Bernardino": (34.102358, -117.435083),
    "San Diego": (32.717213, -117.162620),
    "San Luis Obispo": (35.282899, -120.662728),
    "Santa Barbara": (34.949782, -120.435179),
    "Ventura": (34.200315, -119.180195),
}

In [65]:
# Build the geographic distance matrix between the counties' representative cities.
# distance = geodesic distance in kilometres between the COORD points, rounded to 4 decimals.
# `geodesic` is used instead of `vincenty`, which geopy deprecated in 1.13 and removed in 2.0,
# so the cell keeps running on current geopy releases.
# NOTE(review): results should match the earlier Vincenty figures at this rounding --
# spot-check against the previously saved CSV.
km_columns = {
    origin: [round(geodesic(COORD[origin], COORD[dest]).km, 4) for dest in COUNTIES]
    for origin in COUNTIES
}

DISTANCE = pd.DataFrame(km_columns, index=COUNTIES)
DISTANCE

Unnamed: 0,Imperial,Kern,Los Angeles,Orange,Riverside,San Bernardino,San Diego,San Luis Obispo,Santa Barbara,Ventura
Imperial,0.0,428.3314,285.4321,247.0956,213.6985,226.3982,149.7461,545.5045,509.9167,370.2322
Kern,428.3314,0.0,162.7891,198.6079,215.8432,202.3772,340.9076,149.6513,137.1865,130.9466
Los Angeles,285.4321,162.7891,0.0,38.9948,80.5001,74.7291,179.0992,260.3505,224.5408,87.9993
Orange,247.0956,198.6079,38.9948,0.0,52.2204,53.1531,142.309,299.1507,262.8661,123.9026
Riverside,213.6985,215.8432,80.5001,52.2204,0.0,14.5671,141.5341,334.2205,300.9491,168.3159
San Bernardino,226.3982,202.3772,74.7291,53.1531,14.5671,0.0,155.7052,323.42,291.0277,161.2997
San Diego,149.7461,340.9076,179.0992,142.309,141.5341,155.7052,0.0,430.6978,391.217,249.468
San Luis Obispo,545.5045,149.6513,260.3505,299.1507,334.2205,323.42,430.6978,0.0,42.3802,181.2539
Santa Barbara,509.9167,137.1865,224.5408,262.8661,300.9491,291.0277,391.217,42.3802,0.0,142.029
Ventura,370.2322,130.9466,87.9993,123.9026,168.3159,161.2997,249.468,181.2539,142.029,0.0


In [0]:
# write the city-to-city (populous centers) distance matrix to disk as comma-separated text
DISTANCE.to_csv(path_or_buf="DistanceMatrix_populous.csv", sep=",")