# Cálculo de Matriz de Distancia Lineal Distrital

## Importamos Librerias

In [5]:
import os
import datetime
import pandas as pd
import geopandas as gpd
from access import Access

## Establecemos Fechas y Directorio de Trabajo

In [6]:
# create date (today)
hoy = datetime.datetime.today()
fecha = str(hoy.day).zfill(2)+str(hoy.month).zfill(2)+str(hoy.year)

# project working directory
ruta_proy = "D:/00_Documentos/00_GitHub/Matriz_OD_Distritos/"
os.chdir(ruta_proy)

## Leer Datos de [Distritos](https://www.datosabiertos.gob.pe/dataset/codigos-equivalentes-de-ubigeo-del-peru)

In [7]:
df_dist = pd.read_csv("./01_inputs/TB_UBIGEOS.csv", dtype=str)
df_dist = df_dist[['ubigeo_inei', 'distrito', 'latitud', 'longitud','pob_distrito']]

## Pre-Procesamiento de Datos

In [8]:
# duplicates
df_dist.drop_duplicates(subset = 'ubigeo_inei', inplace = True)

# create origin
df_orig = df_dist.copy(deep = True)
df_orig = df_orig.rename(columns = {'ubigeo_inei':'origin','pob_distrito':'pob_origin'})
df_orig = gpd.GeoDataFrame(df_orig, geometry=gpd.points_from_xy(df_orig.longitud, df_orig.latitud), crs="EPSG:4326")

# create destination
df_dest = df_dist.copy(deep = True)
df_dest = df_dest.rename(columns = {'ubigeo_inei':'dest','pob_distrito':'pob_dest'})
df_dest = gpd.GeoDataFrame(df_dest, geometry=gpd.points_from_xy(df_dest.longitud, df_dest.latitud), crs="EPSG:4326")

## Cálculo de Matriz Lineal

In [9]:
A = Access(demand_df            = df_orig,
           demand_index         = 'origin',
           demand_value         = 'pob_origin',
           supply_df            = df_dest,
           supply_index         = 'dest',
           supply_value         = 'pob_dest')


A.demand_df = A.demand_df.to_crs(epsg = 3528)
A.supply_df = A.supply_df.to_crs(epsg = 3528)

A.create_euclidean_distance(threshold = 1300000000) # Loreto: 368851000
df_orig.shape[0]*df_dest.shape[0]

3583449

## Merge Datos

In [10]:
mat_lin = A.cost_df
mat_lin = pd.merge(pd.merge(mat_lin, df_orig, on = 'origin', how = 'left'), df_dest, on = 'dest', how = 'left')
mat_lin = mat_lin[['origin', 'dest', 'euclidean', 'latitud_x', 'longitud_x', 'latitud_y', 'longitud_y']]
mat_lin.columns = ['origin', 'dest', 'euclidean', 'latitud_o', 'longitud_o', 'latitud_d', 'longitud_d']
mat_lin

Unnamed: 0,origin,dest,euclidean,latitud_o,longitud_o,latitud_d,longitud_d
0,010101,010101,0.000000,-6.2294,-77.8728,-6.2294,-77.8728
1,010101,010102,28688.638357,-6.2294,-77.8728,-6.0325,-77.7108
2,010101,010103,70141.422733,-6.2294,-77.8728,-6.8358,-78.0197
3,010101,010104,19582.015890,-6.2294,-77.8728,-6.2556,-77.7008
4,010101,010105,22819.938680,-6.2294,-77.8728,-6.0783,-77.7375
...,...,...,...,...,...,...,...
3511871,240106,240102,6039.293344,-3.6278,-80.4336,-3.6014,-80.4806
3511872,240106,240103,17570.154652,-3.6278,-80.4336,-3.6372,-80.59
3511873,240106,240104,7339.180399,-3.6278,-80.4336,-3.6933,-80.4392
3511874,240106,240105,1956.594392,-3.6278,-80.4336,-3.6408,-80.4453


## Exportar Matriz

In [11]:
mat_lin.to_csv("./03_outputs/01_mat_lineal.csv", encoding="latin8", index = False)