In [188]:
import json
import sys

sys.path.append("../")

import pandas as pd
import plotly.express as px
from geopy import Nominatim, distance

from src.constants import centro_geografico_caba, centro_geografico_plata

# Transporte público

## Trenes

In [189]:
# Load the train stop list (file names in this folder look GTFS-style — TODO confirm).
stops_path = "../data/external/transporte_publico/trenes/stops.txt"
df_stops = pd.read_csv(stops_path)

In [190]:
# Use the short coordinate names shared by every table in this notebook and
# tag each row with its transport mode.
df_stops = df_stops.rename(columns={"stop_lat": "lat", "stop_lon": "lon"}).assign(type="tren")

In [191]:
# Preview the first rows of the stops table.
df_stops.head()

Unnamed: 0,stop_id,stop_name,lat,lon,type
0,154,González Catán,-34.771464,-58.646546,tren
1,178,Independencia,-34.760798,-58.617341,tren
2,180,Ing. Budge,-34.718663,-58.462152,tren
3,184,Isidro Casanova,-34.704464,-58.58799,tren
4,203,Justo Villegas,-34.708965,-58.551939,tren


In [192]:
# Routes: keep only the identifier and the two name columns, without repeated rows.
routes_path = "../data/external/transporte_publico/trenes/routes.txt"
df_routes = (
    pd.read_csv(routes_path)[["route_id", "route_short_name", "route_long_name"]]
    .drop_duplicates(keep="last")
)

In [193]:
# Trips: only the route -> trip link is needed to reach the stops later on.
trips_path = "../data/external/transporte_publico/trenes/trips.txt"
df_trips = (
    pd.read_csv(trips_path)[["route_id", "trip_id"]]
    .drop_duplicates(keep="last")
)

In [194]:
# Stop times: reduced to the distinct (trip, stop) pairs.
stop_times_path = "../data/external/transporte_publico/trenes/stop_times.txt"
df_stop_times = (
    pd.read_csv(stop_times_path)[["trip_id", "stop_id"]]
    .drop_duplicates(keep="last")
)

In [195]:
# Walk routes -> trips -> stop_times -> stops to attach line names to each stop,
# then keep a single row per coordinate pair (the first one encountered).
# NOTE(review): a station served by several routes keeps only the first route — confirm this is intended.
df_tren = (
    df_routes
    .merge(df_trips, on="route_id")
    .merge(df_stop_times, on="trip_id")
    .merge(df_stops, on="stop_id")
    .drop_duplicates(subset=["lat", "lon"], keep="first")
)

df_tren.head(2)

Unnamed: 0,route_id,route_short_name,route_long_name,trip_id,stop_id,stop_name,lat,lon,type
0,1,Sarmiento,Sarmiento / Once - Moreno,32001211,70,Castelar,-34.652156,-58.642526,tren
626,1,Sarmiento,Sarmiento / Once - Moreno,32001211,279,Morón,-34.648223,-58.619813,tren


In [196]:
# Keep the descriptive columns and rename the route names to the notebook's
# "linea" / "ramal" vocabulary.
# route_long_name looks like "Sarmiento / Once - Moreno": the branch is the text
# after the first "/". The vectorised `.str[1]` yields NaN (instead of raising)
# when a name has no "/" or is missing, and `.str.strip()` removes the space left
# around the separator so values match those in the Belgrano Norte CSV.
df_tren = (
    df_tren[["route_id", "route_short_name", "route_long_name", "stop_id", "stop_name", "lat", "lon", "type"]]
    .rename(columns={"route_short_name": "linea", "route_long_name": "ramal"})
    .assign(ramal=lambda frame: frame["ramal"].str.split("/").str[1].str.strip())
)

In [197]:
# Belgrano Norte stops are loaded from their own CSV instead of the routes/trips files above.
belgrano_path = "../data/external/transporte_publico/trenes/tren_belgrano_norte.csv"
df_belgrano = pd.read_csv(belgrano_path)
df_belgrano.head(2)

Unnamed: 0,linea,ramal,stop_name,lat,lon,type
0,Belgrano Norte,Grand Bourg,Retiro,-34.589842,-58.374486,tren
1,Belgrano Norte,Grand Bourg,Saldias,-34.575302,-58.399313,tren


In [198]:
# Reduce the merged table to the columns listed below, then stack the
# Belgrano Norte rows underneath with a fresh 0..n-1 index.
tren_columns = ["linea", "ramal", "stop_name", "lat", "lon", "type"]
df_tren = pd.concat([df_tren[tren_columns], df_belgrano], axis=0, ignore_index=True)

In [199]:
# Preview the first two rows of the combined train table.
df_tren.head(2)

Unnamed: 0,linea,ramal,stop_name,lat,lon,type
0,Sarmiento,Once - Moreno,Castelar,-34.652156,-58.642526,tren
1,Sarmiento,Once - Moreno,Morón,-34.648223,-58.619813,tren


In [200]:
# Colour-scale definitions; this cell defines them but does not pass them to the figure.
color_scale0 = [(0, "blue")]
color_scale1 = [(0, "orange"), (1, "red")]

# Map of every train stop, coloured by line; hovering shows line, branch and stop name.
fig = px.scatter_mapbox(
    df_tren,
    lat="lat",
    lon="lon",
    hover_data=["linea", "ramal", "stop_name"],
    color="linea",
    color_continuous_scale="bluered",
    zoom=10,
    height=900,
    width=1600,
)
# Dark basemap and no outer margins, set in a single layout update.
fig.update_layout(
    mapbox_style="carto-darkmatter",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()

In [201]:
# Distance in km (geopy `distance.distance`) from each stop to the two reference
# centres imported from src.constants, one output column per centre.
reference_points = {
    "dist_centro_caba": centro_geografico_caba,
    "dist_centro_plata": centro_geografico_plata,
}
for dist_col, ref_point in reference_points.items():
    df_tren[dist_col] = df_tren.apply(
        lambda stop: distance.distance(ref_point, [stop.lat, stop.lon]).km,
        axis=1,
    )

# Remove fully identical rows, keeping the last occurrence.
df_tren = df_tren.drop_duplicates(keep="last")

In [202]:
# Persist the full train table. `index=False` and `engine="pyarrow"` match every
# other export in this notebook; without `index=False` the non-contiguous index
# left by drop_duplicates would be written to the file as an extra column.
df_tren.to_parquet("../data/interim/transporte/trenes_all.parquet", index=False, engine="pyarrow")

In [203]:
# Keep stops closer than these radii (km) to either reference centre.
max_km_caba = 11
max_km_plata = 17
near_caba = df_tren["dist_centro_caba"] < max_km_caba
near_plata = df_tren["dist_centro_plata"] < max_km_plata
df_tren = df_tren[near_caba | near_plata]

df_tren.to_parquet(
    "../data/interim/transporte/trenes_important.parquet",
    engine="pyarrow",
    index=False,
)

In [204]:
# Preview the first two rows of the distance-filtered train table.
df_tren.head(2)

Unnamed: 0,linea,ramal,stop_name,lat,lon,type,dist_centro_caba,dist_centro_plata
4,Sarmiento,Once - Moreno,Ciudadela,-34.639923,-58.541123,tren,9.134687,62.10833
5,Sarmiento,Once - Moreno,Liniers,-34.638873,-58.526341,tren,7.815663,61.002599


In [205]:
# Colour-scale definitions; this cell defines them but does not pass them to the figure.
color_scale0 = [(0, "blue")]
color_scale1 = [(0, "orange"), (1, "red")]

# Map of the train stops currently held in df_tren, coloured by line.
fig = px.scatter_mapbox(
    df_tren,
    lat="lat",
    lon="lon",
    hover_data=["linea", "ramal", "stop_name"],
    color="linea",
    color_continuous_scale="bluered",
    zoom=10,
    height=900,
    width=1600,
)
# Dark basemap and no outer margins, set in a single layout update.
fig.update_layout(
    mapbox_style="carto-darkmatter",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()

## Subte

In [206]:
# Load the subway (subte) station list.
subte_path = "../data/external/transporte_publico/subte/estaciones-de-subte.csv"
df_subte = pd.read_csv(subte_path)

In [207]:
# Align the subway table with the train schema: same coordinate and stop-name
# column names, plus a transport-mode tag.
# The station id is not part of the output columns, so it is neither renamed nor
# cast here: the previous `astype(int)` on it had no effect on the result and
# could only raise on a missing or non-integer id.
df_subte = df_subte.rename(columns={"long": "lon", "estacion": "stop_name"})
df_subte["type"] = "subte"

df_subte = df_subte[["linea", "stop_name", "lat", "lon", "type"]]

In [208]:
# Preview the first rows of the cleaned subway table.
df_subte.head()

Unnamed: 0,linea,stop_name,lat,lon,type
0,H,CASEROS,-34.63575,-58.398928,subte
1,H,INCLAN - MEZQUITA AL AHMAD,-34.629376,-58.40097,subte
2,H,HUMBERTO 1°,-34.623092,-58.402323,subte
3,H,VENEZUELA,-34.615242,-58.404732,subte
4,H,ONCE - 30 DE DICIEMBRE,-34.608935,-58.406036,subte


In [209]:
# Write the cleaned subway stations without the DataFrame index.
subte_out = "../data/interim/transporte/subte.parquet"
df_subte.to_parquet(subte_out, engine="pyarrow", index=False)

## Premetro

In [210]:
# Load the premetro station list.
premetro_path = "../data/external/transporte_publico/premetro/estaciones-premetro.csv"
df_premetro = pd.read_csv(premetro_path)

In [211]:
# Align the premetro table with the shared schema and tag its transport mode.
df_premetro = (
    df_premetro
    .rename(columns={"long": "lon", "nombre": "stop_name", "id": "stop_id"})
    .assign(type="premetro")
    .loc[:, ["linea", "stop_name", "lat", "lon", "type"]]
)

In [212]:
# Preview the first rows of the cleaned premetro table.
df_premetro.head()

Unnamed: 0,linea,stop_name,lat,lon,type
0,PREMETRO,INTENDENTE SAGUIER,-34.643757,-58.461491,premetro
1,PREMETRO,BALBASTRO (Cementerio de Flores),-34.648601,-58.456415,premetro
2,PREMETRO,ANA MARÍA JANER,-34.659499,-58.450047,premetro
3,PREMETRO,FERNANDEZ DE LA CRUZ,-34.662225,-58.446607,premetro
4,PREMETRO,PTE. ILLIA (Lacarra),-34.665368,-58.448513,premetro


In [213]:
# Write the cleaned premetro stations without the DataFrame index.
premetro_out = "../data/interim/transporte/premetro.parquet"
df_premetro.to_parquet(premetro_out, engine="pyarrow", index=False)

## Transporte en CABA y La Plata

In [214]:
# Stack trains, subway and premetro into one table. When the cells above run in
# order, df_tren is the distance-filtered frame at this point. Subway and premetro
# rows carry no "ramal" or distance columns, so those are NaN for them.
# `ignore_index=True` (as in the other concats of this notebook) gives unique row
# labels; otherwise each source's labels repeat in the result.
df_transporte = pd.concat([df_tren, df_subte, df_premetro], ignore_index=True)

In [215]:
# Write the combined transport table without the DataFrame index.
transporte_out = "../data/processed/transporte/transporte.parquet"
df_transporte.to_parquet(transporte_out, engine="pyarrow", index=False)

In [216]:
# Preview the first rows of the combined transport table.
df_transporte.head()

Unnamed: 0,linea,ramal,stop_name,lat,lon,type,dist_centro_caba,dist_centro_plata
4,Sarmiento,Once - Moreno,Ciudadela,-34.639923,-58.541123,tren,9.134687,62.10833
5,Sarmiento,Once - Moreno,Liniers,-34.638873,-58.526341,tren,7.815663,61.002599
6,Sarmiento,Once - Moreno,Villa Luro,-34.636249,-58.502131,tren,5.653983,59.26789
7,Sarmiento,Once - Moreno,Floresta,-34.632376,-58.480891,tren,3.735753,57.871035
8,Sarmiento,Once - Moreno,Flores,-34.62785,-58.466177,tren,2.344158,57.040749


## Hospitales

In [217]:
# CABA hospitals come as a JSON document with a "features" list (one entry per
# hospital). The file holds non-ASCII names, so the encoding is stated explicitly
# rather than left to the platform default.
# NOTE(review): UTF-8 is assumed, as is standard for JSON — confirm for this file.
with open("../data/external/salud/hospitales_CABA.json", encoding="utf-8") as f:
    hospitales = json.load(f)
hosp_caba = pd.DataFrame(hospitales["features"])

# La Plata hospitals: semicolon-separated CSV read as latin1.
hosp_plata = pd.read_csv("../data/external/salud/hospitales_LaPlata.csv", delimiter=";", encoding="latin1")

In [218]:
# Flatten each feature into plain columns: coordinates are read as
# [lon, lat] from "geometry", the name from properties["NOMBRE"].
geometries = hosp_caba["geometry"]
hosp_caba = pd.DataFrame(
    {
        "name": hosp_caba["properties"].map(lambda props: props["NOMBRE"]),
        "lat": geometries.map(lambda geom: geom["coordinates"][1]),
        "lon": geometries.map(lambda geom: geom["coordinates"][0]),
        "province": "Capital Federal",
    }
)

In [219]:
# Preview the first two rows of the flattened CABA hospital table.
hosp_caba.head(2)

Unnamed: 0,name,lat,lon,province
0,HOSPITAL GENERAL DE NIÑOS PEDRO DE ELIZALDE,-34.628847,-58.377551,Capital Federal
1,HOSPITAL GENERAL DE NIÑOS RICARDO GUTIERREZ,-34.594192,-58.41207,Capital Federal


In [220]:
# Write the CABA hospitals without the DataFrame index.
hosp_caba_out = "../data/interim/salud/hospitales_caba.parquet"
hosp_caba.to_parquet(hosp_caba_out, engine="pyarrow", index=False)

In [221]:
# Bring the La Plata table to the same schema as the CABA one.
# Unneeded columns are discarded by the final column selection, so no explicit
# `drop` of the address/phone/"Unnamed: N" columns is required (that drop raised
# KeyError whenever the CSV's trailing empty columns changed).
hosp_plata = hosp_plata.rename(columns={"LATITUD": "lat", "LONGITUD": "lon", "CLINICAS Y HOSPITALES": "name"})
hosp_plata["province"] = "Bs.As. G.B.A. Zona Sur"

# Coordinates are text with a decimal comma. Both substitutions go through the
# `.str` accessor: a plain `Series.replace(" ", "")` only matches cells that are
# exactly " " and therefore never removes spaces embedded in a value.
for coord in ("lat", "lon"):
    hosp_plata[coord] = (
        hosp_plata[coord]
        .str.replace(",", ".", regex=False)
        .str.replace(" ", "", regex=False)
        .astype(float)
    )

hosp_plata = hosp_plata[["name", "lat", "lon", "province"]]

In [222]:
# Preview the first two rows of the cleaned La Plata hospital table.
hosp_plata.head(2)

Unnamed: 0,name,lat,lon,province
0,BREAST CLINIC S.A.,-34.907611,-57.959,Bs.As. G.B.A. Zona Sur
1,CEMPLAN,-34.91825,-57.942444,Bs.As. G.B.A. Zona Sur


In [223]:
# Write the La Plata hospitals without the DataFrame index.
hosp_plata_out = "../data/interim/salud/hospitales_plata.parquet"
hosp_plata.to_parquet(hosp_plata_out, engine="pyarrow", index=False)

In [224]:
# Stack both hospital tables row-wise under a fresh 0..n-1 index.
df_hospital = pd.concat((hosp_caba, hosp_plata), ignore_index=True)

In [225]:
# Write the combined hospital table without the DataFrame index.
hospitales_out = "../data/processed/salud/hospitales.parquet"
df_hospital.to_parquet(hospitales_out, engine="pyarrow", index=False)

In [226]:
# Preview the first two rows of the combined hospital table.
df_hospital.head(2)

Unnamed: 0,name,lat,lon,province
0,HOSPITAL GENERAL DE NIÑOS PEDRO DE ELIZALDE,-34.628847,-58.377551,Capital Federal
1,HOSPITAL GENERAL DE NIÑOS RICARDO GUTIERREZ,-34.594192,-58.41207,Capital Federal


## Transporte y hospitales

In [227]:
# Colour-scale definitions; this cell defines them but does not pass them to the figure.
color_scale0 = [(0, "blue")]
color_scale1 = [(0, "orange"), (1, "red")]

# Map of all transport stops in df_transporte, coloured by line.
fig = px.scatter_mapbox(
    df_transporte,
    lat="lat",
    lon="lon",
    hover_data=["linea", "ramal", "stop_name"],
    color="linea",
    color_continuous_scale="bluered",
    zoom=10,
    height=900,
    width=1600,
)
# Dark basemap and no outer margins, set in a single layout update.
fig.update_layout(
    mapbox_style="carto-darkmatter",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()

### Hospitales

In [228]:
# Map of every hospital, coloured by province; hovering shows name and province.
fig = px.scatter_mapbox(
    df_hospital,
    lat="lat",
    lon="lon",
    hover_data=["name", "province"],
    color="province",
    color_continuous_scale="bluered",
    zoom=10,
    height=800,
    width=1600,
)
# Dark basemap and no outer margins, set in a single layout update.
fig.update_layout(
    mapbox_style="carto-darkmatter",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()