# **Preparation of PIMUS Network for GTAModel**

### **General pipeline overview**

![methodology](imgs/pipeline.jpg)

### **Pipeline**

* **Install modules**

In [None]:
!pip install \
    --extra-index-url=https://pypi.nvidia.com \
    cudf-cu11 dask-cudf-cu11 cuml-cu11 cugraph-cu11 cuspatial-cu11 cuproj-cu11 cuxfilter-cu11 cucim



**Benefits of using RAPIDS**


![speedup](https://developer-blogs.nvidia.com/wp-content/uploads/2023/03/performance-comparison-pandas-cudf-1-625x386.png)

In [None]:
# Print the GPU, driver and CUDA information visible to this runtime
!nvidia-smi

* **Import modules**

In [1]:
import cudf
import cuspatial
import cupy
import geopandas
from glob import glob
import pandas as pd
import numpy as np
from shapely.geometry import *
from shapely import wkt
import zipfile
#local import
from Converters import Converters 

* **Convert the .net file to CSV files**

In [None]:
#unzip the Network file
!unzip NetModel/BaseVisumProject/network.zip
#create dir to export csv
!mkdir Base_Network
conv = Converters()
#convert .net to csv in the created dir
conv.net2csv('network.net','Base_Network/')

* **Extract shapefiles from each object of the network**

* **Load all objects as dataframes**

In [19]:
# Read every exported CSV into a DataFrame, keyed by the file name up to its first dot
objDFs = {}
for csvPath in glob('Base_Network/*.csv'):
    fileName = csvPath.split('/')[-1]
    objDFs[fileName.split('.')[0]] = pd.read_csv(csvPath)
list(objDFs)

  objDFs = {(f.split('/')[-1]).split('.')[0]: pd.read_csv(f) for f in glob('Base_Network/*.csv')}


['Transferwalktimesbetweenstopareas',
 'Faceitems',
 'Modes',
 'Lineroutes',
 'Operators',
 'Edges',
 'Transferfares',
 'Blockitemtypes',
 'Timeprofiles',
 'Validdays',
 'Turns',
 'Mainzones',
 'Linktypes',
 'Zones',
 'Calendarperiods',
 'Surfaces',
 'Linerouteitems',
 'User-definedattributes',
 'Timeprofileitems',
 'Stoppoints',
 'Blockversions',
 'Stops',
 'Connectors',
 'Faremodel',
 'Stopareas',
 'Tickettypes',
 'Faces',
 'PuTdirections',
 'Intermediatepoints',
 'Faresystems',
 'Nodes',
 'Lines',
 'Links',
 'Network',
 'Demandsegments',
 'Time-varyingattributes',
 'Aliases',
 'Transportsystems',
 'Surfaceitems',
 'Vehiclecombinationitems',
 'Vehiclejourneys',
 'Points',
 'Versionblock',
 'FaresystemtickettypesbyDSeg',
 'Faresupplements',
 'Vehiclejourneysections',
 'Vehiclecombinations',
 'Linkpolygons',
 'Vehicleunits']

In [20]:
# Print the GPU status again (memory usage and utilisation are part of the report)
!nvidia-smi

Thu Aug 31 07:58:25 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.86.10              Driver Version: 535.86.10    CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce GTX 1660 ...    On  | 00000000:01:00.0  On |                  N/A |
| N/A   67C    P5              12W /  60W |    149MiB /  6144MiB |     35%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

* **Filter all the objects with TSys proposals information**

In [21]:
# extract objects with the TSys proposals
props = ['tren_aero', 'regional', 'tren', 'corredor','alimentadora corredor']
# get the files to modify where the TSys appear
p = !grep -lR "Corredor"
# remove non csv files and get the key name
kPropFiles = [(v.split('/')[-1]).split('.')[0] for v in p if v.endswith(".csv")]
print(f'[INFO] Objects to clean: {kPropFiles}')

[INFO] Objects to clean: ['Faresystems', 'Transportsystems']


In [22]:
def _firstByName(objKeys, attr):
    """For each (proposal name, object) pair, collect the first ``attr`` value
    of the rows whose lower-cased NAME equals the proposal name."""
    found = []
    for propName in props:
        for objKey in objKeys:
            frame = objDFs[objKey]
            hits = frame.loc[frame['NAME'].str.lower() == propName, attr].tolist()
            if hits:
                found.append(hits[0])
    return found

# Proposal objects identified by a CODE column -> proposal codes
objWcode = [key for key in kPropFiles if 'CODE' in objDFs[key].columns.tolist()]
cProps = _firstByName(objWcode, 'CODE')
print(cProps)
# Remaining proposal objects are identified by their NO column -> proposal numbers
objWno = list(set(kPropFiles) - set(objWcode))
nProps = _firstByName(objWno, 'NO')
print(nProps)

['RE', 'S', 'C', 'AC']
[13, 12, 11, 10, 9]


In [27]:
def cleanByList(df,column: str, values: list):
    """Return the rows of ``df`` whose ``column`` text matches none of ``values``.

    The comparison is case-insensitive: the column is lower-cased and so are
    the string entries of ``values`` (previously only the column was, so a
    value containing an upper-case letter could never match).

    Parameters
    ----------
    df : DataFrame to filter; it is not modified.
    column : name of a text column of ``df``.
    values : names to remove.

    Returns
    -------
    A new DataFrame without the matching rows. Rows whose ``column`` is
    missing are kept.
    """
    unwanted = [v.lower() if isinstance(v, str) else v for v in values]
    # One vectorised membership test instead of one full-frame filter per value
    return df[~df[column].str.lower().isin(unwanted)]
    
# Drop the rows named after a TSys proposal from every object that mentions one
objDFs.update({key: cleanByList(objDFs[key], 'NAME', props) for key in kPropFiles})
objDFs[kPropFiles[0]]

Unnamed: 0,NO,NAME,RANK,JOINTFARECOMPUTATION,FAREWEIGHT,INITIALFARE,TSYSSETNONPUTLINE\n
0,1,Alimentadora,1,0,1.0,12.0,
1,2,Metro,1,0,1.0,4.5,
2,3,Remanente,1,0,1.0,12.0,
3,4,BRT,1,0,1.0,14.6,
4,5,TM,1,0,1.0,4.5,
5,6,MBUS,1,0,1.0,12.0,
6,7,Remanente 15,1,0,1.0,15.0,
7,8,Remanente 17,1,0,1.0,17.0,


In [25]:
def cleanBycode(s:str, codes=None) -> str:
    """Remove the proposal TSys codes from a comma-separated code string.

    Parameters
    ----------
    s : comma-separated codes (e.g. a TSYSSET cell). Non-string values
        (such as missing cells) are returned unchanged.
    codes : iterable of codes to remove; defaults to the notebook-level
        ``cProps`` list when omitted.

    Returns
    -------
    The remaining codes joined by commas, in their original order and with
    duplicates collapsed to the first occurrence. An empty string is
    returned when every code was removed.
    """
    if not isinstance(s, str):
        return s
    banned = set(cProps if codes is None else codes)
    # dict.fromkeys de-duplicates like the former set difference did, but keeps
    # the input order, so the result no longer depends on set iteration order
    kept = dict.fromkeys(code for code in s.split(',') if code not in banned)
    return ','.join(kept)
def dummy(r):
    """Identity helper: hand back ``r`` exactly as it was received."""
    result = r
    return result
# Every object with a TSYSSET column gets its null rows dropped and the
# proposal codes stripped from the remaining TSYSSET values
objWtsset = [k for k in objDFs.keys() if 'TSYSSET' in objDFs[k].columns.tolist()]
print(f'[INFO] Objects to clean: {objWtsset}')
# State of the first object before cleaning, to compare with the result below
print(objDFs[objWtsset[0]])
for o in objWtsset:
    withTsys = objDFs[o].dropna(subset=['TSYSSET'])
    # assign() builds a new frame instead of writing a column into a filtered
    # slice, which is what raises pandas' SettingWithCopyWarning
    objDFs[o] = withTsys.assign(TSYSSET=withTsys['TSYSSET'].apply(cleanBycode))
objDFs[objWtsset[0]]

[INFO] Objects to clean: ['Modes', 'Turns', 'Linktypes', 'Stoppoints', 'Connectors', 'Links', 'Vehicleunits']
     CODE                        NAME                    TSYSSET  \
0  Auto_C                  Auto Cuota                     Auto_C   
1   Ca1_C     Ca1 Camion articulado 1                      Ca1_C   
2   Ca2_C     Ca2 Camion articulado 2                      Ca2_C   
3   Ca_BD  Camion articulado Big Data                      Ca_BD   
4   Cu_BD    Camion unitario Big Data                      Cu_BD   
5    Cu_C          Cu Camion unitario                       Cu_C   
6     Pub                     Publico  Ex,MBUS,Al,M,W,R,RAE,T,TM   

   INTERCHANGEABLE\n  
0                  1  
1                  1  
2                  1  
3                  1  
4                  1  
5                  1  
6                  1  


Unnamed: 0,CODE,NAME,TSYSSET,INTERCHANGEABLE\n
0,Auto_C,Auto Cuota,Auto_C,1
1,Ca1_C,Ca1 Camion articulado 1,Ca1_C,1
2,Ca2_C,Ca2 Camion articulado 2,Ca2_C,1
3,Ca_BD,Camion articulado Big Data,Ca_BD,1
4,Cu_BD,Camion unitario Big Data,Cu_BD,1
5,Cu_C,Cu Camion unitario,Cu_C,1
6,Pub,Publico,"Ex,MBUS,Al,M,W,R,RAE,T,TM",1


* **Read the links and remove walk-only links not associated with stops**

In [28]:
# Display the Links table as it stands before the walk-only filter
objDFs['Links']

Unnamed: 0,NO,FROMNODENO,TONODENO,NAME,TYPENO,TSYSSET,USERDIRECTION,LENGTH,NUMLANES,PLANNO,...,TYPE_LINK_CGA,TYPE_LINK_PRV,VEL_CGA,VEL_FLUJO,VEL_PRV,VEL_PUB,VIALIDAD_PROY,VOL_CAP,VOL_TESC,VOL_TPER\n
0,1,107820,107948,,0,W,0,0.503km,0,0,...,99,0,0.0,0,,,,0,0.0,0.0
1,1,107948,107820,,91,"Ca1_C,Cu_BD,Ca2_C,Ca_BD,W,Auto_C,Cu_C,R",1,0.503km,2,0,...,99,91,22.0,40,30.0,26.795,,1,0.0,0.0
2,2,2327,2328,,0,W,0,0.047km,0,0,...,99,0,0.0,0,,,0.0,0,0.0,0.0
3,2,2328,2327,,1,W,1,0.047km,0,0,...,99,1,0.0,0,,,0.0,0,0.0,0.0
4,3,2329,2330,,0,W,0,0.263km,0,0,...,99,0,0.0,0,,,0.0,0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
376313,201549,140382,131119,,0,"R,W,Auto_C",1,0.044km,1,0,...,99,0,0.0,30,,0.000,,0,0.0,0.0
376316,201553,116460,140834,,94,"Ca1_C,Cu_BD,Ca2_C,Ca_BD,Auto_C,Cu_C",0,11.980km,3,0,...,99,94,60.0,80,60.0,0.000,0.0,1,0.0,0.0
376317,201553,140834,116460,,94,"Ca1_C,Cu_BD,Ca2_C,Ca_BD,Auto_C,Cu_C",1,11.980km,3,0,...,99,94,60.0,80,60.0,0.000,0.0,1,0.0,0.0
376318,201554,140287,140834,,94,"Ca1_C,Cu_BD,Ca2_C,Ca_BD,Auto_C,Cu_C",0,6.652km,3,0,...,99,94,60.0,80,60.0,0.000,0.0,1,0.0,0.0


In [53]:
# Distinct stop-point numbers.
# NOTE(review): these NO values are later matched against node numbers, while
# the Stoppoints table also has a NODENO column — confirm NO is the intended key.
nd = list(pd.unique(objDFs['Stoppoints']['NO']))
# Spot check: is number 2330 among the stop points?
2330 in nd

True

In [54]:
# Keep a link when it has a TSYSSET and either is not walk-only ("W") or
# touches a stop number on one of its two ends
objDFs['Links'] = (
    objDFs['Links']
    .dropna(subset=['TSYSSET'])
    .loc[lambda links: links['TSYSSET'].ne('W')
         | links['FROMNODENO'].isin(nd)
         | links['TONODENO'].isin(nd)]
)
# Display the filtered links
objDFs['Links']

Unnamed: 0,NO,FROMNODENO,TONODENO,NAME,TYPENO,TSYSSET,USERDIRECTION,LENGTH,NUMLANES,PLANNO,...,TYPE_LINK_CGA,TYPE_LINK_PRV,VEL_CGA,VEL_FLUJO,VEL_PRV,VEL_PUB,VIALIDAD_PROY,VOL_CAP,VOL_TESC,VOL_TPER\n
0,1,107820,107948,,0,W,0,0.503km,0,0,...,99,0,0.0,0,,,,0,0.0,0.0
1,1,107948,107820,,91,"Ca1_C,Cu_BD,Ca2_C,Ca_BD,W,Auto_C,Cu_C,R",1,0.503km,2,0,...,99,91,22.0,40,30.0,26.795,,1,0.0,0.0
4,3,2329,2330,,0,W,0,0.263km,0,0,...,99,0,0.0,0,,,0.0,0,0.0,0.0
5,3,2330,2329,,90,"Ca1_C,Cu_BD,Ca2_C,Ca_BD,W,Auto_C,Cu_C,R",1,0.263km,2,0,...,99,90,19.0,30,28.0,23.822,0.0,0,0.0,0.0
10,6,2335,2336,,90,"Ca1_C,Cu_BD,Ca2_C,Al,Ca_BD,W,Auto_C,Cu_C,R",0,0.332km,1,0,...,99,90,33.0,30,33.0,22.787,,0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
376313,201549,140382,131119,,0,"R,W,Auto_C",1,0.044km,1,0,...,99,0,0.0,30,,0.000,,0,0.0,0.0
376316,201553,116460,140834,,94,"Ca1_C,Cu_BD,Ca2_C,Ca_BD,Auto_C,Cu_C",0,11.980km,3,0,...,99,94,60.0,80,60.0,0.000,0.0,1,0.0,0.0
376317,201553,140834,116460,,94,"Ca1_C,Cu_BD,Ca2_C,Ca_BD,Auto_C,Cu_C",1,11.980km,3,0,...,99,94,60.0,80,60.0,0.000,0.0,1,0.0,0.0
376318,201554,140287,140834,,94,"Ca1_C,Cu_BD,Ca2_C,Ca_BD,Auto_C,Cu_C",0,6.652km,3,0,...,99,94,60.0,80,60.0,0.000,0.0,1,0.0,0.0


* **Import, clean and prune non-stop nodes without links**

In [55]:
# Number of nodes, followed by a preview of the first rows
print(objDFs['Nodes'].shape[0])
objDFs['Nodes'].head()

128975


Unnamed: 0,NO,CODE,NAME,TYPENO,CONTROLTYPE,MAINNODENO,USEMETHODIMPATNODE,METHODIMPATNODE,AUTOLINKORIENTATION,XCOORD,...,ICAMETHODALLWAYSTOP,ICAMETHODROUNDABOUT,ICAMETHODSIGNALIZED,ICAMETHODTWOWAYSTOP,VISTRODOANALYSIS,SIGNALWARRANTPOPISBELOW10000,ICAPEDESTRIANWALKINGSPEED,ICARIGHTTURNPEDESTRIANFACTOR,ICACONFIDENCELEVEL,ICASHAREOFCAVS\n
0,2327,,,0,0,0,0,0,1,-11152520.0,...,HCM 2010,HCM 2010,HCM 2010,HCM 2010,0,0,4km/h,Toronto,0.95,0.0
1,2328,,,0,0,0,0,0,1,-11152520.0,...,HCM 2010,HCM 2010,HCM 2010,HCM 2010,0,0,4km/h,Toronto,0.95,0.0
2,2329,,,0,0,0,0,0,1,-11139560.0,...,HCM 2010,HCM 2010,HCM 2010,HCM 2010,0,0,4km/h,Toronto,0.95,0.0
3,2330,,,0,0,0,0,0,1,-11139520.0,...,HCM 2010,HCM 2010,HCM 2010,HCM 2010,0,0,4km/h,Toronto,0.95,0.0
4,2331,,,0,0,0,0,0,1,-11134990.0,...,HCM 2010,HCM 2010,HCM 2010,HCM 2010,0,0,4km/h,Toronto,0.95,0.0


In [56]:
# Number of stop points, followed by a preview of the first rows
print(objDFs['Stoppoints'].shape[0])
objDFs['Stoppoints'].head()

4842


Unnamed: 0,NO,STOPAREANO,CODE,NAME,TYPENO,TSYSSET,DIRECTED,NODENO,FROMNODENO,LINKNO,...,DEFDWELLTIME,DEPOTVEHCOMBSET,DEPOTCAP(1),DEPOTMINLAYOVERTIME(1),DEPOTVEHCOMBSTANDARD,DEPOTCAPSTANDARD,DEPOTMINLAYOVERTIMESTANDARD,COSTRATE1,COSTRATE2,COSTRATE3\n
0,2329,2329,2329,,0,R,0,2329,,,...,0s,,0,0s,0,0,0s,0.0,0.0,0.0
1,2330,2330,2330,,0,R,0,2330,,,...,0s,,0,0s,0,0,0s,0.0,0.0,0.0
2,2336,2336,2336,,0,"R,Al",0,2336,,,...,0s,,0,0s,0,0,0s,0.0,0.0,0.0
3,2354,2354,2354,,0,R,0,2354,,,...,0s,,0,0s,0,0,0s,0.0,0.0,0.0
4,2363,2363,2363,,0,R,0,2363,,,...,0s,,0,0s,0,0,0s,0.0,0.0,0.0


In [64]:
# Node numbers used as the start / end of a link in the cleaned network
n1 = [*objDFs['Links']['FROMNODENO'].unique()]
n2 = [*objDFs['Links']['TONODENO'].unique()]
# Every node number referenced by at least one link
n = {*n1, *n2}
# Keep a node when a link references it or its number is in the stop list
objDFs['Nodes'] = objDFs['Nodes'].loc[
    lambda nodes: nodes['NO'].isin(n) | nodes['NO'].isin(nd)
]
objDFs['Nodes']

Unnamed: 0,NO,CODE,NAME,TYPENO,CONTROLTYPE,MAINNODENO,USEMETHODIMPATNODE,METHODIMPATNODE,AUTOLINKORIENTATION,XCOORD,...,ICAMETHODALLWAYSTOP,ICAMETHODROUNDABOUT,ICAMETHODSIGNALIZED,ICAMETHODTWOWAYSTOP,VISTRODOANALYSIS,SIGNALWARRANTPOPISBELOW10000,ICAPEDESTRIANWALKINGSPEED,ICARIGHTTURNPEDESTRIANFACTOR,ICACONFIDENCELEVEL,ICASHAREOFCAVS\n
2,2329,,,0,0,0,0,0,1,-1.113956e+07,...,HCM 2010,HCM 2010,HCM 2010,HCM 2010,0,0,4km/h,Toronto,0.95,0.0
3,2330,,,0,0,0,0,0,1,-1.113952e+07,...,HCM 2010,HCM 2010,HCM 2010,HCM 2010,0,0,4km/h,Toronto,0.95,0.0
8,2335,,,0,0,0,0,0,1,-1.113530e+07,...,HCM 2010,HCM 2010,HCM 2010,HCM 2010,0,0,4km/h,Toronto,0.95,0.0
9,2336,,,0,0,0,0,0,1,-1.113493e+07,...,HCM 2010,HCM 2010,HCM 2010,HCM 2010,0,0,4km/h,Toronto,0.95,0.0
10,2337,,,0,0,0,0,0,1,-1.116986e+07,...,HCM 2010,HCM 2010,HCM 2010,HCM 2010,0,0,4km/h,Toronto,0.95,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
128936,140810,,,0,0,0,0,0,1,-1.116174e+07,...,HCM 2010,HCM 2010,HCM 2010,HCM 2010,0,0,4km/h,Toronto,0.95,0.0
128937,140811,,,0,0,0,0,0,1,-1.115564e+07,...,HCM 2010,HCM 2010,HCM 2010,HCM 2010,0,0,4km/h,Toronto,0.95,0.0
128942,140816,,,0,0,0,0,0,1,-1.112689e+07,...,HCM 2010,HCM 2010,HCM 2010,HCM 2010,0,0,4km/h,Toronto,0.95,0.0
128943,140817,,,0,0,0,0,0,1,-1.113081e+07,...,HCM 2010,HCM 2010,HCM 2010,HCM 2010,0,0,4km/h,Toronto,0.95,0.0


* **Remove Network folder with csvs**

In [None]:
!rm -rf Networks