# Static Data of the lines and stops of the urban buses in EMT Madrid
In this notebook we transform the data published in GTFS format into a geodataframe format, which is easier to work with. The GTFS feed can be found at the following link:
https://transitfeeds.com/p/emt-madrid/212/20200309/download

After downloading it, we have to unzip it; then we are ready to work with the .txt files found inside.

In [2]:
#We load the modules of interest
import pandas as pd
import json
%matplotlib inline

In [6]:
#Read the line -> direction -> stops mapping from the static data folder
with open('../Data/Static/line_stops_dict.json', 'r') as stops_file:
    line_stops_dict = json.load(stops_file)

## Load routes data

In [9]:
#Load the routes table of the GTFS feed into a dataframe
routes = pd.read_csv('M6/routes.txt')
#Preview the first rows of the dataframe
routes.head()

## Load stops data

In [5]:
#Read the stops table, discard the columns we do not use and shorten the names of the ones we keep
stops = (
    pd.read_csv('M6/stops.txt')
    .drop(columns=['stop_id','stop_url','stop_desc','location_type','parent_station','zone_id'])
    .rename(columns={'stop_code': 'id','stop_lat':'lat','stop_lon':'lon'})
)
#Preview the first rows
stops.head()

FileNotFoundError: [Errno 2] File b'M6/stops.txt' does not exist: b'M6/stops.txt'

In [5]:
#And we save it to a CSV file (the dataframe index is written as the first column)
stops.to_csv('M6/stops.csv')
#Finally we show summary statistics of the numeric columns of the dataframe
stops.describe()

Unnamed: 0,id,lat,lon
count,4730.0,4730.0,4730.0
mean,2742.564059,40.424836,-3.682491
std,1805.45062,0.039964,0.047375
min,1.0,40.3322,-3.83603
25%,1302.25,40.392343,-3.713307
50%,2632.5,40.424915,-3.685675
75%,4092.75,40.457565,-3.649503
max,50011.0,40.51721,-3.54249


## Load stop times data

In [6]:
#Load the stop times table, which includes the distance travelled along the line at each stop (shape_dist_traveled)
stop_times = pd.read_csv('M6/stop_times.txt')

def separate_trip_id(df):
    '''
    Splits the ``trip_id`` column into its parts and returns them as new columns

        Params
        ---
        df: DataFrame
            Must contain a string ``trip_id`` column. Characters 0-1 are read as
            the day type, 2-4 as the line, 5-7 as the bus position and the rest
            as the bus trip number.

        Returns
        ---
        DataFrame
            Copy of ``df`` with the extra columns ``day_type`` (str), ``line``,
            ``bus_pos`` and ``bus_trip`` (int). The input dataframe is left
            untouched, so re-running a cell that calls this function is safe.
    '''
    #Vectorised string slicing instead of a Python loop over every row
    trip_ids = df['trip_id'].str

    #assign() builds a new dataframe rather than writing into the caller's one
    return df.assign(
        day_type=trip_ids[0:2],
        line=trip_ids[2:5].astype(int),
        bus_pos=trip_ids[5:8].astype(int),
        bus_trip=trip_ids[8:].astype(int),
    )

#Split the trip id into its parts and keep only the columns used later on
stop_times = separate_trip_id(stop_times).loc[
    :, ['day_type','line','bus_pos','bus_trip','stop_id','stop_sequence','shape_dist_traveled']
]
stop_times.head()

Unnamed: 0,day_type,line,bus_pos,bus_trip,stop_id,stop_sequence,shape_dist_traveled
0,FE,1,1,1,4514,1,0
1,FE,1,1,1,4022,2,295
2,FE,1,1,1,3687,3,485
3,FE,1,1,1,737,4,759
4,FE,1,1,1,735,5,1019


In [7]:
#Keep only trips 1 and 2 of bus position 1 for the selected lines
lines = [1,44,82,91,92,99,132,133,502,506]
bus_trips = [1,2]
bus_pos = 1
stop_times_reduced = stop_times[
    stop_times['line'].isin(lines)
    & stop_times['bus_trip'].isin(bus_trips)
    & (stop_times['bus_pos'] == bus_pos)
]
stop_times_reduced.head()

Unnamed: 0,day_type,line,bus_pos,bus_trip,stop_id,stop_sequence,shape_dist_traveled
0,FE,1,1,1,4514,1,0
1,FE,1,1,1,4022,2,295
2,FE,1,1,1,3687,3,485
3,FE,1,1,1,737,4,759
4,FE,1,1,1,735,5,1019


## Load frequencies data

In [8]:
#Load the frequencies table of the feed
frequencies = pd.read_csv('M6/frequencies.txt')

def _wrap_hour(time_str):
    '''Returns the time string with its two-digit hour taken modulo 24 when it is 24 or more, unchanged otherwise'''
    hour = int(time_str[0:2])
    if hour >= 24:
        return str(hour % 24) + time_str[2:]
    return time_str

#Parse both time columns into datetime.time values (hours of 24 or more are wrapped first so they can be parsed)
for time_column in ['start_time', 'end_time']:
    frequencies[time_column] = frequencies[time_column].apply(_wrap_hour)
    frequencies[time_column] = pd.to_datetime(frequencies[time_column], format='%H:%M:%S').dt.time

#Separate trip id parts, keep the columns of interest and order by line and start time
frequencies = (
    separate_trip_id(frequencies)
    [['day_type','line','bus_pos','bus_trip','start_time','end_time','headway_secs']]
    .sort_values(by=['line','start_time'])
)

#Show the last 20 rows of line 1, even bus positions, day type 'LA'
frequencies[
    (frequencies['line'] == 1)
    & (frequencies['bus_pos'] % 2 == 0)
    & (frequencies['day_type'] == 'LA')
].tail(20)

Unnamed: 0,day_type,line,bus_pos,bus_trip,start_time,end_time,headway_secs
164,LA,1,4,12,17:59:00,19:04:59,780
229,LA,1,8,11,18:11:00,19:16:59,720
252,LA,1,10,10,18:28:00,19:29:59,720
131,LA,1,2,13,18:40:00,19:44:59,720
197,LA,1,6,12,18:52:00,19:59:59,720
165,LA,1,4,13,19:05:00,20:14:59,780
230,LA,1,8,12,19:17:00,20:14:59,720
253,LA,1,10,11,19:30:00,20:30:59,900
132,LA,1,2,14,19:45:00,20:43:59,900
198,LA,1,6,13,20:00:00,20:56:59,900


## Load shapes of the lines data

In [9]:
#Read the shapes of the lines and give the columns shorter names
shapes = pd.read_csv('M6/shapes.txt').rename(columns={
    "shape_pt_lat": "lat",
    "shape_pt_lon": "lon",
    "shape_pt_sequence": "sequence",
    "shape_dist_traveled": "dist_traveled",
})
#Preview the first rows
shapes.head()

Unnamed: 0,shape_id,lat,lon,sequence,dist_traveled
0,001_A,40.438475,-3.717931,1,0
1,001_A,40.438193,-3.717836,2,30
2,001_A,40.437973,-3.717876,3,55
3,001_A,40.437833,-3.717896,4,70
4,001_A,40.436803,-3.718076,5,186


In [10]:
def transform_route_id(df, routes_df=None) :
    '''
    Returns the shapes dataframe with the line id, line short name and direction added

        Params
        ---
        df: DataFrame
            Shapes table with the columns ``shape_id``, ``sequence``,
            ``dist_traveled``, ``lat`` and ``lon``. The first three characters of
            ``shape_id`` are read as the numeric line id and the fifth one as the
            direction letter. It is not modified.
        routes_df: DataFrame, optional
            Table with the columns ``route_id`` and ``route_short_name``.
            Defaults to the notebook-level ``routes`` dataframe.

        Returns
        ---
        DataFrame
            New dataframe with the columns ``shape_id``, ``line_id``, ``line_sn``,
            ``direction`` (1 when the direction letter is 'A', 2 otherwise),
            ``sequence``, ``dist_traveled``, ``lat`` and ``lon``.

        Raises
        ---
        IndexError
            If a line id has no matching ``route_id`` in the routes table
    '''
    if routes_df is None :
        routes_df = routes

    shape_ids = df['shape_id'].str

    #int() drops the leading zeros, so '001' -> 1, '091' -> 91 and '132' -> 132
    line_ids = shape_ids[0:3].astype(int)
    directions = shape_ids[4].eq('A').map({True: 1, False: 2})

    #One lookup table instead of scanning the routes table once per row;
    #the first row of each route_id wins, as with the previous .iloc[0]
    short_names = routes_df.drop_duplicates('route_id').set_index('route_id')['route_short_name']
    known = line_ids.isin(short_names.index)
    if not known.all() :
        #IndexError is kept for compatibility with the previous row-wise lookup
        raise IndexError('No route found for line ids: ' + str(sorted(line_ids[~known].unique().tolist())))

    #assign() returns a new dataframe, so the caller's dataframe is not altered
    new_df = df.assign(
        line_id=line_ids,
        line_sn=line_ids.map(short_names),
        direction=directions,
    )

    return new_df[['shape_id','line_id','line_sn','direction','sequence','dist_traveled','lat','lon']]

#We build the shapes dataframe with line id, short name and direction, save it as CSV and show the first rows
lines_shapes = transform_route_id(shapes)
lines_shapes.to_csv('M6/lines_shapes.csv')
lines_shapes.head()

Unnamed: 0,shape_id,line_id,line_sn,direction,sequence,dist_traveled,lat,lon
0,001_A,1,1,1,1,0,40.438475,-3.717931
1,001_A,1,1,1,2,30,40.438193,-3.717836
2,001_A,1,1,1,3,55,40.437973,-3.717876
3,001_A,1,1,1,4,70,40.437833,-3.717896
4,001_A,1,1,1,5,186,40.436803,-3.718076


In [12]:
#With this line we can take a look at all the bus line ids in the dataframe
lines_shapes.line_id.unique()

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  14,
        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,
        28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,
        54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,
        67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
        81,  82,  83,  85,  86,  87,  90,  91,  92,  93,  96,  99, 100,
       101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113,
       114, 115, 116, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
       128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140,
       141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153,
       155, 156, 160, 161, 162, 165, 166, 171, 172, 173, 174, 175, 176,
       177, 178, 200, 203, 210, 215, 247, 310, 361, 362, 372, 401, 402,
       403, 404, 451, 452, 453, 454, 455, 456, 457, 481, 501, 50

In [13]:
#With this line we can take a look at all the bus line short names in the dataframe
lines_shapes.line_sn.unique()

array(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12',
       '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24',
       '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35',
       '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46',
       '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57',
       '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', 'C1',
       'C2', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79',
       '81', '82', '83', '85', '86', '87', 'E', 'F', 'G', 'A', 'H', 'U',
       '100', '101', '102', '103', '104', '105', '106', '107', '108',
       '109', '110', '111', '112', '113', '114', '115', '116', '118',
       '119', '120', '121', '122', '123', '124', '125', '126', '127',
       '128', '129', '130', '131', '132', '133', '134', '135', '136',
       '137', '138', '139', '140', '141', '142', '143', '144', '145',
       '146', '147', '148', '149', '150', '151', '152', '153', '155',

## Create lines collected dictionary
Dictionary with the short name, destinations, stops and distances to those stops for each of the lines collected

In [17]:
#Destinations of each collected line: line id -> (short name, pair of destination names)
destinations_sn = {
    '1' : ('1', ('CRISTO REY', 'PROSPERIDAD')),
    '44' : ('44', ('CALLAO', 'MARQUES DE VIANA') ),
    '82' : ('82', ('MONCLOA', 'PITIS') ),
    '132' : ('132', ('MONCLOA', 'HOSPITAL LA PAZ') ),
    '133' : ('133', ('CALLAO', 'MIRASIERRA') ),
    '91' : ('F', ('CUATRO CAMINOS', 'CIUDAD UNIVERSITARIA') ),
    '92' : ('G', ('MONCLOA', 'CIUDAD UNIVERSITARIA') ),
    '99' : ('U', ('AVENIDA SENECA', 'PARANINFO') ),
    '502' : ('N2', ('CIBELES', 'VALDEBEBAS') ),
    '506' : ('N6', ('CIBELES', 'LAS ROSAS') )
}

#Short names and ids of the collected lines, paired by position
line_sns = ['1','44','82','F','G','U','132','133','N2','N6']
line_ids = ['1','44','82','91','92','99','132','133','502','506']

def _trip_stop_times(day_type, line, trip) :
    '''Returns the stop_times rows of bus position 1 for the given day type, line number and trip number'''
    return stop_times.loc[(stop_times.day_type==day_type)&(stop_times.line==line)&(stop_times.bus_pos==1)&(stop_times.bus_trip==trip)]

#Dictionary keyed by line short name with the id, destinations and, per direction,
#the length of the line, its stops and the distance to every stop
lines_collected_dict = {}
#zip keeps short names and ids paired without hard-coding the number of lines
for line_sn, line_id in zip(line_sns, line_ids) :
    line_entry = {
        'line_id': line_id,
        'destinations': destinations_sn[line_id][1],
    }
    for direction in ['1','2'] :
        #Named line_stops so the ``stops`` dataframe loaded earlier in the notebook is not overwritten
        line_stops = line_stops_dict[line_id][direction]
        direction_shape = lines_shapes.loc[(lines_shapes['line_id']==int(line_id))&(lines_shapes['direction']==int(direction))]

        #Look for a trip that contains the fourth stop of this direction
        #(presumably to pick a trip that runs in this direction - TODO confirm).
        #Trips are tried in this order: 'LA' trips 1 to 4, then 'FE' trips 5 and 6.
        #NOTE(review): day_type only switches to 'FE' after the query for trip 4 has run,
        #so 'FE' trips 1-4 are never tried; kept as is to leave the generated file unchanged.
        trip_index = 1
        day_type = 'LA'
        stops_data = _trip_stop_times(day_type, int(line_id), trip_index)
        while (int(line_stops[3]) not in stops_data.stop_id.unique().tolist()) and (trip_index < 6):
            trip_index += 1
            stops_data = _trip_stop_times(day_type, int(line_id), trip_index)
            if trip_index > 3 :
                day_type = 'FE'

        #Distance from the beginning of the line to each stop, taken from the selected trip
        #(raises IndexError if a stop is missing from that trip)
        distances = {}
        for stop in line_stops :
            stop_data = stops_data.loc[stops_data.stop_id == int(stop)].iloc[0]
            distances[stop] = str(stop_data.shape_dist_traveled)

        line_entry[direction] = {
            'length': str(direction_shape.dist_traveled.max()),
            'stops': line_stops,
            'distances': distances,
        }
    lines_collected_dict[line_sn] = line_entry

#Save the dictionary as JSON (the destination tuples are written as lists)
with open('M6Data/lines_collected_dict.json', 'w') as fp:
    json.dump(lines_collected_dict, fp)
lines_collected_dict

{'1': {'line_id': '1',
  'destinations': ('CRISTO REY', 'PROSPERIDAD'),
  '1': {'length': '8818',
   'stops': ['4514',
    '4022',
    '3687',
    '737',
    '735',
    '193',
    '173',
    '171',
    '169',
    '723',
    '724',
    '5138',
    '164',
    '70',
    '162',
    '423',
    '424',
    '425',
    '426',
    '731',
    '729',
    '727',
    '726',
    '2304',
    '721',
    '717',
    '715',
    '745',
    '273'],
   'distances': {'4514': '0',
    '4022': '295',
    '3687': '485',
    '737': '759',
    '735': '1019',
    '193': '1296',
    '173': '1541',
    '171': '1939',
    '169': '2275',
    '723': '2569',
    '724': '2842',
    '5138': '2998',
    '164': '3203',
    '70': '3537',
    '162': '3979',
    '423': '4479',
    '424': '4782',
    '425': '5084',
    '426': '5360',
    '731': '5654',
    '729': '6094',
    '727': '6526',
    '726': '6756',
    '2304': '7132',
    '721': '7483',
    '717': '7840',
    '715': '8202',
    '745': '8611',
    '273': '8818'}},
  '2'

# URBAN NETWORK OF MADRID GRAPH

In [1]:
import networkx as nx