# Static Data of the lines and stops of the urban buses in EMT Madrid
In this notebook we transform the EMT Madrid static data, published in GTFS format, into a geodataframe format, which is easier to work with. The GTFS feed can be downloaded from the following link: 
https://transitfeeds.com/p/emt-madrid/212/20200309/download

After downloading the archive, we have to unzip it; we are then ready to work with the `.txt` files found inside. The code below expects those files to be in the `M6/` folder next to this notebook.

In [2]:
#We load the modules of interest
import pandas as pd
%matplotlib inline

In [3]:
#Read the GTFS routes table (one row per bus line) into a dataframe
routes = pd.read_csv(filepath_or_buffer='M6/routes.txt')
#Preview the first rows to check that the columns were parsed correctly
routes.head(n=5)

Unnamed: 0,route_id,agency_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color
0,1,EMT,1,Plaza de Cristo Rey - Prosperidad,,3,http://www.emtmadrid.es/aplicaciones/Itinerari...,0178BC,FFFFFF
1,2,EMT,2,Plaza de Manuel Becerra - Avenida Reina Victoria,,3,http://www.emtmadrid.es/aplicaciones/Itinerari...,0178BC,FFFFFF
2,3,EMT,3,Puerta de Toledo - Plaza de San Amaro,,3,http://www.emtmadrid.es/aplicaciones/Itinerari...,0178BC,FFFFFF
3,4,EMT,4,Plaza de Ciudad Lineal - Puerta de Arganda,,3,http://www.emtmadrid.es/aplicaciones/Itinerari...,0178BC,FFFFFF
4,5,EMT,5,Puerta del Sol/sevilla - Estacion de Chamartin,,3,http://www.emtmadrid.es/aplicaciones/Itinerari...,0178BC,FFFFFF


In [5]:
#Read the GTFS stops table, discard the columns we do not use
#and shorten the names of the ones we keep
stops = (
    pd.read_csv('M6/stops.txt')
    .drop(columns=['stop_id', 'stop_url', 'stop_desc', 'location_type', 'parent_station', 'zone_id'])
    .rename(columns={'stop_code': 'id', 'stop_lat': 'lat', 'stop_lon': 'lon'})
)
#Export the cleaned table both as JSON (pandas' default column-oriented layout) and as CSV
stops.to_json('M6/stops.json')
stops.to_csv('M6/stops.csv')
#Finally we show summary statistics of the numeric columns
stops.describe()

Unnamed: 0,id,lat,lon
count,4730.0,4730.0,4730.0
mean,2742.564059,40.424836,-3.682491
std,1805.45062,0.039964,0.047375
min,1.0,40.3322,-3.83603
25%,1302.25,40.392343,-3.713307
50%,2632.5,40.424915,-3.685675
75%,4092.75,40.457565,-3.649503
max,50011.0,40.51721,-3.54249


In [6]:
#Read the GTFS shapes table (the ordered points that draw each itinerary)
#and give its columns shorter names
shapes = pd.read_csv('M6/shapes.txt').rename(
    columns={
        "shape_id": "itinerary_id",
        "shape_pt_sequence": "sequence",
        "shape_dist_traveled": "dist_traveled",
        "shape_pt_lat": "lat",
        "shape_pt_lon": "lon",
    }
)
#Preview the first rows
shapes.head()

Unnamed: 0,itinerary_id,lat,lon,sequence,dist_traveled
0,001_A,40.438475,-3.717931,1,0
1,001_A,40.438193,-3.717836,2,30
2,001_A,40.437973,-3.717876,3,55
3,001_A,40.437833,-3.717896,4,70
4,001_A,40.436803,-3.718076,5,186


In [7]:
def transform_route_id(df, routes_df=None):
    '''
    Adds the line id, the line short name and the direction to a shapes dataframe

    The three new columns are derived from 'itinerary_id'. The work is done on a
    copy, so the dataframe passed in is left untouched.

        Params
        ---
        df: DataFrame
            Shapes table with the columns 'itinerary_id', 'sequence',
            'dist_traveled', 'lat' and 'lon'. 'itinerary_id' is expected to look
            like '<3-digit line number>_<direction letter>', e.g. '001_A'.
        routes_df: DataFrame, optional
            Routes table with the columns 'route_id' and 'route_short_name'.
            When omitted, the notebook-level ``routes`` dataframe is used.

        Returns
        ---
        DataFrame
            A new dataframe with the columns 'itinerary_id', 'line_id',
            'line_sn', 'direction', 'sequence', 'dist_traveled', 'lat', 'lon'.

        Raises
        ---
        KeyError
            If some line id has no matching 'route_id' in the routes table.
    '''
    if routes_df is None:
        #Fall back to the routes table loaded earlier in the notebook
        routes_df = routes

    #Copy first so the caller's dataframe does not gain columns as a side effect
    new_df = df.copy()
    itinerary = new_df['itinerary_id']

    #The first three characters are the zero-padded line number ('001' -> 1)
    new_df['line_id'] = itinerary.str[:3].astype(int)

    #The fifth character is the direction letter: 'A' -> 1, anything else -> 2
    new_df['direction'] = itinerary.str[4].eq('A').map({True: 1, False: 2})

    #Build a route_id -> short name lookup once; if a route_id is repeated
    #the first occurrence wins
    short_names = (
        routes_df.drop_duplicates(subset='route_id')
        .set_index('route_id')['route_short_name']
    )

    #Fail loudly instead of silently writing NaN for lines missing from the routes table
    known = new_df['line_id'].isin(short_names.index)
    if not known.all():
        unknown_ids = sorted(new_df.loc[~known, 'line_id'].unique().tolist())
        raise KeyError('No route found for line ids: {}'.format(unknown_ids))

    new_df['line_sn'] = new_df['line_id'].map(short_names)

    return new_df[['itinerary_id','line_id','line_sn','direction','sequence','dist_traveled','lat','lon']]

#Enrich the shapes table with the line id, short name and direction
lines_shapes = transform_route_id(shapes)
#Export the enriched table both as JSON and as CSV
lines_shapes.to_json(path_or_buf='M6/lines_shapes.json')
lines_shapes.to_csv(path_or_buf='M6/lines_shapes.csv')
#Preview the first rows of the result
lines_shapes.head(n=5)

Unnamed: 0,itinerary_id,line_id,line_sn,direction,sequence,dist_traveled,lat,lon
0,001_A,1,1,1,1,0,40.438475,-3.717931
1,001_A,1,1,1,2,30,40.438193,-3.717836
2,001_A,1,1,1,3,55,40.437973,-3.717876
3,001_A,1,1,1,4,70,40.437833,-3.717896
4,001_A,1,1,1,5,186,40.436803,-3.718076


In [8]:
#List every distinct numeric bus line id present in the shapes dataframe
lines_shapes['line_id'].unique()

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  14,
        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,
        28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,
        54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,
        67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
        81,  82,  83,  85,  86,  87,  90,  91,  92,  93,  96,  99, 100,
       101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113,
       114, 115, 116, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
       128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140,
       141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153,
       155, 156, 160, 161, 162, 165, 166, 171, 172, 173, 174, 175, 176,
       177, 178, 200, 203, 210, 215, 247, 310, 361, 362, 372, 401, 402,
       403, 404, 451, 452, 453, 454, 455, 456, 457, 481, 501, 50

In [9]:
#List every distinct bus line short name (the label shown to passengers, e.g. 'C1') in the shapes dataframe
lines_shapes['line_sn'].unique()

array(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12',
       '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24',
       '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35',
       '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46',
       '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57',
       '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', 'C1',
       'C2', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79',
       '81', '82', '83', '85', '86', '87', 'E', 'F', 'G', 'A', 'H', 'U',
       '100', '101', '102', '103', '104', '105', '106', '107', '108',
       '109', '110', '111', '112', '113', '114', '115', '116', '118',
       '119', '120', '121', '122', '123', '124', '125', '126', '127',
       '128', '129', '130', '131', '132', '133', '134', '135', '136',
       '137', '138', '139', '140', '141', '142', '143', '144', '145',
       '146', '147', '148', '149', '150', '151', '152', '153', '155',