# Static Data of the lines and stops of the urban buses in EMT Madrid
In this notebook we transform the static data published in GTFS format into pandas dataframes, which are easier to work with, and save the results as CSV and JSON files. The GTFS feed can be downloaded from the following link: 
https://transitfeeds.com/p/emt-madrid/212/20200309/download

After downloading it, we have to unzip it, and then we are ready to work with the .txt files found inside.

For this notebook to work, set the path in the cell below to the folder that contains the unzipped GTFS data. The file `../Data/Static/line_stops_dict.json` must also exist, because it is loaded further down.

In [None]:
#Folder with the unzipped GTFS .txt files (routes.txt, stops.txt, ...). Edit it before running the notebook
GTFSpath = 'myfolder/mygtfsfolder' 

In [None]:
#We load the modules of interest
import pandas as pd
import json
%matplotlib inline

In [None]:
#Load line_stops_dict, used below as line_stops_dict[line_id][direction] -> list of stop ids
#JSON text is UTF-8, so the encoding is fixed instead of depending on the platform default
with open('../Data/Static/line_stops_dict.json', 'r', encoding='utf-8') as f:
    line_stops_dict = json.load(f)

## Load routes data

In [None]:
#We load the GTFS routes table into a dataframe (route_id and route_short_name are used further down)
routes = pd.read_csv(GTFSpath+'/routes.txt')
#And we show the first rows of the dataframe
routes.head()

## Load stops data

In [None]:
#Read the GTFS stops table, discard the columns that are not used
#and give shorter names to the stop code and the coordinates
stops = (
    pd.read_csv(GTFSpath+'/stops.txt')
    .drop(columns=['stop_id','stop_url','stop_desc','location_type','parent_station','zone_id'])
    .rename(columns={'stop_code': 'id','stop_lat':'lat','stop_lon':'lon'})
)
stops.head()

In [None]:
#And we save it to a CSV file inside the GTFS folder (the dataframe index is written as the first column)
stops.to_csv(GTFSpath+'/stops.csv')
#Finally we show summary statistics of the dataframe
stops.describe()

## Load stop times data

In [None]:
#We load the GTFS stop times table; trip_id, stop_id, stop_sequence and shape_dist_traveled are the columns used below
stop_times = pd.read_csv(GTFSpath+'/stop_times.txt')

def separate_trip_id(df):
    '''
    Splits the fixed-width trip_id column into its four components

    Characters 0-1 become day_type (kept as text), characters 2-4 become line,
    characters 5-7 become bus_pos and the remaining characters become bus_trip
    (the last three are converted to integers).

        Params
        ---
        df: DataFrame
            Must contain a trip_id column. The four new columns are added to
            this same dataframe, so it is modified in place.

        Returns
        ---
        DataFrame
            The same dataframe object, with the day_type, line, bus_pos and
            bus_trip columns added.

        Raises
        ---
        ValueError
            If one of the numeric slices of a trip_id is not a valid integer.
    '''
    #astype(str) keeps the .str accessor usable even when the frame is empty
    trip_id = df['trip_id'].astype(str)

    #Vectorized slicing replaces the row by row loop over the dataframe
    df['day_type'] = trip_id.str[0:2]
    df['line'] = trip_id.str[2:5].astype('int64')
    df['bus_pos'] = trip_id.str[5:8].astype('int64')
    df['bus_trip'] = trip_id.str[8:].astype('int64')

    return df

#Split trip_id into its parts and keep only the columns needed from here on
stop_times = separate_trip_id(stop_times).loc[
    :,
    ['day_type','line','bus_pos','bus_trip','stop_id','stop_sequence','shape_dist_traveled'],
]
stop_times.head()

In [None]:
#Lines, trip numbers and bus position that define the reduced sample of stop times
lines = [1,44,82,91,92,99,132,133,502,506]
bus_trips = [1,2]
bus_pos = 1
#A row is kept only when it satisfies the three conditions at the same time
stop_times_reduced = stop_times[
    stop_times['line'].isin(lines)
    & stop_times['bus_trip'].isin(bus_trips)
    & stop_times['bus_pos'].eq(bus_pos)
]
stop_times_reduced.head()

## Load frequencies data

In [None]:
#We load the GTFS frequencies table
frequencies = pd.read_csv(GTFSpath+'/frequencies.txt')

def _wrap_gtfs_time(value):
    '''
    Maps a GTFS time string onto a 24 hour clock, e.g. 25:10:00 -> 1:10:00

    GTFS times may have an hour of 24 or more and may also be written as
    H:MM:SS, so the hour is everything before the first colon and not the
    first two characters of the string.
    '''
    hours, rest = value.split(':', 1)
    return str(int(hours) % 24) + ':' + rest

#Parse both time columns into datetime.time values
for time_column in ['start_time', 'end_time']:
    frequencies[time_column] = pd.to_datetime(frequencies[time_column].map(_wrap_gtfs_time), format='%H:%M:%S').dt.time

#Separate trip id parts
frequencies = separate_trip_id(frequencies)[['day_type','line','bus_pos','bus_trip','start_time','end_time','headway_secs']]
#NOTE(review): times past midnight were wrapped above, so they sort before the rest of the day
frequencies = frequencies.sort_values(by=['line','start_time'],ascending=True)
#Preview: last 20 rows of line 1 with an even bus_pos and day_type 'LA'
frequencies.loc[(frequencies.line==1)&((frequencies.bus_pos%2)==0)&(frequencies.day_type=='LA')][-20:]

## Load shapes of the lines data

In [None]:
#Read the GTFS shapes table and give its columns shorter names
shapes = pd.read_csv(GTFSpath+'/shapes.txt').rename(
    columns={
        "shape_pt_sequence": "sequence",
        "shape_dist_traveled": "dist_traveled",
        "shape_pt_lat": "lat",
        "shape_pt_lon": "lon",
    }
)
#Preview the first rows
shapes.head()

In [None]:
def transform_route_id(df, routes_df=None):
    '''
    Adds the numeric line id, the line short name and the direction to a shapes dataframe

    The values are derived from shape_id: its first three characters are the
    zero-padded line id and its fifth character is 'A' for direction 1
    (anything else gives direction 2). The short name is looked up in the
    routes table through route_id.

        Params
        ---
        df: DataFrame
            Shapes table with the columns shape_id, sequence, dist_traveled,
            lat and lon. It is not modified.
        routes_df: DataFrame, optional
            Table with the columns route_id and route_short_name. When it is
            omitted the notebook-level routes dataframe is used.

        Returns
        ---
        DataFrame
            New dataframe with the columns shape_id, line_id, line_sn,
            direction, sequence, dist_traveled, lat and lon.

        Raises
        ---
        KeyError
            If some line id has no matching route_id in the routes table.
        ValueError
            If the first three characters of a shape_id are not an integer.
    '''
    if routes_df is None:
        routes_df = routes

    shape_id = df['shape_id'].astype(str)

    #The integer conversion drops the zero padding, so a single slice covers ids of one, two and three digits
    line_id = shape_id.str[0:3].astype('int64')
    #2 - 1 == 1 when the fifth character is 'A', 2 - 0 == 2 otherwise
    direction = 2 - shape_id.str[4].eq('A').astype('int64')

    #One lookup table instead of filtering the routes once per row; the first row wins for a repeated route_id
    short_names = routes_df.drop_duplicates('route_id').set_index('route_id')['route_short_name']
    unknown = line_id[~line_id.isin(short_names.index)].unique().tolist()
    if unknown:
        raise KeyError(f'No route_id found in the routes table for line ids: {unknown}')
    line_sn = line_id.map(short_names)

    #assign works on a copy of the selected columns, so the dataframe of the caller is left untouched
    new_df = df[['shape_id','sequence','dist_traveled','lat','lon']].assign(
        line_id=line_id,
        line_sn=line_sn,
        direction=direction,
    )

    return new_df[['shape_id','line_id','line_sn','direction','sequence','dist_traveled','lat','lon']]

#We call the function, save the result to a CSV file and show its first rows
lines_shapes = transform_route_id(shapes)
lines_shapes.to_csv('../Data/Static/lines_shapes.csv')
lines_shapes.head()

In [None]:
#With this line we can take a look at all the numeric bus line ids in the dataframe
lines_shapes.line_id.unique()

In [None]:
#With this line we can take a look at all the bus line short names in the dataframe
lines_shapes.line_sn.unique()

## Create lines collected dictionary
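Dictionary keyed by the short name of each collected line. For every line it holds the line id and the destinations and, for each direction, the length of the route, the stops and the distance to each of those stops.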

In [None]:
#Short name and destinations of each collected line, keyed by line id
destinations_sn = {
    '1' : ('1', ('CRISTO REY', 'PROSPERIDAD')),
    '44' : ('44', ('CALLAO', 'MARQUES DE VIANA') ),
    '82' : ('82', ('MONCLOA', 'PITIS') ),
    '132' : ('132', ('MONCLOA', 'HOSPITAL LA PAZ') ),
    '133' : ('133', ('CALLAO', 'MIRASIERRA') ),
    '91' : ('F', ('CUATRO CAMINOS', 'CIUDAD UNIVERSITARIA') ),
    '92' : ('G', ('MONCLOA', 'CIUDAD UNIVERSITARIA') ),
    '99' : ('U', ('AVENIDA SENECA', 'PARANINFO') ),
    '502' : ('N2', ('CIBELES', 'VALDEBEBAS') ),
    '506' : ('N6', ('CIBELES', 'LAS ROSAS') )
}

#Short names and ids of the collected lines, in the order in which they are written to the dictionary
line_sns = ['1','44','82','F','G','U','132','133','N2','N6']
line_ids = ['1','44','82','91','92','99','132','133','502','506']
#The three structures above repeat the same information, so we check that they agree
assert len(line_sns) == len(line_ids)
assert all(destinations_sn[line_id][0] == line_sn for line_sn, line_id in zip(line_sns, line_ids))

#(day_type, bus_trip) pairs of bus_pos 1 that are tried, in this order, as the reference trip
#NOTE(review): 'FE' is only tried for trips 5 and 6; this is the order the previous loop followed, confirm it is intended
CANDIDATE_TRIPS = [('LA', 1), ('LA', 2), ('LA', 3), ('LA', 4), ('FE', 5), ('FE', 6)]

def _trip_stop_times(line_id, day_type, bus_trip):
    '''Returns the stop_times rows of the bus_pos 1 trip of a line given by day_type and bus_trip'''
    return stop_times.loc[(stop_times.day_type==day_type)&(stop_times.line==int(line_id))&(stop_times.bus_pos==1)&(stop_times.bus_trip==bus_trip)]

def _find_trip_serving(line_id, stop_id):
    '''
    Returns the stop_times rows of the first candidate trip that contains stop_id

    When no candidate contains the stop, the rows of the last candidate are returned
    '''
    for day_type, bus_trip in CANDIDATE_TRIPS:
        trip_stops = _trip_stop_times(line_id, day_type, bus_trip)
        if int(stop_id) in trip_stops.stop_id.unique().tolist():
            break
    return trip_stops

lines_collected_dict = {}
for line_sn, line_id in zip(line_sns, line_ids):
    line_info = {
        'line_id': line_id,
        'destinations': destinations_sn[line_id][1],
    }
    for direction in ['1','2']:
        #Named stop_ids so that the stops dataframe loaded earlier in the notebook is not overwritten
        stop_ids = line_stops_dict[line_id][direction]
        in_direction = (lines_shapes['line_id']==int(line_id))&(lines_shapes['direction']==int(direction))

        #A trip is accepted as reference when it contains the fourth stop of the direction
        trip_stops = _find_trip_serving(line_id, stop_ids[3])

        distances = {}
        for stop_id in stop_ids:
            matches = trip_stops.loc[trip_stops.stop_id == int(stop_id)]
            if matches.empty:
                raise IndexError(f'Stop {stop_id} of line {line_id} (direction {direction}) is not in the reference trip')
            distances[stop_id] = str(matches.iloc[0].shape_dist_traveled)

        line_info[direction] = {
            'length': str(lines_shapes.loc[in_direction].dist_traveled.max()),
            'stops': stop_ids,
            'distances': distances,
        }
    lines_collected_dict[line_sn] = line_info

with open('../Data/Static/lines_collected_dict.json', 'w') as fp:
    json.dump(lines_collected_dict, fp)
lines_collected_dict