# Cultural Tourism Route Optimization

In [3]:
import pandas as pd

### Artworks, Fountains and Monuments

In [81]:
df = pd.read_csv("Datasets/public-artworks-fountains-and-monuments.csv")
df.head(2)

### Train Routes

In [115]:
metro_train_routes = pd.read_csv("Datasets/gtfs/Metro Train/routes.txt", delimiter=",") 

In [117]:
metro_train_routes.head(2)

Unnamed: 0,route_id,agency_id,route_short_name,route_long_name,route_type,route_color,route_text_color
0,aus:vic:vic-02-ALM:,3,Alamein,Alamein - City,2,152C6B,FFFFFF
1,aus:vic:vic-02-BEG:,3,Belgrave,Belgrave - City,2,152C6B,FFFFFF


In [119]:
# Split based on 'aus:vic:vic-' and take the second part
metro_train_routes['train_id'] = metro_train_routes['route_id'].str.extract(r'aus:vic:vic-(.*?):?$', expand=False)

metro_train_routes = metro_train_routes[['train_id', 'route_short_name', 'route_long_name']]

metro_train_routes = metro_train_routes.drop_duplicates()

metro_train_routes.head()

Unnamed: 0,train_id,route_short_name,route_long_name
0,02-ALM,Alamein,Alamein - City
1,02-BEG,Belgrave,Belgrave - City
2,02-CBE,Cranbourne,Cranbourne - City
3,02-CCL,City Circle,
4,02-CGB,Craigieburn,Craigieburn - City


### Train Stops

In [146]:
metro_train_stops = pd.read_csv("Datasets/gtfs/Metro Train/stops.txt", delimiter=",")

In [148]:
metro_train_stops.head(2)

Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon,location_type,parent_station,platform_code
0,10117,Jordanville Station,-37.873763,145.112473,,Parentvic:rail:JOR,1
1,10920,Flagstaff Station,-37.81188,144.956043,,Parentvic:rail:FGS,1


In [150]:
metro_train_stops = metro_train_stops[['stop_id', 'stop_name', 'stop_lat','stop_lon']]

metro_train_stops = metro_train_stops.drop_duplicates()

metro_train_stops.head()

Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon
0,10117,Jordanville Station,-37.873763,145.112473
1,10920,Flagstaff Station,-37.81188,144.956043
2,10921,Flagstaff Station,-37.811725,144.955968
3,10922,Melbourne Central Station,-37.809974,144.962547
4,10923,Melbourne Central Station,-37.809865,144.962516


### Train Times

In [186]:
metro_train_times = pd.read_csv("Datasets/gtfs/Metro Train/stop_times.txt", delimiter=",")

  metro_train_times = pd.read_csv("Datasets/gtfs/Metro Train/stop_times.txt", delimiter=",")


In [187]:
metro_train_times.head(2)

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled
0,02-ALM--16-T2-2302,04:57:00,04:57:00,11197,1,,0,0,0.0
1,02-ALM--16-T2-2302,04:58:00,04:58:00,11198,2,,0,0,716.06


In [188]:
metro_train_times['train_id'] = metro_train_times['trip_id'].str.extract(r'(^[^-]+-[^-]+)')

metro_train_times = metro_train_times[['trip_id', 'train_id', 'stop_id', 'stop_sequence']]

metro_train_times = metro_train_times.drop_duplicates()

metro_train_times['stop_id'] = metro_train_times['stop_id'].astype('object')

metro_train_times.head()

Unnamed: 0,trip_id,train_id,stop_id,stop_sequence
0,02-ALM--16-T2-2302,02-ALM,11197,1
1,02-ALM--16-T2-2302,02-ALM,11198,2
2,02-ALM--16-T2-2302,02-ALM,11200,3
3,02-ALM--16-T2-2302,02-ALM,11202,4
4,02-ALM--16-T2-2302,02-ALM,11203,5


In [206]:
stop_count = metro_train_times.groupby('trip_id')['stop_sequence'].count().reset_index()
stop_count
stop_seq_count = stop_count.sort_values(by='stop_sequence', ascending=False)
stop_seq_count

Unnamed: 0,trip_id,stop_sequence
3499,02-BEG--16-T6-3602,31
2498,02-BEG--1-T6-3602,31
3994,02-BEG--17-T6-3602,31
4108,02-BEG--8-T5-3602,31
3873,02-BEG--17-T5-3602,31
...,...,...
7708,02-CGB--16-T2-BC482,2
7709,02-CGB--16-T2-BC484,2
7710,02-CGB--16-T2-BC485,2
8323,02-CGB--16-T3-BC928,2


In [191]:
metro_train_times.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 689765 entries, 0 to 689764
Data columns (total 4 columns):
 #   Column         Non-Null Count   Dtype 
---  ------         --------------   ----- 
 0   trip_id        689765 non-null  object
 1   train_id       689765 non-null  object
 2   stop_id        689765 non-null  object
 3   stop_sequence  689765 non-null  int64 
dtypes: int64(1), object(3)
memory usage: 21.1+ MB


In [166]:
metro_train_times['stop_id'] = metro_train_times['stop_id'].astype(str).str.strip()
metro_train_stops['stop_id'] = metro_train_stops['stop_id'].astype(str).str.strip()

result_df = pd.merge(metro_train_times, metro_train_stops, on='stop_id')
result_df.head()

Unnamed: 0,train_id,stop_id,stop_sequence,stop_name,stop_lat,stop_lon
0,02-ALM,11197,1,Alamein Station,-37.868204,145.079727
1,02-ALM,11198,2,Ashburton Station,-37.861932,145.08139
2,02-ALM,11200,3,Burwood Station,-37.851744,145.08054
3,02-ALM,11202,4,Hartwell Station,-37.843883,145.075426
4,02-ALM,11203,5,Willison Station,-37.835432,145.070055


In [182]:
fil = result_df.loc[result_df['train_id'] == '02-BEG']
fil.head(30)

Unnamed: 0,train_id,stop_id,stop_sequence,stop_name,stop_lat,stop_lon
73,02-BEG,11120,1,Belgrave Station,-37.9091,145.355132
74,02-BEG,11121,2,Tecoma Station,-37.90809,145.342904
75,02-BEG,11122,3,Upwey Station,-37.90371,145.331413
76,02-BEG,11245,4,Upper Ferntree Gully Station,-37.892596,145.307293
77,02-BEG,11247,5,Ferntree Gully Station,-37.881342,145.294952
78,02-BEG,11249,6,Boronia Station,-37.860529,145.284736
79,02-BEG,11410,7,Bayswater Station,-37.841866,145.268179
80,02-BEG,11412,8,Heathmont Station,-37.828494,145.244563
81,02-BEG,12237,9,Ringwood Station,-37.815924,145.22907
82,02-BEG,12234,10,Heatherdale Station,-37.818832,145.214584


In [184]:
fil.info()

<class 'pandas.core.frame.DataFrame'>
Index: 232 entries, 73 to 304
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   train_id       232 non-null    object 
 1   stop_id        232 non-null    object 
 2   stop_sequence  232 non-null    int64  
 3   stop_name      232 non-null    object 
 4   stop_lat       232 non-null    float64
 5   stop_lon       232 non-null    float64
dtypes: float64(2), int64(1), object(3)
memory usage: 12.7+ KB


Unnamed: 0,train_id,stop_id,stop_sequence
0,02-ALM,11197,1
1,02-ALM,11198,2


In [79]:
result_df = pd.merge(metro_train_times, metro_train_stops, on='stop_id')
result_df.head()

ValueError: You are trying to merge on int64 and object columns for key 'stop_id'. If you wish to proceed you should use pd.concat