## Step 1: Import required libraries and packages

In [None]:
import pandas as pd
import numpy as np
import datetime
import folium
from folium import plugins
from folium.plugins import HeatMapWithTime
from folium.features import DivIcon
import matplotlib.colors as colors
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import randomcolor
from geopy.distance import distance
from datetime import date
import time
import requests
import math
from folium.plugins import BeautifyIcon
import configparser
from sklearn.cluster import KMeans
from yellowbrick.cluster import KElbowVisualizer
# Disable pandas' chained-assignment (SettingWithCopy) check for the whole notebook.
pd.set_option('mode.chained_assignment', None)

In [None]:
# following packages are mainly required to save the folium maps to images (.png)
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Headless Firefox configuration (hard-coded Windows install locations).
# NOTE(review): `driver` is not referenced anywhere else in the visible notebook - confirm it is still needed.
binary = 'C:\\Program Files\\Mozilla Firefox\\firefox.exe'
options = Options()
options.headless = True
options.binary = binary
cap = DesiredCapabilities().FIREFOX
cap["marionette"] = True
# The geckodriver path is a raw string: "\g" is not a valid escape sequence, so the plain
# literal only worked by accident and triggers an invalid-escape warning on newer Pythons.
driver = webdriver.Firefox(
    options=options,
    capabilities=cap,
    executable_path=r"C:\geckodriver-v0.26.0-win64\geckodriver.exe",
)

import io
from PIL import Image

In [None]:
# set directories
# Relative (Windows-style) folder prefix, ending with a path separator, that the cells below
# prepend to every data file name they read or write.
resource_directory = "00_Ntbk_Resources\\01_DataAnalysis\\00_ProcessedData\\PMPML_BusRoutes_July2019\\"

## Step 2: Get the dataset
source: http://opendata.punecorporation.org/Citizen/CitizenDatasets/Index

In [None]:
# The downloaded dataset was already cleaned and pre-processed and saved to CSV files
# (refer 00_Ntbk_PuneBusRoutes_DataCollection,Processing,Cleaning.ipynb), so this cell only
# loads those processed files from `resource_directory` into dataframes.
df_TripCalendar = pd.read_csv(f"{resource_directory}calendar.txt")
df_BusTrips = pd.read_csv(f"{resource_directory}trips.txt")
df_BusStops = pd.read_csv(f"{resource_directory}stops.txt")
# arrival/departure columns are parsed into datetimes while reading
df_BusStopTimes = pd.read_csv(
    f"{resource_directory}stop_times.txt",
    parse_dates=["arrival_time", "departure_time"],
)
df_BusRouteShapes = pd.read_csv(f"{resource_directory}shapes.txt")

## Step 3: Data Preparation
Create a single dataframe that combines every trip with its stop times and stop details.

In [None]:
# Start from the stop times, then attach the stop details (matched on stop_id) and the
# selected trip attributes (matched on trip_id).
df_BusScheduleInfo = (
    df_BusStopTimes.copy()
    .join(df_BusStops.set_index("stop_id"), on="stop_id")
    .join(
        df_BusTrips[["trip_id", "service_id", "route_id", "trip_headsign", "trip_distance"]].set_index("trip_id"),
        on="trip_id",
    )
)

# Per-trip summary columns, broadcast to every row of the trip. "first"/"last" follow the row
# order inside each trip_id group, so they depend on the order of the rows in stop_times.txt.
df_BusScheduleInfo = df_BusScheduleInfo.assign(
    trip_bgn_time=lambda d: d.groupby("trip_id")["arrival_time"].transform("first"),
    trip_end_time=lambda d: d.groupby("trip_id")["arrival_time"].transform("last"),
    trip_duration=lambda d: d["trip_end_time"] - d["trip_bgn_time"],
    trip_bgn_stop_id=lambda d: d.groupby("trip_id")["stop_id"].transform("first"),
    trip_end_stop_id=lambda d: d.groupby("trip_id")["stop_id"].transform("last"),
)

df_BusScheduleInfo

In [None]:
# Give every trip destination (trip_end_stop_id) its own random colour for the map, so all
# trips that end at the same stop share one colour.
rand_color = randomcolor.RandomColor()
unique_end_stop_ids = df_BusScheduleInfo["trip_end_stop_id"].unique()
list_TripColor = rand_color.generate(count=len(unique_end_stop_ids))

# Build the lookup column-wise so trip_end_stop_id keeps its original dtype; building it from
# a list of lists and transposing would turn the join key into an object column.
df_TripColors = pd.DataFrame({"trip_end_stop_id": unique_end_stop_ids, "trip_color": list_TripColor})
df_BusScheduleInfo = df_BusScheduleInfo.join(df_TripColors.set_index("trip_end_stop_id"), on="trip_end_stop_id")

df_BusScheduleInfo

In [None]:
# number of distinct trip_id values in the schedule
len(df_BusScheduleInfo["trip_id"].unique())

In [None]:
# Reorder the columns: trip-level attributes first, then the per-stop attributes.
list_ColumnsRearranged = [
    "trip_id", "service_id", "trip_headsign", "route_id", "trip_distance", "trip_color",
    "trip_duration", "trip_bgn_time", "trip_end_time", "trip_bgn_stop_id", "trip_end_stop_id",
    "stop_sequence", "stop_id", "stop_name", "stop_lat", "stop_lon",
    "arrival_time", "departure_time",
]
df_BusScheduleInfo = df_BusScheduleInfo.loc[:, list_ColumnsRearranged]

df_BusScheduleInfo

In [None]:
# Rows ordered by ascending trip duration (one row per stop time, not per trip).
# NOTE(review): the 8305-row limit is an unexplained magic number - confirm what it stands for.
(
    df_BusScheduleInfo
    .loc[:, ["trip_id", "trip_headsign", "trip_bgn_time", "trip_end_time", "trip_duration"]]
    .sort_values(by="trip_duration")
    .head(n=8305)
)

### Quickfacts of our dataframe

In [None]:
# longest trip (based on trip distance): first row whose trip_distance equals the column maximum
df_BusScheduleInfo.loc[
    df_BusScheduleInfo["trip_distance"].eq(df_BusScheduleInfo["trip_distance"].max()),
    ["trip_headsign", "trip_distance"],
].head(1)

In [None]:
# shortest trip (based on trip distance): first row whose trip_distance equals the column minimum
df_BusScheduleInfo.loc[
    df_BusScheduleInfo["trip_distance"].eq(df_BusScheduleInfo["trip_distance"].min()),
    ["trip_headsign", "trip_distance", "trip_duration"],
].head(1)

In [None]:
# longest trip (based on trip duration): first row whose trip_duration equals the column maximum
df_BusScheduleInfo.loc[
    df_BusScheduleInfo["trip_duration"].eq(df_BusScheduleInfo["trip_duration"].max()),
    ["trip_headsign", "trip_duration"],
].head(1)

In [None]:
# shortest trip (based on trip duration): first row whose trip_duration equals the column minimum
df_BusScheduleInfo.loc[
    df_BusScheduleInfo["trip_duration"].eq(df_BusScheduleInfo["trip_duration"].min()),
    ["trip_headsign", "trip_duration"],
].head(1)

## Step 4: Get Foursquare location information
We must identify unique stop_ids and their co-ordinates to make request for Foursquare API

In [None]:
# get the unique stop_ids of destinations for all trips
# Result: one row per destination stop (trip_end_stop_id) with the number of trips ending
# there (trip_count), ordered by trip_count descending, plus the stop details from df_BusStops.
# The resulting row order matters: the Foursquare venue counts are later written without an
# index and joined back onto this frame by row position.
df_trip_destinations = df_BusScheduleInfo[["trip_id", "trip_end_stop_id"]]
# collapse the per-stop-time rows to one row per trip
df_trip_destinations = df_trip_destinations.drop_duplicates()
df_trip_destinations = df_trip_destinations.sort_values(by="trip_end_stop_id")
# number of trips per destination, broadcast to every trip row of that destination
df_trip_destinations["trip_count"] = df_trip_destinations.groupby("trip_end_stop_id")["trip_id"].transform("count")
df_trip_destinations = df_trip_destinations.sort_values(by="trip_count", ascending = False)
df_trip_destinations.reset_index(inplace=True, drop=True)
# drop trip_id so that duplicates collapse to one row per destination
df_trip_destinations = df_trip_destinations[["trip_end_stop_id", "trip_count"]]
df_trip_destinations = df_trip_destinations.drop_duplicates()
df_trip_destinations.reset_index(inplace=True, drop=True)
# attach the remaining df_BusStops columns for each destination stop
df_trip_destinations = df_trip_destinations.join(df_BusStops.set_index("stop_id"), on ="trip_end_stop_id")

df_trip_destinations

In [None]:
# Load the Foursquare API credentials from the [CREDENTIALS] section of the local .ini file
FoursquareCreds = configparser.ConfigParser()
FoursquareCreds.read("00_Ntbk_Resources\\01_DataAnalysis\\FoursquareCredentials.ini")
CLIENT_ID, CLIENT_SECRET = (
    FoursquareCreds.get('CREDENTIALS', option) for option in ('CLIENT_ID', 'CLIENT_SECRET')
)

# Remaining request attributes
RADIUS = 1000  # search radius passed to the API (presumably metres - confirm against the API docs)
VERSION = f"{date.today():%Y%m%d}"  # Foursquare API version parameter: today's date as YYYYMMDD

In [None]:
# form url to make Foursquare API call
url = 'https://api.foursquare.com/v2/venues/categories/?&client_id={}&client_secret={}&v={}'.format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION)

# Request the category list, retrying every 10 seconds until a JSON body is received.
# Only network failures and undecodable bodies are retried; anything else (including a
# KeyboardInterrupt from the user) propagates instead of being swallowed by a bare except.
url_response = {}
while True:
    try:
        url_response = requests.get(url).json()
        break
    except (requests.exceptions.RequestException, ValueError):
        print("Connection refused by the server..")
        time.sleep(10)
        url_response = {}

In [None]:
# let's find out the main categories of Foursquare venues and their ids to create request URL
# (venues_extractor reads dict_venues_top_categories, so this mapping of category name ->
# category id has to exist before that function is called)
dict_venues_top_categories = {
    category["name"]: category["id"]
    for category in url_response["response"]["categories"]
}

dict_venues_top_categories

Let me give some information about these main venue categories:\
1) Arts & Entertainment: Movies theaters, museum, sports stadium, theme park, zoo, etc.\
2) College & University: University, college, school, grounds, etc.\
3) Event: Street sale, festival place, main market, etc.\
4) Food: Restaurant, bakery, coffee shop, cafe, etc.\
5) Nightlife Spot: Bar, lounge, pub, etc.\
6) Outdoors & Recreation: Botanical garden, gym, pool, track, hill, farm, forest, lake, river, etc.\
7) Professional & Other Places: Business center, distribution center, factory, government building, hospitals, camps, etc.\
8) Residence: Home, bungalow, apartments, residential building, etc.\
9) Shop & Service: ATM, bank, shops, petrol pumps, salons, etc.\
10) Travel & Transport: bus/railway/metro station, airport, hotels, tunnels, roads, etc.

In [None]:
# function that will create Foursquare url request and handle/return the received response
def venues_extractor (client_id, client_secret, version, radius, lat, long,
                      categories=None, max_invalid_responses=5, retry_delay=10) :
    """Count the Foursquare venues of each top-level category around one location.

    One `venues/explore` request is made per category; the `totalResults` value of each
    response is recorded under the category name.

    Parameters
    ----------
    client_id, client_secret : Foursquare API credentials.
    version : value sent as the `v` query parameter.
    radius : value sent as the `radius` query parameter.
    lat, long : coordinates sent as the `ll` query parameter.
    categories : optional dict mapping category name -> Foursquare category id.
        Defaults to the notebook-level `dict_venues_top_categories`.
    max_invalid_responses : number of retries allowed when the server answers without a
        `totalResults` field; after that the category is recorded as 0.
    retry_delay : seconds to wait before each retry.

    Returns
    -------
    dict mapping each category name to its venue count.

    Notes
    -----
    Connection failures and undecodable bodies are retried without limit, as before.
    Other exceptions are no longer swallowed.
    """
    if categories is None :
        categories = dict_venues_top_categories

    dict_venues_category = {}

    for category_name, venue_category_id in categories.items():
        url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&categoryId={}'.format(
                client_id,
                client_secret,
                version,
                lat,
                long,
                radius,
                venue_category_id)

        invalid_count = 0
        while True:
            try:
                url_response = requests.get(url).json()
            except (requests.exceptions.RequestException, ValueError):
                print("Connection refused by the server..")
                time.sleep(retry_delay)
                continue

            # a usable answer carries response.totalResults; anything else counts as invalid
            response_body = url_response.get('response') if isinstance(url_response, dict) else None
            if isinstance(response_body, dict) and 'totalResults' in response_body:
                total_results = response_body['totalResults']
                break
            if invalid_count >= max_invalid_responses :
                # give up on this category instead of retrying forever
                total_results = 0
                break
            print("Invalid data sent by the server..")
            time.sleep(retry_delay)
            invalid_count = invalid_count + 1

        dict_venues_category[category_name] = total_results
        time.sleep(0.01)

    return dict_venues_category

In [None]:
# call the above function for different trip destinations and save the response to CSV

# One record (category name -> venue count) per destination, in df_trip_destinations row order.
# The records are collected in a list and converted once, instead of growing a DataFrame with
# pd.concat on every iteration.
list_venues_category = [
    venues_extractor(CLIENT_ID, CLIENT_SECRET, VERSION, RADIUS, lat, long)
    for lat, long in zip(df_trip_destinations["stop_lat"], df_trip_destinations["stop_lon"])
]
df_venues_category = pd.DataFrame(list_venues_category)

# the file is written without an index, so rows are only identified by their position
df_venues_category.to_csv(f"{resource_directory}stops_foursquare_data.txt", index=False)

In [None]:
# read-out CSV containing destination location information (venue details) and combine with stop-id co-ordinates to form dataframe for clustering
df_venues_category = pd.read_csv(f"{resource_directory}stops_foursquare_data.txt")
# The join is on the default integer index (row position), so it is only correct while
# df_trip_destinations has the same row order as when the CSV was produced.
# Re-running this cell without rebuilding df_trip_destinations raises on the overlapping columns.
df_trip_destinations = df_trip_destinations.join(df_venues_category)
df_trip_destinations

## Step 5: Clustering the data on the basis of stop location/venue details

In [None]:
# Normalise the features used to train the model: keep the columns from position 5 onwards
# and divide every column by its own total.
# NOTE(review): position 5 is assumed to be where the venue-category columns start - confirm
# against the columns of stops.txt.
df_trip_destinations_nrmlzd = df_trip_destinations.iloc[:, 5:].pipe(
    lambda counts: counts.div(counts.sum())
)
df_trip_destinations_nrmlzd

In [None]:
# Search for the best number of clusters with the elbow method.
# A fixed random_state keeps the k-means initialisation deterministic.
model = KMeans(random_state=0)
visualizer = KElbowVisualizer(model, k=(1,20), timings=False)
# fit on the normalised features, then read the detected elbow
visualizer.fit(df_trip_destinations_nrmlzd.values)
best_k = visualizer.elbow_value_
visualizer.show() # finalize and render the figure

In [None]:
# Train k-means with the k found by the elbow search.
kclusters = best_k
kmeans = KMeans(n_clusters=kclusters, random_state=0)
kmeans.fit(df_trip_destinations_nrmlzd.values)

# peek at the cluster labels assigned to the first ten rows
kmeans.labels_[:10]

In [None]:
# Copy the destinations frame and add the k-means label of every row as column number 5.
df_trip_destinations_clustered = df_trip_destinations.copy()
df_trip_destinations_clustered.insert(loc=5, column='cluster_label', value=kmeans.labels_)
df_trip_destinations_clustered

In [None]:
# create map centred on the mean coordinates of the destination stops
map_trip_destinations_clustered = folium.Map(location=[df_trip_destinations_clustered["stop_lat"].mean(), df_trip_destinations_clustered["stop_lon"].mean()], zoom_start=11)

# set color scheme for the clusters
# Colours are looked up with `cluster-1`, so label 0 wraps around to the last entry.
# The hand-picked palette covers up to 4 clusters. The cluster count comes from the elbow
# search, so for larger values a rainbow palette of matching size is generated instead
# (this avoids shared colours and an IndexError).
colors_array = ['#000000', '#FF0000', '#0000FF', '#FF00FF']
if kclusters > len(colors_array):
    colors_array = [colors.rgb2hex(rgba) for rgba in cm.rainbow(np.linspace(0, 1, kclusters))]

# add one circle marker per destination stop, coloured by its cluster
for lat, lon, poi, cluster in zip(df_trip_destinations_clustered['stop_lat'], df_trip_destinations_clustered['stop_lon'], df_trip_destinations_clustered['stop_name'], df_trip_destinations_clustered['cluster_label']):
    label = folium.Popup(str(poi) + '\n - \n Cluster' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=colors_array[cluster-1],
        fill=True,
        fill_color=colors_array[cluster-1],
        fill_opacity=1).add_to(map_trip_destinations_clustered)

map_trip_destinations_clustered


In [None]:
# Plot, for every cluster, the 75th percentile of each normalised feature.
# This palette is the map palette rotated by one position: the map indexes its colours with
# `cluster-1`, the plot assigns them in column (cluster label) order.
colors_array = ['#FF00FF', '#000000', '#FF0000', '#0000FF']
df_trip_destinations_nrmlzd_for_plt = df_trip_destinations_nrmlzd.copy()
df_trip_destinations_nrmlzd_for_plt.insert(0, 'cluster_label', kmeans.labels_)
df_trip_destinations_nrmlzd_for_plt = (
    df_trip_destinations_nrmlzd_for_plt
    .groupby("cluster_label")
    .describe()
    # keep only the "75%" statistic of every feature
    .loc[:, lambda stats: stats.columns.get_level_values(1).isin({"75%"})]
    .transpose()
)
# flatten the (feature, statistic) index to the feature name only
df_trip_destinations_nrmlzd_for_plt.index = df_trip_destinations_nrmlzd_for_plt.index.get_level_values(0).to_list()
df_trip_destinations_nrmlzd_for_plt.plot(color=colors_array)
plt.xticks(range(len(df_trip_destinations_nrmlzd_for_plt.index)), df_trip_destinations_nrmlzd_for_plt.index, rotation=90)
plt.show()

In [None]:
# plot bus_stop_count for each cluster
# The counts Series is renamed before it becomes a frame: the name pandas gives to the
# value_counts() result differs between pandas versions, so renaming the column afterwards
# by its old name ("cluster_label") can silently do nothing.
(
    df_trip_destinations_clustered["cluster_label"]
    .value_counts()
    .rename("bus_stop_count")
    .to_frame()
    .plot(kind='bar')
)
plt.xticks(rotation=0)
plt.show()

Conclusion :

What are cluster 1 & 2?
There are two significant differences between these two clusters (Number of Events and
travel/transport venues). Cluster 2 contains the highest amount of transport venues
compared to any of the clusters. Cluster 1 is very similar to cluster 2 with the only
significant difference in the number of Events venues.

What are cluster 0 & 3?
Cluster 0 has a smaller number of venues compared to any of the other clusters.
Cluster 3 has more venues than cluster 0 but lesser than cluster 1 or 2.

So, from the above observation and plot, we can say that:
- Cluster 2 is a developed part of the city and contains bus stops that are close to
transport venues such as railway / bus / metro stations or airports etc.
- Cluster 1 is also a developed part of the city with the highest amount of Event
venues.
- Cluster 0 is remote / underdeveloped part of the city and
- Cluster 3 is a developing part of the city.
- From the map, we can say that although clusters 1, 2, and 3 have fewer bus stops,
they are densely located. This suggests that bus stops have very good connectivity
in the central / old part of the city.

From the bar chart, we can say that the bus service has a large network in the remote parts of the city (cluster 0), and that it also covers travel / transport venues (cluster 2) with a higher number of bus stops than cluster 1.

## Step 6: Visualize the bus frequency 
Let's cluster the data based on the total trip_count per bus stop and find out the busiest bus stops.

In [None]:
# Normalise the single feature used for this clustering: trip_count divided by its total.
df_trip_destinations_nrmlzd = df_trip_destinations.filter(items=["trip_count"]).pipe(
    lambda counts: counts.div(counts.sum())
)
df_trip_destinations_nrmlzd

In [None]:
# Search for the best number of clusters with the elbow method.
# A fixed random_state keeps the k-means initialisation deterministic.
model = KMeans(random_state=0)
visualizer = KElbowVisualizer(model, k=(1,20), timings=False)
# fit on the normalised trip counts, then read the detected elbow
visualizer.fit(df_trip_destinations_nrmlzd.values)
best_k = visualizer.elbow_value_
visualizer.show() # finalize and render the figure

In [None]:
# Train k-means with the k found by the elbow search.
kclusters = best_k
kmeans = KMeans(n_clusters=kclusters, random_state=0)
kmeans.fit(df_trip_destinations_nrmlzd.values)

# peek at the cluster labels assigned to the first ten rows
kmeans.labels_[:10]

In [None]:
# Copy the destinations frame and add the k-means label of every row as column number 5.
df_trip_destinations_clustered = df_trip_destinations.copy()
df_trip_destinations_clustered.insert(loc=5, column='cluster_label', value=kmeans.labels_)
df_trip_destinations_clustered

In [None]:
# Bar chart of the 75th percentile of trip_count within every cluster.
df_trip_destinations_nrmlzd_for_plt = (
    df_trip_destinations_clustered[["cluster_label","trip_count"]]
    .copy()
    .groupby("cluster_label")
    .describe()
    # keep only the "75%" statistic
    .loc[:, lambda stats: stats.columns.get_level_values(1).isin({"75%"})]
)

df_trip_destinations_nrmlzd_for_plt.plot(kind="bar")
plt.xticks(rotation=0)
plt.show()

In [None]:
# Bus stops in clusters 1 and 2 are the busiest: keep only those for the further analysis.
# NOTE(review): the labels 1 and 2 are hard-coded from one particular clustering run - confirm
# they still denote the busiest clusters whenever the data changes.
df_trip_destinations_clustered = df_trip_destinations_clustered[
    df_trip_destinations_clustered["cluster_label"].isin([1, 2])
]
df_trip_destinations_clustered

In [None]:
# Map centred on the mean coordinates of the remaining (busiest) destination stops
map_trip_destinations_clustered = folium.Map(
    location=[
        df_trip_destinations_clustered["stop_lat"].mean(),
        df_trip_destinations_clustered["stop_lon"].mean(),
    ],
    zoom_start=11,
)

# one rainbow colour per cluster, converted to hex strings
colors_array = [colors.rgb2hex(rgba) for rgba in cm.rainbow(np.linspace(0, 1, kclusters))]

# add one circle marker per stop; the colour is looked up with `cluster-1`
markers_colors = []
marker_columns = df_trip_destinations_clustered[['stop_lat', 'stop_lon', 'stop_name', 'cluster_label']]
for lat, lon, poi, cluster in marker_columns.itertuples(index=False, name=None):
    label = folium.Popup(str(poi) + '\n - \n Cluster' + str(cluster), parse_html=True)
    marker_color = colors_array[cluster-1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=1).add_to(map_trip_destinations_clustered)

map_trip_destinations_clustered

In [None]:
# now we know the busiest bus stop ids, lets capture trips that include filtered stop_id
# Index-only frame of the busiest stop ids; an inner join against it acts as a filter.
df_busy_stop_ids = df_trip_destinations_clustered.filter(items=["trip_end_stop_id"]).set_index("trip_end_stop_id")
# df1: trips that end at a busy stop; df2: trips that begin at a busy stop
df1 = df_BusScheduleInfo.join(df_busy_stop_ids, on="trip_end_stop_id", how ="inner").reset_index(drop=True)
df2 = df_BusScheduleInfo.join(df_busy_stop_ids, on="trip_bgn_stop_id", how ="inner").reset_index(drop=True)
# pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
# NOTE(review): a trip that both begins and ends at a busy stop appears in df1 and df2, so its
# rows are duplicated here - confirm this is intended.
df_BusScheduleInfo = pd.concat([df1, df2]).reset_index(drop=True)
df_BusScheduleInfo

Now, let's visualize the data.. 

In [None]:
# Time window and step used to sample the bus schedule
start_datetime = datetime.datetime(year=2019, month=8, day=2)  # Friday 02/08/2019, 00:00:00
stop_datetime = datetime.datetime(year=2019, month=8, day=3, hour=2)  # 02:00:00 on the following day
delta_time = datetime.timedelta(seconds=2 * 60)  # sample the bus frequency every 2 minutes

In [None]:
# Find the service_ids that run on the selected day.
service_datetime = start_datetime
service_day = service_datetime.strftime('%A').lower()       # weekday name, used as the calendar column name
service_date = int(service_datetime.strftime('%Y%m%d'))     # date as the integer YYYYMMDD
# keep services whose validity period covers the date and whose flag for that weekday is 1
df_ServiceID = df_TripCalendar.loc[
    df_TripCalendar["start_date"].le(service_date)
    & df_TripCalendar["end_date"].ge(service_date)
    & df_TripCalendar[service_day].eq(1),
    ["service_id"],
]
df_ServiceID

In [None]:
# Keep only the schedule rows of the running service_ids.
df_BusLocationWithDT = df_ServiceID.join(df_BusScheduleInfo.set_index("service_id"), on="service_id")
# Shift all four time columns by the same whole number of days, so that the earliest
# trip start falls on the date of start_datetime.
df_BusLocationWithDT[["trip_bgn_time", "trip_end_time", "arrival_time", "departure_time"]] = (
    df_BusLocationWithDT[["trip_bgn_time", "trip_end_time", "arrival_time", "departure_time"]].add(
        start_datetime.date() - df_BusLocationWithDT["trip_bgn_time"].min().date()
    )
)
df_BusLocationWithDT = df_BusLocationWithDT.reset_index(drop=True)
df_BusLocationWithDT

In [None]:
# create a dataframe that will contain no. of buses in motion for every 2 minutes
# one row per trip (the first row of each trip carries its begin/end times)
df_UniqueBusTripsWithDT = df_BusLocationWithDT.groupby("trip_id").head(1).reset_index(drop=True)

# Collect one record per sampled instant and build the frame once, instead of growing it
# with pd.concat inside the loop.
list_live_count = []
service_datetime = start_datetime
while service_datetime <= stop_datetime:
    # a trip is "in motion" when the instant lies between its begin and end time (inclusive)
    is_in_motion = (service_datetime >= df_UniqueBusTripsWithDT["trip_bgn_time"]) & (service_datetime <= df_UniqueBusTripsWithDT["trip_end_time"])
    live_count = df_UniqueBusTripsWithDT[is_in_motion]["trip_id"].count()
    list_live_count.append({"date_time" : service_datetime, "live_count" : live_count})
    service_datetime = service_datetime + delta_time
df_LiveBusCount = pd.DataFrame(list_live_count)
df_LiveBusCount

In [None]:
# Plot the number of buses in motion against time.
# date_time becomes the index, so it is used as the x axis.
df_LiveBusCount = df_LiveBusCount.set_index('date_time')
df_LiveBusCount.index = pd.to_datetime(df_LiveBusCount.index)
df_LiveBusCount.plot(grid=True, figsize=(20,10))
plt.show()

Conclusion:

From the plot, we can say that bus frequency rapidly increases around 6 am and touches the morning peak of approx. 660 buses in motion at around 9:30 am. We can call it 'morning rush'.\
After the noon bus frequency again increases close to 700 buses in motion and remains there up till 05:00 pm.\
Between 05:00 pm to 07:30 pm bus frequency is close to 750 buses in motion, we can call it as 'evening rush'.\
From 07:30 pm onwards frequency declines gradually till 10:00 pm, and after that, it takes a plunge.

In [None]:
# To visualize the bus transit frequency on the map, one image of the bus locations is saved
# for every sampled instant. The code below renders each map and stores it as a .png file;
# the images are converted into a .gif afterwards.

service_datetime = start_datetime
frame_number = 1
while service_datetime <= stop_datetime:
    # rows of trips running at this instant whose stop has already been reached ...
    df_filtered_BusLocationWithDT = df_BusLocationWithDT[((service_datetime >= df_BusLocationWithDT["trip_bgn_time"]) & (service_datetime <= df_BusLocationWithDT["trip_end_time"]) & (service_datetime >= df_BusLocationWithDT["arrival_time"]))]
    # ... reduced to the latest reached stop of every trip, i.e. the current bus position
    df_filtered_BusLocationWithDT = df_filtered_BusLocationWithDT[df_filtered_BusLocationWithDT.groupby("trip_id")["stop_sequence"].transform("max") == df_filtered_BusLocationWithDT["stop_sequence"]]

    map_pune = folium.Map(location=[df_BusStops["stop_lat"].mean(), df_BusStops["stop_lon"].mean()],
                      tiles='CartoDB dark_matter',
                      zoom_start=12)

    # busiest destination stops (red circles); latitude AND longitude both come from
    # df_trip_destinations_clustered so every marker sits on its own stop
    for lat, lng, label in zip(df_trip_destinations_clustered["stop_lat"], df_trip_destinations_clustered["stop_lon"], df_trip_destinations_clustered["stop_name"]) :
        folium.CircleMarker(
                            location = [lat, lng],
                            radius=4,
                            fill_opacity=1.0,
                            fill_color = '#FF3030',
                            color = '#FF3030',
                            popup=label
                            ).add_to(map_pune)

    # current bus positions (cyan squares)
    for lat, lng, label in zip(df_filtered_BusLocationWithDT["stop_lat"], df_filtered_BusLocationWithDT["stop_lon"], df_filtered_BusLocationWithDT["trip_headsign"]) :
        folium.RegularPolygonMarker(
                                    location = [lat, lng],
                                    number_of_sides=4,
                                    radius=4,
                                    color = '#00FFFF',
                                    fill_opacity=1.0,
                                    fill_color = '#00FFFF',
                                    popup=label
                                    ).add_to(map_pune)

    # timestamp label drawn on the map
    folium.Marker([18.486872, 73.613156],
                  icon=DivIcon(
                                icon_size=(150,36),
                                icon_anchor=(7,20),
                                html=f'<div style="font-size: 40pt; color : white">{service_datetime}</div>')
                ).add_to(map_pune)

    # render the map to PNG bytes and save it under a per-frame, zero-padded file name;
    # a fixed name would overwrite the previous frame on every iteration
    img_data = map_pune._to_png()
    img = Image.open(io.BytesIO(img_data))
    img.save(f"00_Ntbk_Resources\\01_DataAnalysis\\01_ImagesForGIF\\image{frame_number:04d}.png")

    frame_number = frame_number + 1
    service_datetime = service_datetime + delta_time

Now, we've images/frames that can be used to make GIF.\
There are approx. 819 images/frames (size: 339 MB). Since combining these images into a GIF requires a lot of memory, we used the 'PhotoScape X' software (Windows 10) to make the GIF.\
Settings for software are :\
Frame rate : 20 frames/sec\
Frame size : 500 x 251 px\
Loop : Infinite

Conclusion :\
From the .gif visualization we can conclude on the following points:

1) There are approx. 7 roads that are used by bus trips to connect/pass through the central part of the city.\
2) Bus frequency is fairly similar on every bus stop on these 7 main roads of the city for any given period.\
3) We can observe comparatively high traffic in the central part of the city from 7:30 am to 10:30 pm