In [5]:
import pandas as pd
import numpy  as np
import ast
import nltk
from nltk.stem.porter import PorterStemmer

In [6]:
# Load the bus roster (one CSV under ../assets) into a DataFrame.
bus = pd.read_csv(filepath_or_buffer="../assets/bus_routes.csv")
# Count rows that are exact duplicates of an earlier row (shown as the cell output).
bus.duplicated().sum()

0

In [7]:
# Lower-case and trim surrounding whitespace in the free-text columns so that
# later string comparisons are case- and padding-insensitive.
for column in ('Driver_Name', 'Route'):
    bus[column] = bus[column].str.lower().str.strip()


In [8]:
# Function to convert route strings into lists of stops
def convert(route):
    """Convert a hyphen-separated route string into a list of stop names.

    Parameters
    ----------
    route : str or any
        Route description such as ``"clock tower - isbt dehradun"``.
        Anything that is not a ``str`` (e.g. a missing value) is treated as
        "no route".

    Returns
    -------
    list of str
        Stop names in their original order, each stripped of surrounding
        whitespace. Empty segments (produced by an empty string or by
        leading, trailing or doubled hyphens) are dropped so the result never
        contains ``''``. Returns ``[]`` for non-string input.
    """
    if not isinstance(route, str):
        return []

    stripped_stops = (stop.strip() for stop in route.split('-'))
    # Discard blank segments instead of emitting '' as a stop name.
    return [stop for stop in stripped_stops if stop]


In [9]:
# Split every route string into its list of stops. Rows that already hold a
# list are kept as-is so re-running this cell is safe: convert() returns [] for
# anything that is not a string, which would otherwise wipe the column.
bus['Route'] = bus['Route'].apply(
    lambda route: route if isinstance(route, list) else convert(route)
)

In [10]:
# Every distinct stop name across all routes, in first-seen order. Can be used
# to check whether a user-supplied destination is a known stop.
# dict.fromkeys() de-duplicates with a hash lookup per stop while preserving
# insertion order, avoiding a linear list scan for every stop.
unique_routes = list(dict.fromkeys(
    stop
    for stops in bus['Route'] if isinstance(stops, list)
    for stop in stops if isinstance(stop, str)
))

# Print the unique routes
print(unique_routes)

['clock tower', 'darshanlal chowk', 'saharanpur chowk', 'isbt dehradun', 'graphic era hill university', 'asley hall', 'matawala bagh', 'daudwala', 'mathurawala', 'vishnupuram', 'bangali kothi', 'kargi chowk', 'ranipokhri', 'doiwala', 'lachiwala', 'kuanwala', 'harawala', 'miawala', 'mokampur', 'jogiwala', 'rispana', 'gujraunwala', 'raipur chowk', 'dobhal chowk', '6 no. pulia', 'ring road', 'post office nehru gram', 'graphic era hill university/gehu', 'hathibadkala', 'garhi cant', 'vijay coloney', 'chir bagh', 'cm house', 'ongc chowk', 'ballupur chowk', 'gms road', 'rajender nagar', 'yamuna coloney', 'bindal pul', 'kishan nagar', 'blood bank', 'supply', 'it park dehradun', 'nala paani chowk', 'shastdhara crossing', 'ladpur', 'fountain chowk rispana', 'fountain chowk', 'nehru colony', 'kargi', 'nakrounda more', 'naya gaon palio', 'badowala', 'telpur', 'mehuwala', 'race course', 'dharampur', 'mata mandir', 'balbir road', 'rajpur', 'great value', 'dilaram bazar', 'nanni bakery dehradun', 's

In [11]:
# Build `df` from the cleaned `bus` frame and preview its first five rows.
df = pd.DataFrame(data=bus)
df.head(n=5)

Unnamed: 0,S.No,Reg.No.,Bus_No,Shift_Timing,Driver_Name,Contact_No,Route
0,1,UK07PA1699,9,7.00 AM 11.00 AM,arjun gurung,9557782210,"[clock tower, darshanlal chowk, saharanpur cho..."
1,2,UK07PA1696,18,7.00 AM 11.00 AM,sunil kumar iv,7060226291,"[clock tower, darshanlal chowk, saharanpur cho..."
2,3,UK07PA1691,24,7.00 AM 11.00 AM,m.r rama,8958990677,"[clock tower, darshanlal chowk, saharanpur cho..."
3,4,UK07PA3542,42,7.00 AM 11.00 AM,gudmohan,7060656243,"[asley hall, clock tower, darshanlal chowk, sa..."
4,5,UK07PA4816,64,7.00 AM 11.00 AM,hari singh,9410105794,"[asley hall, clock tower, darshanlal chowk, sa..."


In [12]:
# Number of buses (rows in df)
total_buses = len(df)
print(f"Total buses: {total_buses}")

Total buses: 74


In [13]:
# Create a location-coordinates file (latitude/longitude) for the given stops

In [14]:
!pip install geopy



In [15]:
import time

import geopy as gp
from geopy.geocoders import Nominatim

# Nominatim geocoding client identified as "bus_route_locator", with a
# 10-second timeout per request.
geolocator = Nominatim(user_agent="bus_route_locator", timeout=10)

In [16]:
# Rate-limit geocoding requests to avoid overloading the Nominatim service
# (and the geocoder time-outs that follow from being throttled).
from geopy.extra.rate_limiter import RateLimiter

# Keep the identifying user agent and the 10-second timeout used when the
# client was first created, rather than resetting to a placeholder
# "your_app_name" client with the default timeout.
geolocator = Nominatim(user_agent="bus_route_locator", timeout=10)
# geocode() enforces a minimum delay of one second between consecutive calls.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

In [17]:
# Bookkeeping for the geocoding pass: stop names that were resolved to
# coordinates, and stop names for which the geocoder returned nothing.
found, not_found = [], []

In [18]:
# Function to get coordinates of a location
def get_coordinates(location, city="Dehradun"):
    """Geocode a stop name and return its ``(latitude, longitude)``.

    The query sent to the geocoder is ``"<location>, <city>"`` so that
    ambiguous stop names are resolved within the given city.

    Parameters
    ----------
    location : str
        Stop name to look up.
    city : str, optional
        City appended to the query; defaults to ``"Dehradun"``.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` when the geocoder returns a match,
        otherwise ``(None, None)``.

    Side effects
    ------------
    Appends ``location`` to the module-level ``found`` list on a match and to
    ``not_found`` when the geocoder returns nothing. If an exception reaches
    this function it is printed and the stop is recorded in neither list.

    Notes
    -----
    The lookup goes through the rate-limited ``geocode`` wrapper rather than
    ``geolocator.geocode`` directly, so consecutive calls honour the
    configured minimum delay. NOTE(review): per the geopy documentation,
    ``RateLimiter`` by default retries and then swallows geocoder service
    errors, returning ``None``; such lookups would be recorded in
    ``not_found`` -- confirm this is acceptable.
    """
    try:
        query = f"{location}, {city}"
        location_data = geocode(query)
        if location_data:
            found.append(location)
            return location_data.latitude, location_data.longitude
        not_found.append(location)
        return None, None
    except Exception as e:
        # Best effort: report the failure and keep the batch going.
        print(f"Error with location '{location}': {e}")
        return None, None

In [19]:
# Parallel accumulators filled by the geocoding loop: entry i of each list
# describes the same stop.
location_names, latitudes, longitudes = [], [], []

In [20]:
# Geocode every stop and record the result; stops without a match are stored
# with None coordinates so the three lists stay aligned.
for location in unique_routes:
    if not location.strip():
        continue  # Ignore empty locations
    lat, lon = get_coordinates(location)
    location_names.append(location)
    latitudes.append(lat)
    longitudes.append(lon)
     

In [21]:
# Assemble the geocoding results into one table: stop name plus coordinates.
df_locations = pd.DataFrame(
    dict(name=location_names, Latitude=latitudes, Longitude=longitudes)
)

In [23]:
# Persist the coordinates table so the geocoding step need not be repeated.
# The DataFrame index is written as well (to_csv default), as an unnamed first column.
df_locations.to_csv(path_or_buf="../assets/location_coordinates.csv")

In [24]:
import folium

In [26]:
# Reload the saved stop coordinates; note that `df` is rebound here and no
# longer refers to the bus roster.
df = pd.read_csv(filepath_or_buffer="../assets/location_coordinates.csv")

In [27]:
# Keep only the stops for which both a latitude and a longitude are present.
has_coordinates = df[['Latitude', 'Longitude']].notna().all(axis=1)
df = df[has_coordinates]

In [28]:
# Centre the map on the first stop that has coordinates.
center_lat = df['Latitude'].iloc[0]
center_lon = df['Longitude'].iloc[0]
m = folium.Map(location=[center_lat, center_lon], zoom_start=6)

# Add one marker per stop; the popup shows the stop name and its coordinates.
# `count` tracks how many markers were placed.
count = 0
for index, row in df.iterrows():
    popup_text = f"Loc : {row['name']}\n  \nLat: {row['Latitude']}\n  Lon: {row['Longitude']}"
    marker = folium.Marker([row['Latitude'], row['Longitude']], popup=popup_text)
    marker.add_to(m)
    count += 1


In [29]:
# Report how many markers were added to the map.
summary = f"Total locations plotted : {count}"
print(summary)

Total locations plotted : 33


In [30]:
# Display the map: `m` is the cell's last expression, so the notebook shows it
# as the cell output.
m