In [1]:
import json

import numpy as np
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go

In [2]:
# Load the bus-route data: a JSON object whose keys are route names and whose
# values are lists of stop names.
data_file = "../data/bus_stops.json"
# JSON text is UTF-8 by specification; pin the encoding so the read does not
# depend on the platform's locale default.
with open(data_file, "r", encoding="utf-8") as f:
    # json.load parses straight from the file handle.
    stops_data = json.load(f)
stops_data

{'1': ['Jayanagar 9th Block',
  'Jayanagar 4th T Block',
  'Jayanagar 18th Main',
  'Jayanagar 4th Block Church',
  'Jayanagar 4th Block',
  'Jayanagar 3rd Block',
  'Madhavan Park',
  'South End Circle',
  'Shanthi Talkies',
  'Nagasandra Circle',
  'Nettakalappa Circle',
  'Basavanagudi Police Station',
  'National College Basavanagudi',
  'Gandhi Bazaar',
  'Ramakrishna Ashram',
  'Bangalore High School',
  'Chamarajpet/Uma Talkies',
  'Chamarajpet Police Station',
  'Mysore Road Flyover',
  'Cottonpet Hospital',
  'Goods Shed Road',
  'Kempegowda Bus Station/Majestic',
  'Platform Road',
  'Central Talkies',
  'Sheshadripuram',
  'Malleswaram Circle',
  'Malleswaram 8th Cross',
  'Malleswaram 11th Cross',
  'Malleswaram 18th Cross',
  'Tata Institute (IISc)',
  'Yeshwanthpur'],
 '1A': ['Jayanagar 9th Block',
  'Jayanagar 4th T Block',
  'Jayanagar 18th Main',
  'Jayanagar 4th Block Church',
  'Jayanagar 4th Block',
  'Jayanagar 3rd Block',
  'Madhavan Park',
  'South End Circle',
 

In [3]:
# Every top-level key of stops_data is one bus route.
route_count = len(stops_data)
print(f"No. of bus routes: {route_count}")

No. of bus routes: 2982


In [4]:
# Collect every distinct stop name that appears on any route.
# Only the stop lists are needed, so iterate over the values and build the set
# in one comprehension rather than a nested loop with an unused route key.
bus_stops = {stop for stops in stops_data.values() for stop in stops}
bus_stops

{'Nagaresha Nagenahalli',
 'Sadanapalya',
 'Nosenur',
 'Widia School',
 'Madappanahalli (Varthur Road)',
 'Murphy Town',
 'Railwaymen Layout',
 'Kodiyala Karenahalli',
 'Marappanapalya',
 'Shanthi Layout',
 'Inner Ring Road Ejipura Junction',
 'Mata Amritanandamayi Ashram Cross (Ullal Main Road)',
 'Vinayakanagar (Harohalli)',
 'Valmikinagar',
 'Koramangala Police Station',
 'Mohankumarnagar',
 'Vishwanathapura',
 'Gopalapura (Rajajinagar)',
 'Ravuthanahalli Cross',
 "State Bank Of India (St Mark's Road)",
 'Suvarnanagar',
 'Peenya 2nd Stage',
 'Nayandahalli Railway Gate',
 'Raghuvanahalli BCC Layout',
 'Vaddarahalli (Kanakapura Road)',
 'Karikalpalya Cross',
 'Ramagondanahalli (Varthur Road)',
 'Sulakunte Cross',
 'Delhi Public School South',
 'AECS Layout (Kundalahalli)',
 'Bettanahalli',
 'Ramakrishna Hegde Nagar',
 'Doddamaranahalli (Naduvarthi)',
 'Pujenahalli',
 'Agara BMTC Depot',
 'Kareemsab Layout',
 'Jeevanabhimanagar Police Station',
 'Anchipura (Nelamangala)',
 'Lakshmipura

In [5]:
# Report how many distinct stop names were collected across all routes.
unique_stop_count = len(bus_stops)
print(f"No. of unique bus stops: {unique_stop_count}")

No. of unique bus stops: 2615


In [6]:
# Histogram of route-name lengths, measured in characters.
routes = list(stops_data.keys())
route_char_lengths = [len(route) for route in routes]
fig = px.histogram(
    x=route_char_lengths,
    title="Bus Route Name Lengths",
    labels={"x": "Number of Characters"},
)
# px.histogram generates the title of the aggregated axis ("count") itself
# rather than taking it from `labels`, so set the y-axis title on the layout.
fig.update_layout(yaxis_title="Count of Bus Routes")
fig.show()

In [7]:
# Histogram of how often each character occurs across all route names.
# The characters are sorted before plotting.
route_characters = sorted(char for route in routes for char in route)
fig = px.histogram(
    x=route_characters,
    title="Bus Route Character Distribution",
    labels={"x": "Characters"},
)
# px.histogram generates the title of the aggregated axis ("count") itself
# rather than taking it from `labels`, so set the y-axis title on the layout.
fig.update_layout(yaxis_title="Count of Characters")
fig.show()

In [8]:
# Zero-initialised integer count table: one row per character position (up to
# the longest route name) and one column per distinct character.
n_positions = max(route_char_lengths)
n_characters = len(set(route_characters))
character_position = np.zeros((n_positions, n_characters), dtype=int)
character_position

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [9]:
# Count how often each character occurs at each position of a route name.
# Rows are positions (0-based); columns follow the order of unique_characters.
unique_characters = sorted(set(route_characters))
# Resolve each character's column once; list.index() would rescan the list for
# every character of every route.
char_to_column = {char: column for column, char in enumerate(unique_characters)}
# Start from a fresh zero table so that re-running this cell does not add the
# counts on top of the totals left by a previous run.
character_position = np.zeros(
    (max(len(route) for route in routes), len(unique_characters)), dtype=int
)
for route in routes:
    for position, char in enumerate(route):
        character_position[position, char_to_column[char]] += 1
character_position

array([[   0,    0,  225, 1158,  839,  144,  105,   55,   37,   43,   44,
          20,   23,   17,    5,    0,    9,   13,    0,    0,   37,   26,
           0,   31,    0,    0,    0,    0,    2,   12,    0,  135,    2,
           0,    0,    0],
       [   1,  358,  402,  293,  251,  364,  329,  231,  200,  205,  190,
           2,   33,   26,   11,    1,   26,    0,   10,   13,    0,    1,
           0,    1,    0,    0,    0,   12,   17,    0,    0,    0,    0,
           0,    0,    0],
       [   1,  390,  347,  242,  266,  198,  299,  255,  198,  226,  129,
          55,   34,   24,   27,   42,   25,   15,   14,    0,    9,   12,
           3,    3,    5,    1,    0,   13,   38,    1,    0,    1,    0,
           0,    0,    0],
       [   1,   55,   41,   23,   18,    5,   33,   18,    5,   12,   10,
         250,  182,  179,  161,  160,  131,  116,  103,    3,   87,   92,
          57,  102,   66,   54,   45,   51,   34,   45,   16,   42,   19,
          12,   19,   28],
    

In [33]:
# Normalise every row by its own total, so the values for one position sum to 1.
position_totals = character_position.sum(axis=1, keepdims=True)
position_share = character_position / position_totals
# Exact zeros (character never counted at that position) become NaN.
character_position_prob_per_position = np.where(position_share == 0, np.nan, position_share)

In [42]:
# Normalise every column by its own total, so the values for one character
# sum to 1 across positions.
character_totals = character_position.sum(axis=0, keepdims=True)
char_share = character_position / character_totals
# Exact zeros (character never counted at that position) become NaN.
character_position_prob_per_char = np.where(char_share == 0, np.nan, char_share)

In [11]:
# Heatmap of the per-position shares: x = character, y = row index (position).
position_heatmap = go.Heatmap(z=character_position_prob_per_position, x=unique_characters)
fig = go.Figure(data=[position_heatmap])
layout_options = dict(
    title="Character Position Heatmap in Bus Routes",
    xaxis_title="Characters",
    yaxis_title="Position in Route Name",
)
fig.update_layout(**layout_options)
fig.show()

In [12]:
# Same per-position heatmap on a log10 colour scale.
# NaN entries stay NaN under np.log10.
fig = go.Figure()
fig.add_trace(go.Heatmap(z=np.log10(character_position_prob_per_position), x=unique_characters))
fig.update_layout(
    title="Character Position Heatmap in Bus Routes (Log Scale)",
    xaxis_title="Characters",
    yaxis_title="Position in Route Name",
)
# Show the figure explicitly, like the other plotting cells, instead of relying
# on update_layout()'s return value being the cell's last expression.
fig.show()

In [59]:
# Heatmap of the per-character shares: each column (character) is normalised
# over positions, unlike the per-position heatmap where each row is normalised.
fig = go.Figure()
fig.add_trace(go.Heatmap(z=character_position_prob_per_char, x=unique_characters))
fig.update_layout(
    # The title names the normalisation so this figure can be told apart from
    # the per-position heatmap, which previously carried the identical title.
    title="Character Position Heatmap in Bus Routes (Normalized per Character)",
    xaxis_title="Characters",
    yaxis_title="Position in Route Name",
)
fig.show()

In [58]:
# Sub-table of the per-character shares: rows 3 up to (not including) the last
# position, columns from index 11 onward, with the matching labels.
# NOTE(review): index 11 appears to be where the letters begin in
# unique_characters (the displayed labels start at 'A') - confirm if the
# character set changes. The reason for dropping the last row is not stated.
position_rows = slice(3, -1)
first_label_column = 11
sample_array = character_position_prob_per_char[position_rows, first_label_column:]
sample_labels = unique_characters[first_label_column:]
sample_labels, sample_array

(['A',
  'B',
  'C',
  'D',
  'E',
  'F',
  'G',
  'H',
  'I',
  'J',
  'K',
  'L',
  'M',
  'N',
  'P',
  'Q',
  'R',
  'S',
  'T',
  'U',
  'V',
  'W',
  'X',
  'Y',
  'Z'],
 array([[0.52192067, 0.54654655, 0.62152778, 0.71555556, 0.71748879,
         0.655     , 0.71165644, 0.78625954, 0.16666667, 0.63970588,
         0.58227848, 0.890625  , 0.70833333, 0.80487805, 0.87096774,
         0.9375    , 0.5862069 , 0.36170213, 0.67164179, 1.        ,
         0.22826087, 0.76      , 1.        , 1.        , 0.93333333],
        [0.29645094, 0.17117117, 0.12847222, 0.08      , 0.08071749,
         0.035     , 0.09202454, 0.03053435, 0.11111111, 0.02205882,
         0.12658228, 0.03125   , 0.02777778, 0.1097561 , 0.09677419,
         0.0625    , 0.09195402, 0.03191489, 0.10447761,        nan,
         0.01630435, 0.12      ,        nan,        nan, 0.06666667],
        [0.02087683, 0.01201201, 0.01736111, 0.01333333, 0.00896861,
         0.01      , 0.02453988,        nan,        nan,       

In [61]:
# Dendrogram of the sampled characters, built from their per-position shares.
# The table is transposed so that each row handed to create_dendrogram is one
# character (matching sample_labels); np.nan_to_num maps the NaN cells to 0.0.
character_profiles = np.nan_to_num(sample_array).T
fig = ff.create_dendrogram(character_profiles, labels=sample_labels, orientation="left")
fig.update_layout(height=800, width=1000)
fig.show()