In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

data = pd.read_csv('../DataOut/bus_trip_all_points.csv')

In [2]:
data

Unnamed: 0,id,deviceid,devicetime,latitude,longitude,speed,date,time,geometry,bus_stop,trip_id,direction
0,574733098,116,2021-10-16 08:03:04,7.293092,80.635573,9.17927,2021-10-16,08:03:04,POINT (184746.41595346577 232322.75960448402),BT01,2.0,1
1,574733099,116,2021-10-16 08:03:19,7.293068,80.636430,14.03890,2021-10-16,08:03:19,POINT (184841.00784031872 232320.15459256113),,2.0,1
2,574733100,116,2021-10-16 08:03:34,7.293043,80.637552,10.25920,2021-10-16,08:03:34,POINT (184964.84865244984 232317.35306792473),,2.0,1
3,574733101,116,2021-10-16 08:03:49,7.292725,80.637707,7.55940,2021-10-16,08:03:49,POINT (184981.95255171193 232282.14959255166),,2.0,1
4,574733102,116,2021-10-16 08:04:04,7.292292,80.637663,3.77970,2021-10-16,08:04:04,POINT (184977.15733984314 232234.22458233108),,2.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
303224,585350956,1377,2021-10-27 19:02:53,7.290715,80.638227,5.39957,2021-10-27,19:02:53,POINT (185039.30275057122 232059.86188720367),,1365.0,2
303225,585350957,1377,2021-10-27 19:03:08,7.291095,80.637743,8.09935,2021-10-27,19:03:08,POINT (184985.95129993305 232101.89901670374),,1365.0,2
303226,585351802,1377,2021-10-27 19:03:23,7.291273,80.637273,5.39957,2021-10-27,19:03:23,POINT (184934.06182375905 232121.63129080654),,1365.0,2
303227,585351803,1377,2021-10-27 19:03:38,7.291202,80.636218,18.89850,2021-10-27,19:03:38,POINT (184817.57099444696 232113.7373533016),,1365.0,2


## Feature Engineering

### Add Acceleration

In [3]:

# Derive a per-trip acceleration column from consecutive rows.
# Requires a DataFrame named 'data' with columns: 'speed', 'devicetime', and 'trip_id'.
# NOTE(review): rows are assumed to already be in chronological order within each trip - confirm.

# Convert the 'devicetime' column to pandas Timestamp if it's not already
data['devicetime'] = pd.to_datetime(data['devicetime'])

# Time elapsed since the previous row of the same trip (NaT on each trip's first row)
data['time_diff'] = data.groupby('trip_id')['devicetime'].diff()

# Change in speed since the previous row of the same trip (NaN on each trip's first row)
data['speed_diff'] = data.groupby('trip_id')['speed'].diff()

# Elapsed seconds, with zero-length intervals (duplicate timestamps) masked to NaN
# so that the division below produces NaN rather than +/-inf.
elapsed_seconds = data['time_diff'].dt.total_seconds()
elapsed_seconds = elapsed_seconds.where(elapsed_seconds != 0)

# Acceleration = speed difference / time difference (speed units per second)
data['acceleration'] = data['speed_diff'] / elapsed_seconds


In [4]:
# Fill NaN acceleration values with 0.
# The result is assigned back instead of calling fillna(inplace=True) on the
# column selection: that chained in-place form is deprecated in pandas 2.x and
# does not modify `data` when Copy-on-Write is enabled.
data['acceleration'] = data['acceleration'].fillna(0)
# Drop the intermediate time_diff and speed_diff columns.
# errors='ignore' keeps this cell safe to re-run after they are already gone.
data = data.drop(columns=['time_diff', 'speed_diff'], errors='ignore')

In [5]:
# save to csv
data.to_csv('../DataOut/bus_trip_all_points_with_acceleration.csv', index=False)

In [6]:
data

Unnamed: 0,id,deviceid,devicetime,latitude,longitude,speed,date,time,geometry,bus_stop,trip_id,direction,acceleration
0,574733098,116,2021-10-16 08:03:04,7.293092,80.635573,9.17927,2021-10-16,08:03:04,POINT (184746.41595346577 232322.75960448402),BT01,2.0,1,0.000000
1,574733099,116,2021-10-16 08:03:19,7.293068,80.636430,14.03890,2021-10-16,08:03:19,POINT (184841.00784031872 232320.15459256113),,2.0,1,0.323975
2,574733100,116,2021-10-16 08:03:34,7.293043,80.637552,10.25920,2021-10-16,08:03:34,POINT (184964.84865244984 232317.35306792473),,2.0,1,-0.251980
3,574733101,116,2021-10-16 08:03:49,7.292725,80.637707,7.55940,2021-10-16,08:03:49,POINT (184981.95255171193 232282.14959255166),,2.0,1,-0.179987
4,574733102,116,2021-10-16 08:04:04,7.292292,80.637663,3.77970,2021-10-16,08:04:04,POINT (184977.15733984314 232234.22458233108),,2.0,1,-0.251980
...,...,...,...,...,...,...,...,...,...,...,...,...,...
303224,585350956,1377,2021-10-27 19:02:53,7.290715,80.638227,5.39957,2021-10-27,19:02:53,POINT (185039.30275057122 232059.86188720367),,1365.0,2,-0.431969
303225,585350957,1377,2021-10-27 19:03:08,7.291095,80.637743,8.09935,2021-10-27,19:03:08,POINT (184985.95129993305 232101.89901670374),,1365.0,2,0.179985
303226,585351802,1377,2021-10-27 19:03:23,7.291273,80.637273,5.39957,2021-10-27,19:03:23,POINT (184934.06182375905 232121.63129080654),,1365.0,2,-0.179985
303227,585351803,1377,2021-10-27 19:03:38,7.291202,80.636218,18.89850,2021-10-27,19:03:38,POINT (184817.57099444696 232113.7373533016),,1365.0,2,0.899929


### Radial Acceleration

In [7]:
from math import cos, radians, sqrt
from pyproj import Proj, Transformer

# UTM projection shared by every call to lat_lon_to_utm. It is created lazily on
# first use and then reused, because constructing a Proj object for each point
# is needlessly expensive when converting a whole dataset.
_UTM_ZONE_44_PROJ = None


def lat_lon_to_utm(lat, lon):
    """Project WGS84 latitude/longitude to UTM zone 44 coordinates.

    Parameters
    ----------
    lat, lon
        Latitude and longitude in degrees. They are passed straight to the
        pyproj ``Proj`` object as ``(lon, lat)``.

    Returns
    -------
    tuple
        ``(utm_easting, utm_northing)`` as returned by the projection.
    """
    global _UTM_ZONE_44_PROJ
    if _UTM_ZONE_44_PROJ is None:
        # UTM zone 44 on the WGS84 ellipsoid (the zone used for Sri Lanka)
        _UTM_ZONE_44_PROJ = Proj(proj='utm', zone=44, ellps='WGS84')

    # Convert latitude and longitude to UTM coordinates
    utm_easting, utm_northing = _UTM_ZONE_44_PROJ(lon, lat)
    return utm_easting, utm_northing
# Calculate radius of curvature from three projected (x, y) points (radius of the circle through them)

def calculate_radius_of_curvature(x1, y1, x2, y2, x3,y3):
    """Return the radius of the circle passing through three planar points.

    Parameters
    ----------
    x1, y1, x2, y2, x3, y3 : float
        Coordinates of the three points in a planar coordinate system.

    Returns
    -------
    float or int
        The circumscribed-circle radius rounded to 5 decimal places, or ``0``
        when the points are collinear or coincide (no finite circle exists).

    Notes
    -----
    The radius is computed as ``R = a*b*c / (4 * area)`` from the triangle's
    side lengths. Only coordinate differences are used, so large absolute
    coordinates do not cause cancellation error, and no floor division is
    involved (floor division on float input truncates the circle centre and
    distorts the radius).
    """
    # Edge vectors from the first point to the second and third points
    ax, ay = x2 - x1, y2 - y1
    bx, by = x3 - x1, y3 - y1

    # Cross product of the edge vectors = twice the signed triangle area
    cross = ax * by - bx * ay

    # Avoid division by zero: collinear or coincident points
    if cross == 0:
        return 0

    side_12 = sqrt(ax * ax + ay * ay)
    side_13 = sqrt(bx * bx + by * by)
    side_23 = sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2)

    # R = a*b*c / (4 * area), with area = |cross| / 2
    r = round(side_12 * side_13 * side_23 / (2 * abs(cross)), 5)
    return r
    

# Calculate radial acceleration based on speed and radius of curvature
def calculate_radial_acceleration(speed, radius_of_curvature):
    """Return speed squared divided by the radius of curvature.

    A radius of exactly 0 is treated as "no curvature information" and
    yields 0 instead of dividing by zero.
    """
    # Guard clause: nothing to divide by
    if radius_of_curvature == 0:
        return 0
    return speed**2 / radius_of_curvature

# Compute radial acceleration for every trip and write it back onto `data`.
#
# For row i of a trip, the radius of curvature is taken from the circle through
# the rows at i - window_size, i and i + window_size; the first and last
# window_size rows of each trip therefore stay NaN.

# Dictionary of per-trip DataFrames, keyed by trip ID
trip_dataframes = {}
window_size = 10  # Number of rows on each side used for calculating radius of curvature
trips = data['trip_id'].unique()

# Iterate over trip IDs to calculate radial acceleration for each trip's DataFrame
for trip_id in trips:
    trip_df = data[data["trip_id"] == trip_id].copy()  # Rows of the current trip
    trip_df.reset_index(drop=True, inplace=True)

    n_rows = len(trip_df)
    # Start from all-NaN so the column exists even for trips too short for a window
    radial_values = [np.nan] * n_rows

    if n_rows > 2 * window_size:
        # Project the whole trip in one call instead of three calls per row
        eastings, northings = lat_lon_to_utm(
            trip_df["latitude"].to_numpy(), trip_df["longitude"].to_numpy()
        )
        speeds = trip_df["speed"].tolist()

        for i in range(window_size, n_rows - window_size):
            before, after = i - window_size, i + window_size
            radius_of_curvature = calculate_radius_of_curvature(
                float(eastings[before]), float(northings[before]),
                float(eastings[i]), float(northings[i]),
                float(eastings[after]), float(northings[after]),
            )
            radial_values[i] = calculate_radial_acceleration(speeds[i], radius_of_curvature)

    trip_df["radial_acceleration"] = radial_values
    trip_dataframes[trip_id] = trip_df  # Store the calculated DataFrame for the trip

# Concatenate all trip-specific dataframes into one
concatenated_df = pd.concat(trip_dataframes.values(), ignore_index=True)

# Assign radial acceleration to the original dataframe by matching on 'id'.
# A single lookup table replaces a full-frame boolean scan per row, which was
# quadratic in the number of rows. keep='last' reproduces "later rows win"
# should an id appear more than once.
radial_by_id = (
    concatenated_df
    .drop_duplicates(subset='id', keep='last')
    .set_index('id')['radial_acceleration']
)
data['radial_acceleration'] = data['id'].map(radial_by_id)

print(data)

In [None]:
# import pandas as pd

# # Create the original dataframe
# data = pd.DataFrame({
#     'id': [1, 2, 3, 4, 5],
#     'trip_id': [1, 2, 1, 3, 2],
#     'radial_acceleration': [0.0, 0.0, 0.0, 0.0, 0.0]
# })

# # Create trip-specific dataframes with calculated radial acceleration values
# trip_dataframes = {
#     1: pd.DataFrame({'id': [1, 3], 'radial_acceleration': [10.0, 15.0]}),
#     2: pd.DataFrame({'id': [2, 5], 'radial_acceleration': [20.0, 25.0]})
# }

# # Concatenate all trip-specific dataframes into one
# concatenated_df = pd.concat(trip_dataframes.values(), ignore_index=True)

# # Iterate through rows and assign radial acceleration values to the original dataframe
# for index, row in concatenated_df.iterrows():
#     data.loc[data['id'] == row['id'], 'radial_acceleration'] = row['radial_acceleration']

# print(data)


In [None]:
trip_dataframes[4].head(20)

In [None]:
# Fill NaN radial acceleration values with 0.
# Assign the result back rather than using fillna(inplace=True) on the column
# selection: the chained in-place form is deprecated in pandas 2.x and does not
# modify `data` when Copy-on-Write is enabled.
data['radial_acceleration'] = data['radial_acceleration'].fillna(0)

In [None]:
data.to_csv('../DataOut/bus_trip_all_points_with_acceleration_and_radial_acceleration.csv', index=False)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Assuming you have 'data' DataFrame containing bus data

# Convert 'devicetime' column to datetime.
# No `unit` argument is passed: `unit` is only meaningful for numeric epoch
# input, whereas this column holds date-time values (or their string form).
data['devicetime'] = pd.to_datetime(data['devicetime'])

# Draw a line graph of radial acceleration against time for a single trip
def draw_radial_acceleration_graph(trip_id):
    """Plot radial acceleration against device time for one trip.

    Reads the rows of the global ``data`` frame whose 'trip_id' equals
    ``trip_id`` and shows the figure; nothing is returned.
    """
    trip_df = data[data["trip_id"] == trip_id]
    plt.figure(figsize=(10, 6))  # Adjust figure size as needed
    plt.plot(trip_df["devicetime"], trip_df["radial_acceleration"])
    plt.xlabel("Time")
    plt.ylabel("radial_acceleration")
    plt.title("Trip " + str(trip_id))
    plt.xticks(rotation=45)  # Rotate x-axis labels for better visibility
    plt.tight_layout()  # Adjust layout for better formatting
    plt.show()

draw_radial_acceleration_graph(2)


In [None]:
data.head(20)

## Cluster Test

In [None]:
datain_one_direction=data[data['direction']==1]
data_to_cluster=datain_one_direction[['deviceid','speed']]

In [None]:

# from sklearn.preprocessing import LabelEncoder

# # Initialize the LabelEncoder
# label_encoder = LabelEncoder()

# # Fit and transform the 'bus_stop' column
# data_to_cluster['bus_stop'] = label_encoder.fit_transform(data_to_cluster['bus_stop'])

# # Display the encoded DataFrame
# print(data_to_cluster)


In [None]:
# group by device_id by getting the mean value of speed
# data_to_cluster_mean=data_to_cluster.groupby('deviceid')['speed'].mean().reset_index()

# reset index
data_to_cluster=data_to_cluster.reset_index(drop=True)

speed_only_df=data_to_cluster['speed'].to_frame()

In [None]:
speed_only_df

In [None]:
# print all the column names
print(speed_only_df.columns)

In [None]:
# from sklearn.cluster import KMeans
# import matplotlib.pyplot as plt

# # Assuming you have your preprocessed data in 'data_to_cluster_mean'

# # List to store distortion values
# distortions = []

# # Choose a range of cluster counts to try
# cluster_range = range(1, 11)

# # Calculate distortion for each cluster count
# for num_clusters in cluster_range:
#     kmeans = KMeans(n_clusters=num_clusters, random_state=0)
#     kmeans.fit(data_to_cluster_mean)
#     distortions.append(kmeans.inertia_)  # Inertia is the sum of squared distances to the nearest cluster center

# # Plot the elbow curve
# plt.plot(cluster_range, distortions, marker='o')
# plt.xlabel('Number of Clusters')
# plt.ylabel('Distortion')
# plt.title('Elbow Method')
# plt.show()


In [None]:


# # Preprocess your data if needed (e.g., feature scaling)
# scaler = StandardScaler()
# scaled_data = scaler.fit_transform(data_to_cluster)

# Choose the number of clusters
num_clusters = 4

# Initialize the clustering algorithm (fixed random_state for reproducible labels)
kmeans = KMeans(n_clusters=num_clusters, random_state=0)

# Fit on the speed column only. Selecting it explicitly keeps this cell
# re-runnable: 'deviceid' is added to speed_only_df further down, and without
# the selection a second run would cluster on deviceid as well.
kmeans.fit(speed_only_df[['speed']])

# Get cluster assignments for each data point
cluster_labels = kmeans.labels_

# Get cluster centers (centroids)
cluster_centers = kmeans.cluster_centers_

# Append the deviceid column of data_to_cluster to speed_only_df
speed_only_df['deviceid']=data_to_cluster['deviceid']

# Build the labelled dataset; .copy() makes it independent of speed_only_df
data_with_clusters = speed_only_df[['deviceid', 'speed']].copy()
data_with_clusters['cluster'] = cluster_labels

# Visualize the clusters (speed per device, coloured by cluster label)
plt.scatter(data_with_clusters['deviceid'], data_with_clusters['speed'], c=cluster_labels, cmap='viridis')
# plt.scatter(cluster_centers[:, 0], cluster_centers[:, 1], marker='X', s=100, c='red')
plt.xlabel('deviceid')
plt.ylabel('speed')
plt.title('Cluster Visualization')
plt.show()


In [None]:
# One row per device with the most frequent cluster label among its points.
# Series.mode() returns every tied value; taking the first (smallest) one keeps
# the 'cluster' column scalar instead of mixing scalars and arrays on ties.
cluster_df = (
    data_with_clusters
    .groupby('deviceid')['cluster']
    .agg(lambda labels: labels.mode().iloc[0])
    .reset_index()
)

In [None]:
cluster_df


In [None]:
data

In [None]:
# Select all rows of `data` whose timestamp lies within +/- 1 minute of a given date and time
data['devicetime']=pd.to_datetime(data['devicetime'])

# Given date and time (these now actually drive the selection below; the values
# match the moment that was previously hard-coded separately)
given_date = '2021-10-27'
given_time = '14:50:45'

# Combine the given date and time into a single Timestamp
given_datetime = pd.to_datetime(given_date + ' ' + given_time)

# Half-width of the time interval around the given moment
time_range = pd.to_timedelta('1 minutes')

# Filter rows within the time interval (both ends inclusive)
selected_rows = data[
    data['devicetime'].between(given_datetime - time_range, given_datetime + time_range)
]




In [None]:
selected_rows

## Time series cluster test

## Visualise bus running on map

In [None]:
!pip install folium ipywidgets pandas


In [None]:
pip install ipywidgets


In [None]:
import folium
import pandas as pd
from ipywidgets import interact, IntSlider



# Convert timestamp column to datetime format
data['devicetime'] = pd.to_datetime(data['devicetime'])


In [None]:
data['devicetime'].min()

In [None]:
from geopy.geocoders import Nominatim

# Create a geolocator object
geolocator = Nominatim(user_agent="my_geocoder")

# Get the location coordinates (latitude, longitude) for Nattarampota
Nattarampota_location = geolocator.geocode("Nattarampota, Sri Lanka")
# geocode() returns None when the query has no match; fail with a clear message
if Nattarampota_location is None:
    raise ValueError("Geocoding returned no result for 'Nattarampota, Sri Lanka'")
Nattarampota_latitude = Nattarampota_location.latitude
Nattarampota_longitude = Nattarampota_location.longitude

print("Latitude:", Nattarampota_latitude, "Longitude:", Nattarampota_longitude)
# Rebind the name to a plain [latitude, longitude] list
Nattarampota_location = [Nattarampota_latitude, Nattarampota_longitude]  # Nattarampota location

In [None]:
from geopy.geocoders import Nominatim

# Create a geolocator object
geolocator = Nominatim(user_agent="my_geocoder")

# Get the location coordinates (latitude, longitude) for Kandy
kandy_location = geolocator.geocode("Kandy, Sri Lanka")
# geocode() returns None when the query has no match; fail with a clear message
if kandy_location is None:
    raise ValueError("Geocoding returned no result for 'Kandy, Sri Lanka'")
kandy_latitude = kandy_location.latitude
kandy_longitude = kandy_location.longitude

print("Latitude:", kandy_latitude, "Longitude:", kandy_longitude)
# Note the capital K: a separate name holding a plain [latitude, longitude] list
Kandy_location = [kandy_latitude, kandy_longitude]  # Kandy city location

In [None]:
from geopy.geocoders import Nominatim

# Create a geolocator object
geolocator = Nominatim(user_agent="my_geocoder")

# Get the location coordinates (latitude, longitude) for Digana
digana_location = geolocator.geocode("Digana, Sri Lanka")
# geocode() returns None when the query has no match; fail with a clear message
if digana_location is None:
    raise ValueError("Geocoding returned no result for 'Digana, Sri Lanka'")
digana_latitude = digana_location.latitude
digana_longitude = digana_location.longitude

print("Latitude:", digana_latitude, "Longitude:", digana_longitude)
# Rebind the name to a plain [latitude, longitude] list
digana_location = [digana_latitude, digana_longitude]  # Digana location

In [None]:
data.head(20)

In [None]:
import folium
import pandas as pd
from shapely.geometry import Point
from ipywidgets import FloatSlider, interact, Label, Output, VBox, Button, HBox

# Assuming you have 'data' DataFrame containing bus data

# Last known [latitude, longitude] of each bus, keyed by deviceid.
# Filled in by update_map so a bus with no sample in the current window can
# still be drawn at its previous position.
bus_locations = {}

# Create Point geometries for Kandy and Digana.
# These assignments rebind kandy_location / digana_location to shapely Points,
# and neither name is read again in the rest of this cell.
# NOTE(review): the Points are built as (latitude, longitude); shapely's Point
# takes (x, y) - confirm the intended argument order before using them.
kandy_location = Point(kandy_latitude, kandy_longitude)
digana_location = Point(digana_latitude, digana_longitude)  # Point for Digana

def create_map():
    """Build a new folium map centred on Nattarampota_location at zoom level 14."""
    base_map = folium.Map(location=Nattarampota_location, zoom_start=14)
    return base_map

def update_map(timestamp):
    """Render a map with one marker per bus for the given moment.

    Parameters
    ----------
    timestamp : float
        Seconds since the epoch, on the same scale as ``Timestamp.timestamp()``
        applied to the naive 'devicetime' values (this is what the slider holds).

    Returns
    -------
    The map object produced by ``create_map()``, with a marker added for every
    bus that has a sample inside a 15-second window centred on ``timestamp``
    or, failing that, a previously recorded position in ``bus_locations``.

    Side effects: updates the global ``bus_locations`` cache and prints the
    speed / acceleration / radial acceleration of each bus found in the window.
    """
    m = create_map()  # Create a new map instance each time
    interval = pd.Timedelta(seconds=15)
    # Naive Timestamp.timestamp() treats the wall-clock value as UTC, so invert
    # it the same way. Timestamp.fromtimestamp() would convert to the machine's
    # local time and shift the window by the UTC offset.
    timestamp_dt = pd.Timestamp(timestamp, unit='s')
    interval_start = timestamp_dt - interval / 2
    interval_end = timestamp_dt + interval / 2

    bus_data_within_interval = data[(data['devicetime'] >= interval_start) & (data['devicetime'] <= interval_end)]
    # Newest sample first, so .iloc[0] below is the latest one per bus
    bus_data_within_interval = bus_data_within_interval.sort_values(by='devicetime', ascending=False)
    unique_buses = data['deviceid'].unique()

    for deviceid in unique_buses:
        latest_data_for_bus = None  # Stays None when the bus has no sample in the window

        bus_data_for_bus = bus_data_within_interval[bus_data_within_interval['deviceid'] == deviceid]
        if len(bus_data_for_bus) > 0:
            latest_data_for_bus = bus_data_for_bus.iloc[0]
            bus_location = [latest_data_for_bus['latitude'], latest_data_for_bus['longitude']]
            bus_locations[deviceid] = bus_location  # Remember for later frames
        else:
            # Fall back to the last known position (None if the bus was never seen)
            bus_location = bus_locations.get(deviceid)

        if bus_location is not None:
            # Adjust the color using a consistent formula
            color = "#{:06x}".format(deviceid + 100000)

            # get speed acceleration and radial acceleration into three variables
            if latest_data_for_bus is not None:
                speed = latest_data_for_bus['speed']
                acceleration = latest_data_for_bus['acceleration']
                radial_acceleration = latest_data_for_bus['radial_acceleration']
                print(speed, acceleration, radial_acceleration)
            else:
                # Position is stale: no current measurements to show
                speed = 0
                acceleration = 0
                radial_acceleration = 0

            # NOTE(review): `color` is forwarded to folium.Marker as an extra
            # option; confirm it actually changes the marker's appearance.
            folium.Marker(
                location=bus_location,
                popup=[deviceid, speed, acceleration, radial_acceleration],
                color=color  # Use the calculated color
            ).add_to(m)

    return m

# Time range covered by the data
min_timestamp = data['devicetime'].min()
max_timestamp = data['devicetime'].max()

# Slider step size in seconds
step_size_seconds = 15
# The slider value is epoch seconds as given by Timestamp.timestamp()
slider = FloatSlider(value=min_timestamp.timestamp(), min=min_timestamp.timestamp(), max=max_timestamp.timestamp(), step=step_size_seconds)

# Adjust the length of the slider. Width is a layout property of the widget;
# the slider's style object has no handle-length setting.
slider.layout.width = '500px'  # Adjust the length as desired


def _format_slider_time(epoch_seconds):
    """Format a slider value (epoch seconds) as 'YYYY-MM-DD HH:MM:SS'.

    Uses pd.Timestamp(..., unit='s'), the inverse of the naive
    Timestamp.timestamp() used to build the slider range, so the label shows
    the same wall-clock time as the data regardless of the machine's timezone.
    """
    return pd.Timestamp(epoch_seconds, unit='s').strftime('%Y-%m-%d %H:%M:%S')


time_label = Label(value=_format_slider_time(slider.value))

def update_time_label(change):
    """Keep the text label in sync with the slider's new value."""
    time_label.value = _format_slider_time(change['new'])

slider.observe(update_time_label, 'value')

stop = False  # Module-level flag checked by the continuous-play loops

# Button to stop continuous play
def stop_continuous(_):
    """Request that a running continuous move stops."""
    global stop
    stop = True

# Button to move slider left
def move_left(_):
    """Step the slider one step back in time."""
    slider.value -= step_size_seconds

# Button to move slider right
def move_right(_):
    """Step the slider one step forward in time."""
    slider.value += step_size_seconds

# NOTE(review): the two loops below run inside a button callback; confirm that
# the Stop click can be processed while a loop is still running in this
# front end, otherwise the flag is only seen after the loop ends.

# Button to move slider left continuously
def move_left_continuous(_):
    """Step the slider back repeatedly until the minimum is reached or stop is set."""
    global stop
    stop = False  # Clear any earlier Stop request so continuous play can start again
    while slider.value > slider.min and not stop:
        slider.value -= step_size_seconds

# Button to move slider right continuously
def move_right_continuous(_):
    """Step the slider forward repeatedly until the maximum is reached or stop is set."""
    global stop
    stop = False  # Clear any earlier Stop request so continuous play can start again
    while slider.value < slider.max and not stop:
        slider.value += step_size_seconds





# Control buttons: single steps, continuous play in both directions, and stop
left_button = Button(description='<<', icon='angle-left')
right_button = Button(description='>>', icon='angle-right')
left_continuous_button = Button(description='Continuous <<', icon='step-backward')
right_continuous_button = Button(description='Continuous >>', icon='step-forward')
stop_button = Button(description='Stop', icon='stop')

# Attach each button to its click handler
for _button, _handler in (
    (left_button, move_left),
    (right_button, move_right),
    (left_continuous_button, move_left_continuous),
    (right_continuous_button, move_right_continuous),
    (stop_button, stop_continuous),
):
    _button.on_click(_handler)

# Controls: button row on top, then the slider, then the time label
buttons_row = HBox([left_button, left_continuous_button, right_button, right_continuous_button, stop_button])
controls_box = VBox([buttons_row, slider, time_label])

# Output area that hosts the rendered map
out_map = Output()
out_map.layout.height = '600px'  # Adjust the height as desired

def update_map_output(change):
    """Redraw the map inside out_map for the slider's current value."""
    with out_map:
        out_map.clear_output(wait=True)
        current_map = update_map(slider.value)
        display(current_map)

# Redraw whenever the slider value changes
slider.observe(update_map_output, names='value')

app_layout = VBox([controls_box, out_map])

display(app_layout)
