# Wybór danych

In [8]:
from analyse import Punctionality
import pandas as pd
import plotly.express as px


analyse_punctionality = Punctionality()
# analyse_punctionality.prepare_data()
data = analyse_punctionality.get_data()
pd.DataFrame(data).head(5)

Choose the file with the data
1. buses_location_2024-02-15_18-17-37
2. buses_location_2024-02-16_11-36-29
3. buses_location_2024-02-16_15-17-31


Unnamed: 0,VehicleNumber,Line,Brigade,Stop,DiffTime,DiffDist,BusLat,BusLon
0,1000,213,1,2057_03,0,0.007816,52.149793,21.188838
1,1000,213,1,2048_02,40,0.008392,52.160911,21.210689
2,1000,213,1,2188_04,3137,0.005497,52.214276,21.133879
3,1000,213,1,2148_08,1397,0.00861,52.223498,21.101673
4,1012,219,2,2043_03,320,0.008069,52.183931,21.18713


## Średnie czasy opóźnień dla każdej lini

In [9]:
average = data.copy().groupby("Line")["DiffTime"].mean()
average = average/60
average = pd.DataFrame(average).sort_values(by="DiffTime", ascending=False)
average.head(10)

Unnamed: 0_level_0,DiffTime
Line,Unnamed: 1_level_1
L13,58.133333
L26,43.584314
118,42.487923
133,42.128105
256,40.424762
L38,40.373333
165,40.156783
262,39.971111
104,33.999673
L16,33.494444


### Wizuazlizacja danych

In [10]:
fig = px.bar(average, x=average.index, y="DiffTime")
fig.update_layout(xaxis_title="Different Lines", yaxis_title="Average Time")
fig.show()


## Wykres przedstawiający jak duży procent opóźnień stanowią opóźniennia o danej wartości

In [11]:
plot_data = data.copy()
plot_data["DiffTime"] = plot_data["DiffTime"]/60
plot_data["DiffTime"] = plot_data["DiffTime"].apply(lambda x: int(x))
plot_data = plot_data.groupby("DiffTime").size()
plot_data = pd.DataFrame(plot_data)
plot_data = plot_data.reset_index()
plot_data.columns = ["DiffTime", "Count"]
plot_data["Percent"] = plot_data["Count"]/plot_data["Count"].sum()
plot_data = plot_data.sort_values(by="DiffTime")

fig = px.line(plot_data, x="DiffTime", y="Percent")
fig.update_layout(xaxis_title="Czas opóźnienia [min]", yaxis_title="Procentowa ilość opóźnienia")
fig.update_layout(title="Procentowa ilość opóźnienia w zależności od czasu")
fig.show()



## Największe opóźnienia

In [12]:
top_delayed = data.copy().groupby("Line")["DiffTime"].max()
top_delayed = top_delayed/60
top_delayed = pd.DataFrame(top_delayed).sort_values(by="DiffTime", ascending=False)
top_delayed.head(5)
# prawdopodobnie to błędy w danych lub przybliżeniu danych

Unnamed: 0_level_0,DiffTime
Line,Unnamed: 1_level_1
217,59.95
176,59.933333
106,59.933333
181,59.933333
152,59.933333


### Przedstawienie danych na mapie

In [13]:
df = data.copy()
df["DiffTime"] = df["DiffTime"]/60  # zmiana na minuty
df = df[df.DiffTime > 5]  # opóźnienia powyżej 5 minut

color_scale = [(0, "green"), (0.5, "yellow"), (1, "red")]

fig = px.scatter_mapbox(
    df, 
    lat = "BusLat",
    lon = "BusLon",
    hover_name = "Line",
    hover_data = ["Line", "DiffTime"],
    color = "DiffTime",
    color_continuous_scale = color_scale,
    size = "DiffTime",
    zoom = 10,
    height = 800,
    width = 800,
)

fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.update_traces(marker=dict(size=10, opacity=0.7))
fig.show()

## Ilość opóźnień w zależności od linii

In [14]:
df = data.copy()
df["DiffTime"] = df["DiffTime"]/60  # zmiana na minuty
df = df[df.DiffTime > 5]  # opóźnienia powyżej 5 minut

df = df.groupby("Line").size().sort_values(ascending=False)
df = pd.DataFrame(df)
df = df.reset_index()
df.columns = ["Line", "Count"]
df.head(10)

Unnamed: 0,Line,Count
0,165,66
1,118,56
2,263,42
3,255,41
4,133,41
5,149,38
6,527,37
7,104,36
8,153,32
9,256,26


In [15]:
# wykres słupkowy
fig = px.bar(df, x="Line", y="Count", text="Count")
fig.update_traces(texttemplate='%{text}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.show()


### Tym razem w zależności od przystanku

In [16]:
df = data.copy()
df["DiffTime"] = df["DiffTime"]/60  # zmiana na minuty
df = df[df.DiffTime > 5]  # opóźnienia powyżej 5 minut

df = df.groupby("Stop").size().sort_values(ascending=False)
df = pd.DataFrame(df)
df = df.reset_index()
df.columns = ["Stop", "Count"]
df.head(10)

Unnamed: 0,Stop,Count
0,4094_01,13
1,2148_01,11
2,2148_03,8
3,1528_01,7
4,2148_02,7
5,1253_09,7
6,3147_01,7
7,2138_02,7
8,4108_01,7
9,7044_01,6


In [17]:
fig = px.bar(df, x="Stop", y="Count", text="Count")
fig.update_traces(texttemplate='%{text}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_traces(marker_color="green", marker_line_color="black", marker_line_width=0.3, opacity=1)
fig.show()

In [18]:
import os
import pandas as pd
import json
# wczytaj dane o przystankach
path = os.getcwd()
path = os.path.join(path, "bus_stops", "bus_stops.json") 
with open(path, "r") as file:
    bus_stops = json.load(file)
bus_stops = pd.DataFrame(bus_stops).T
bus_stops.drop(columns=["kierunek", "obowiazuje_od","id_ulicy", "slupek","zespol"], inplace=True)
bus_stops.reset_index(inplace=True)


In [19]:
# merge z danymi o opóźnieniach
stops_delay = df.copy()
stops_delay = stops_delay.merge(bus_stops, left_on="Stop", right_on="index")
stops_delay.drop(columns = ["index"], inplace=True)

stops_delay["szer_geo"] = stops_delay["szer_geo"].apply(lambda x: float(x))
stops_delay["dlug_geo"] = stops_delay["dlug_geo"].apply(lambda x: float(x))


In [20]:
color_scale = [(0, "green"), (0.5, "yellow"), (1, "red")]

stops_delay = stops_delay[stops_delay.Count > 2]

fig = px.scatter_mapbox(
    stops_delay, 
    lat = "szer_geo",
    lon = "dlug_geo",
    hover_name = "Stop",
    hover_data = ["Stop","Count"],
    color = "Count",
    color_continuous_scale = color_scale,
    size = "Count",
    zoom = 10.5,
    height = 800,
    width = 800,
)

fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.update_traces(marker=dict(size=10, opacity=0.7))
fig.show()


## Dzielnice

źródło mapy z podziałem na dzielnice :https://github.com/andilabs/warszawa-dzielnice-geojson

In [21]:
import os
import geopandas as gpd
from shapely.geometry import Point


current_path = os.getcwd()
file_path = os.path.join(current_path, "warszawa-dzielnice.geojson")
warsaw_districts = gpd.read_file(file_path)

In [22]:
punctionality_data = data.copy()
punctionality_data["geometry"] = punctionality_data.apply(lambda x: Point((float(x["BusLon"]), float(x["BusLat"]))), axis=1)
punctionality_data = gpd.GeoDataFrame(punctionality_data, geometry="geometry")
punctionality_data.crs = warsaw_districts.crs
punctionality_data.head(5)

Unnamed: 0,VehicleNumber,Line,Brigade,Stop,DiffTime,DiffDist,BusLat,BusLon,geometry
0,1000,213,1,2057_03,0,0.007816,52.149793,21.188838,POINT (21.18884 52.14979)
1,1000,213,1,2048_02,40,0.008392,52.160911,21.210689,POINT (21.21069 52.16091)
2,1000,213,1,2188_04,3137,0.005497,52.214276,21.133879,POINT (21.13388 52.21428)
3,1000,213,1,2148_08,1397,0.00861,52.223498,21.101673,POINT (21.10167 52.22350)
4,1012,219,2,2043_03,320,0.008069,52.183931,21.18713,POINT (21.18713 52.18393)


In [23]:
punctionality_map = gpd.sjoin(left_df=punctionality_data.copy(), right_df=warsaw_districts.copy(), how="left", predicate="intersects")
punctionality_map = punctionality_map.drop(columns=["index_right", "BusLat", "BusLon", "cartodb_id"])
punctionality_map.head(5)

Unnamed: 0,VehicleNumber,Line,Brigade,Stop,DiffTime,DiffDist,geometry,name
0,1000,213,1,2057_03,0,0.007816,POINT (21.18884 52.14979),Wawer
0,1000,213,1,2057_03,0,0.007816,POINT (21.18884 52.14979),Warszawa
1,1000,213,1,2048_02,40,0.008392,POINT (21.21069 52.16091),Wawer
1,1000,213,1,2048_02,40,0.008392,POINT (21.21069 52.16091),Warszawa
2,1000,213,1,2188_04,3137,0.005497,POINT (21.13388 52.21428),Wawer


In [24]:
delays_districts = punctionality_map.copy()
delays_districts = delays_districts.drop(columns=["DiffTime", "DiffDist"])
delays_districts = delays_districts.groupby("name").size().sort_values(ascending=False)
delays_districts = pd.DataFrame(delays_districts)
delays_districts = delays_districts.reset_index()
delays_districts.columns = ["District", "Count"]
# usun wiersz zawierajacy wszystkie lokalizacje poza dzielnicami
delays_districts = delays_districts[delays_districts.District != "Warszawa"]
delays_districts

Unnamed: 0,District,Count
1,Białołęka,1172
2,Praga Południe,992
3,Mokotów,986
4,Śródmieście,779
5,Wawer,664
6,Ursynów,652
7,Targówek,619
8,Wola,591
9,Ochota,548
10,Bielany,441


In [25]:
fig = px.bar(delays_districts, x="District", y="Count", text="Count")
fig.update_traces(texttemplate='%{text}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_traces(marker_color="green", marker_line_color="black", marker_line_width=0.5, opacity=1)
fig.update_layout(xaxis_title="Dzielnica", yaxis_title="Liczba opóźnień w dzielnicy")
fig.show()
