<a href="https://colab.research.google.com/github/IlyaZutler/Bus_lanes/blob/main/DM%20_%20LinesGPT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [62]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import LineString, Point
from shapely.ops import unary_union
import folium
from geopy.distance import geodesic

In [63]:
# Load the raw GPS track export; 'trips.xlsx' has to be uploaded to the Colab environment first.
# header=0: the first spreadsheet row supplies the column names.
df_trips = pd.read_excel(io='trips.xlsx', header=0)
df_trips.sample(n=2)

Unnamed: 0,Car_ID,Day_time,Сoordinates,Speed_gl,Ignition
1127,77403 (М947ОК67) Renault Logan,17.02.2025 16:48:36,"55,76693°, 37,830445°",0,-
1105,77403 (М947ОК67) Renault Logan,17.02.2025 14:58:36,"55,76693°, 37,830445°",0,-


In [64]:
# Parse the raw 'lat°, lon°' strings (decimal comma) into numeric 'Latitude' / 'Longitude' columns.
# NOTE(review): the column header appears to start with a Cyrillic 'С', not a Latin 'C' —
# keep the literal exactly as it is spelled in the spreadsheet.
coords_text = (
    df_trips['Сoordinates']
    .str.replace('°', '', regex=False)   # drop the degree sign
    .str.replace(',', '.', regex=False)  # decimal comma -> decimal point (also turns ', ' into '. ')
)
df_trips['Сoordinates'] = coords_text

# After the replacements a value looks like '55.76693. 37.830445'. Split on the literal
# "dot + space" separator; the dot is escaped because multi-character patterns are regexes.
lat_lon = coords_text.str.split(r'\. ', expand=True)
# Store real floats: leaving strings here makes every later step depend on implicit coercion.
df_trips[['Latitude', 'Longitude']] = lat_lon.astype(float)
# df_trips.drop(['Сoordinates'], axis=1, inplace=True)  # disabled: raw column kept for inspection

# Convert 'Day_time' to datetime objects; unparseable values become NaT.
df_trips['Day_time'] = pd.to_datetime(df_trips['Day_time'], format='%d.%m.%Y %H:%M:%S', errors='coerce')

# Disabled: map the ignition flag to 0/1 (regex=False is required, '+' is a regex metacharacter).
# df_trips['Ignition'] = df_trips['Ignition'].str.replace('-', '0', regex=False).str.replace('+', '1', regex=False)

# Order the fixes chronologically within each car so consecutive rows form trip segments.
df_trips = df_trips.sort_values(['Car_ID', 'Day_time'])

# Turn the fixes into point geometries. Shapely expects (x, y) = (lon, lat); WGS84 coordinates.
df_trips = gpd.GeoDataFrame(
    df_trips,
    geometry=gpd.points_from_xy(df_trips['Longitude'], df_trips['Latitude']),
    crs="EPSG:4326",
)

df_trips.sample(2)

Unnamed: 0,Car_ID,Day_time,Сoordinates,Speed_gl,Ignition,Latitude,Longitude,geometry
2230,77403 (М947ОК67) Renault Logan,2025-02-18 05:18:55,55.771391. 37.833866,0,+,55.771391,37.833866,POINT (37.83387 55.77139)
2509,77403 (М947ОК67) Renault Logan,2025-02-18 07:06:45,55.766878. 37.823128,23,+,55.766878,37.823128,POINT (37.82313 55.76688)


In [65]:
# Load the dedicated bus-lane geometries and declare their CRS as EPSG:4326.
# set_crs only labels the data; it does not reproject any coordinates.
gdf_bus_lanes = gpd.read_file("bus-lanes.geojson").set_crs("EPSG:4326")

# Preview two random rows belonging to Moscow.
moscow_rows = gdf_bus_lanes['short_name'] == 'Москва'
gdf_bus_lanes[moscow_rows].sample(2)

Unnamed: 0,admin_level,index_right,lanes,length,name,place,population,short_name,type,lanes_length,geometry
771,,,1,187.535613,Москва,,12655050.0,Москва,,187.535613,"LINESTRING (37.46766 55.5804, 37.46871 55.5797..."
650,,,1,77.019669,Москва,,12655050.0,Москва,,77.019669,"LINESTRING (37.60669 55.76639, 37.60652 55.76707)"


In [66]:
# Widen every bus-lane geometry into a tolerance zone around it.
# Buffering is done in EPSG:3857 (projected units), then the result is converted back to EPSG:4326.
# NOTE(review): earlier comments mentioned "30 m" / "30 km" while the code buffers by 50 units.
# EPSG:3857 units are stretched by roughly 1/cos(latitude), so 50 units is about 28 m on the
# ground at Moscow's latitude — confirm the intended radius.
# Caution: this cell overwrites gdf_bus_lanes, so re-running it buffers the buffered polygons again.
buffer_radius = 50  # in EPSG:3857 map units
lanes_projected = gdf_bus_lanes.to_crs(epsg=3857)
lanes_projected["geometry"] = lanes_projected.geometry.buffer(buffer_radius)
gdf_bus_lanes = lanes_projected.to_crs(epsg=4326)
gdf_bus_lanes.sample(2)

Unnamed: 0,admin_level,index_right,lanes,length,name,place,population,short_name,type,lanes_length,geometry
647,,,1,1077.882922,Москва,,12655050.0,Москва,,1077.882922,"POLYGON ((37.66478 55.73991, 37.67189 55.73935..."
200,4.0,696.0,1,93.530348,Санкт-Петербург,state,5384342.0,Санкт-Петербург,,93.530348,"POLYGON ((30.25864 59.87329, 30.25868 59.87329..."


In [67]:
# Keep only the bus-lane rows whose short_name is Moscow.
in_moscow = gdf_bus_lanes['short_name'].eq('Москва')
gdf_bus_lanes_M = gdf_bus_lanes.loc[in_moscow]
gdf_bus_lanes_M.sample(n=2)

Unnamed: 0,admin_level,index_right,lanes,length,name,place,population,short_name,type,lanes_length,geometry
761,,,1,565.372633,Москва,,12655050.0,Москва,,565.372633,"POLYGON ((37.46045 55.72616, 37.4604 55.72616,..."
593,,,1,437.57166,Москва,,12655050.0,Москва,,437.57166,"POLYGON ((37.71045 55.6578, 37.71048 55.65781,..."


In [68]:
# Fast point-in-zone check.
# Merge all Moscow bus-lane geometries into one geometry so each GPS fix is tested
# against a single object.
bus_lanes_union = unary_union(gdf_bus_lanes_M.geometry)

# Vectorized test of every fix against the merged zone (replaces a per-row Python lambda;
# same boolean result, evaluated inside geopandas/shapely instead of the interpreter).
df_trips["on_bus_lane"] = df_trips["geometry"].intersects(bus_lanes_union)
df_trips.sample(2)

Unnamed: 0,Car_ID,Day_time,Сoordinates,Speed_gl,Ignition,Latitude,Longitude,geometry,on_bus_lane
13,77403 (М947ОК67) Renault Logan,2025-02-17 01:07:39,55.771763. 37.83387,0,-,55.771763,37.83387,POINT (37.83387 55.77176),False
2258,77403 (М947ОК67) Renault Logan,2025-02-18 05:20:48,55.76748. 37.831541,23,+,55.76748,37.831541,POINT (37.83154 55.76748),False


In [None]:
# Disabled alternative to the union-based check: look up candidate bus-lane geometries
# through the spatial index of gdf_bus_lanes_M and test each point only against those.
# Kept for reference; nothing in this cell is executed.
# bus_lane_sindex = gdf_bus_lanes_M.sindex  # Build the spatial index

# def check_intersection(point):
#     possible_matches_index = list(bus_lane_sindex.intersection(point.bounds))
#     possible_matches = gdf_bus_lanes_M.iloc[possible_matches_index]

#     if possible_matches.empty:
#         return False

#     return unary_union(possible_matches.geometry).intersects(point)

# df_trips["on_bus_lane"] = df_trips["geometry"].apply(check_intersection)

In [69]:
# Segment-level speed analysis: pair every GPS fix with the next fix of the same car.
next_fix = df_trips.groupby('Car_ID')[['Day_time', 'Longitude', 'Latitude', 'on_bus_lane']].shift(-1)
df_trips["next_time"] = next_fix['Day_time']
df_trips["next_lon"] = next_fix['Longitude']
df_trips["next_lat"] = next_fix['Latitude']
# The last fix of each car has no successor (NaN after the shift); .eq(True) maps that to False
# so the column stays a clean boolean instead of an object column mixing bools and NaN.
df_trips["next_on_bus_lane"] = next_fix['on_bus_lane'].eq(True)

# Distance between consecutive fixes, in km.
# Build (lat, lon) arrays as floats so the result does not depend on how the columns were parsed.
coords1 = df_trips[['Latitude', 'Longitude']].astype(float).to_numpy()
coords2 = df_trips[['next_lat', 'next_lon']].astype(float).to_numpy()


def calculate_distances(coords1, coords2):
    """Return geodesic distances in kilometres between paired (lat, lon) points.

    Parameters
    ----------
    coords1, coords2 : sequences of (lat, lon) pairs
        Start and end point of each segment; paired element by element.

    Returns
    -------
    numpy.ndarray of float
        One distance per pair. NaN where any of the four coordinates is missing
        (geopy rejects non-finite coordinates, so those pairs are skipped).
    """
    def _segment_km(start, end):
        if pd.isna([*start, *end]).any():
            return np.nan
        return geodesic(tuple(start), tuple(end)).meters / 1000

    return np.array([_segment_km(start, end) for start, end in zip(coords1, coords2)], dtype=float)


# Apply to the whole table.
df_trips['distance'] = calculate_distances(coords1, coords2)

# Time between consecutive fixes, in hours. A missing next_time (NaT) yields NaN on its own,
# so no extra masking is needed.
df_trips["time"] = (df_trips["next_time"] - df_trips['Day_time']).dt.total_seconds() / 3600

# Speed in km/h, only where distance and time are present and time is non-zero.
# Invalid segments get NaN (not None) so the column stays float64.
has_valid_segment = df_trips["distance"].notna() & df_trips["time"].notna() & df_trips["time"].ne(0)
df_trips["speed"] = (df_trips["distance"] / df_trips["time"]).where(has_valid_segment)

In [None]:
# Spot-check three consecutive fixes (row positions 236-238) and their derived columns.
df_trips.iloc[236:239, :]

Unnamed: 0,Car_ID,Day_time,Сoordinates,Speed_gl,Ignition,Longitude,Latitude,geometry,on_bus_lane,next_time,next_lon,next_lat,distance,time,speed
236,77403 (М947ОК67) Renault Logan,2025-02-17 06:53:27,55.767596. 37.83425,15,1,55.767596,37.83425,POINT (55.7676 37.83425),False,2025-02-17 06:53:31,55.76751,37.834183,0.010612,0.001111,9.550852
237,77403 (М947ОК67) Renault Logan,2025-02-17 06:53:31,55.76751. 37.834183,7,1,55.76751,37.834183,POINT (55.76751 37.83418),False,2025-02-17 06:53:32,55.767491,37.834136,0.005478,0.000278,19.721727
238,77403 (М947ОК67) Renault Logan,2025-02-17 06:53:32,55.767491. 37.834136,14,1,55.767491,37.834136,POINT (55.76749 37.83414),False,2025-02-17 06:53:33,55.767483,37.834058,0.008686,0.000278,31.269875


In [70]:
# Estimate the travel time if the car could use the dedicated bus lanes.
# Assumed average speed on a bus lane, km/h ('distance' is in km and 'time' in hours).
avg_speed_bus_lane = 50

# A segment counts as "on a bus lane" only when both its start and its end fix lie in the zone.
# next_on_bus_lane comes from a shift and may hold NaN for the last fix of a car;
# .eq(True) makes "missing means not on a bus lane" explicit instead of relying on how
# pandas fills NaN inside a logical AND.
segment_on_bus_lane = df_trips["on_bus_lane"].eq(True) & df_trips["next_on_bus_lane"].eq(True)

# Coerce to numeric so the comparison below also works if 'time' arrived as an object column.
segment_time = pd.to_numeric(df_trips['time'], errors='coerce')
bus_lane_time = df_trips['distance'] / avg_speed_bus_lane

# Predicted time per row: on bus-lane segments take the faster of observed and bus-lane time,
# elsewhere keep the observed time.
df_trips['predicted_time'] = np.where(
    segment_on_bus_lane,
    np.minimum(segment_time, bus_lane_time),
    segment_time,
)

In [71]:
# Inspect row positions 236-238 with every column computed so far.
df_trips.iloc[slice(236, 239)]

Unnamed: 0,Car_ID,Day_time,Сoordinates,Speed_gl,Ignition,Latitude,Longitude,geometry,on_bus_lane,next_time,next_lon,next_lat,next_on_bus_lane,distance,time,speed,predicted_time
236,77403 (М947ОК67) Renault Logan,2025-02-17 06:53:27,55.767596. 37.83425,15,+,55.767596,37.83425,POINT (37.83425 55.7676),False,2025-02-17 06:53:31,37.834183,55.76751,False,0.010458,0.001111,9.412056,0.001111
237,77403 (М947ОК67) Renault Logan,2025-02-17 06:53:31,55.76751. 37.834183,7,+,55.76751,37.834183,POINT (37.83418 55.76751),False,2025-02-17 06:53:32,37.834136,55.767491,False,0.00363,0.000278,13.068405,0.000278
238,77403 (М947ОК67) Renault Logan,2025-02-17 06:53:32,55.767491. 37.834136,14,+,55.767491,37.834136,POINT (37.83414 55.76749),False,2025-02-17 06:53:33,37.834058,55.767483,False,0.004976,0.000278,17.914238,0.000278


In [72]:
# Driving time in hours: only segments with a positive computed speed count as "moving".
# Coerce first because 'speed' may be an object column holding None for invalid segments.
is_moving = pd.to_numeric(df_trips['speed'], errors='coerce') > 0

total_time = float(df_trips.loc[is_moving, 'time'].sum())
total_predicted_time = float(df_trips.loc[is_moving, 'predicted_time'].sum())
total_time_saved = total_time - total_predicted_time

# Guard the percentage against zero driving time (empty or fully stationary track).
saved_pct = total_time_saved / total_time * 100 if total_time else float('nan')

print(f"Общее время в движении: {total_time:.2f} часов")
print(f"Общая экономия времени: {total_time_saved:.2f} часов")
print(f"% экономии времени: {saved_pct:.2f} %")

Общее время в движении: 6.98 часов
Общая экономия времени: 0.46 часов
% экономии времени: 6.54 %
