In [1]:
import os
import time
from decimal import Decimal

import numpy as np
import pandas as pd
import pykoda
import requests



# --- Configuration: operator, service day, hour range and feed type ---------
company = 'sl'  # operator code, e.g. 'sl' for Stockholm
date = '2022-12-03'  # service day, formatted YYYY-MM-DD
start_hour = 0
end_hour = 23
feed = 'TripUpdates'  # one of 'VehiclePositions', 'TripUpdates', 'ServiceAlerts'

# --- Download the realtime feed for the configured day ----------------------
realtime_data = pykoda.datautils.get_data_range(
    feed=feed,
    company=company,
    start_date=date,
    start_hour=start_hour,
    end_date=date,
    end_hour=end_hour,
    merge_static=False  # do not merge the static GTFS tables
)

# --- Persist the download as CSV --------------------------------------------
# The file name is derived from the configuration above so that the reload
# step below always reads the file that belongs to `date`.
output_dir = "./output_csv"
output_file = os.path.join(output_dir, f"{company}_{feed}_{date}_data.csv")
os.makedirs(output_dir, exist_ok=True)

if not realtime_data.empty:
    print("成功加载实时数据！")
    print(realtime_data.head())
    print("成功加载实时数据，正在保存为 CSV 文件...")
    realtime_data.to_csv(output_file, index=False, encoding='utf-8')
    print(f"数据已保存至 {output_file}")
else:
    print("实时数据加载失败或数据为空。")
    print("实时数据加载失败或数据为空，未生成 CSV 文件。")

# --- Reload the CSV ---------------------------------------------------------
# Previously this path was hard-coded to a different day than `date`; reuse
# the path that was just written instead.
file_path = output_file

try:
    # trip_id is read as text: the ids are 17-digit integers, which exceed the
    # 2**53 range a float64 can represent exactly.
    data = pd.read_csv(file_path, dtype={'trip_id': str})
except FileNotFoundError:
    print(f"文件未找到：{file_path}")
    raise  # nothing below can run without the data; fail with the real cause
except Exception as e:
    print(f"读取 CSV 文件时出错: {e}")
    raise
else:
    print("CSV 文件读取成功！")


def _normalize_trip_id(value):
    """Return a trip id as a plain integer string, or None when it is missing.

    Goes through Decimal rather than float so that ids written as
    '14010000550076808', '14010000550076808.0' or in exponent notation are
    converted without float rounding.
    """
    if pd.isna(value):
        return None
    return str(int(Decimal(str(value).strip())))


# --- Type conversions -------------------------------------------------------
# Epoch-second columns become datetimes (only those present in the file).
for time_col in ('timestamp', 'arrival_time', 'departure_time'):
    if time_col in data.columns:
        data[time_col] = pd.to_datetime(data[time_col], unit='s')

if 'vehicle_id' in data.columns:
    data['vehicle_id'] = data['vehicle_id'].astype(str)

if 'trip_id' in data.columns:
    data['trip_id'] = data['trip_id'].map(_normalize_trip_id)

# --- Drop the columns that are not needed downstream ------------------------
columns_to_remove = [
    'start_time', 'schedule_relationship', 'route_id', 'direction_id',
    'arrival_uncertainty', 'departure_uncertainty', 'tripUpdate_stopTimeUpdate_scheduleRelationship','start_date','timestamp',
    'vehicle_id','datetime'
]
# errors='ignore' matches the "if column in data.columns" guards above: a feed
# that lacks one of these columns should not abort the cell.
cleaned_data = data.drop(columns=columns_to_remove, errors='ignore')

cleaned_data.head()

Loading data:   0%|                                                                             | 0/24 [00:00<?, ?it/s]

Getting C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl_TripUpdates_2022_12_03_0.feather
File or folder already exists, so skipping task: data (datafolder)
Executing python function, producing output(s): C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2.tmp
C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.tar
Executing python function, producing output(s): C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl_TripUpdates_2022_12_03_0.feather.tmp
sl-tripupdates-2022-12-01T10


Loading data:  12%|████████▋                                                            | 3/24 [00:57<05:14, 14.97s/it]

Getting C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl_TripUpdates_2022_12_03_1.feather
File or folder already exists, so skipping task: data (datafolder)
File or folder already exists, so skipping task: C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2 (file)
System tool failed: Command '['7z', 'e', '-oC:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03', 'C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2']' returned non-zero exit status 255.
None
Executing python function, producing output(s): C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl_TripUpdates_2022_12_03_1.feather.tmp
Getting C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl_TripUpdates_2022_12_03_2.feather
File or folder already exists, so skipping task: data (datafolder)
File or folder already exists, so skipping task: C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2 (file)
System tool failed: Command '['7z', 

Loading data:  29%|████████████████████▏                                                | 7/24 [00:57<01:12,  4.24s/it]

System tool failed: Command '['7z', 'e', '-oC:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03', 'C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2']' returned non-zero exit status 255.
None
Executing python function, producing output(s): C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl_TripUpdates_2022_12_03_3.feather.tmp
Getting C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl_TripUpdates_2022_12_03_4.feather
File or folder already exists, so skipping task: data (datafolder)
File or folder already exists, so skipping task: C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2 (file)
System tool failed: Command '['7z', 'e', '-oC:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03', 'C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2']' returned non-zero exit status 255.
None
Executing python function, producing output(s): C:/Users/asus/AppData/Local/pykoda/pyko

Loading data:  38%|█████████████████████████▉                                           | 9/24 [00:58<00:40,  2.67s/it]

Getting C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl_TripUpdates_2022_12_03_7.feather
File or folder already exists, so skipping task: data (datafolder)
File or folder already exists, so skipping task: C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2 (file)
System tool failed: Command '['7z', 'e', '-oC:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03', 'C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2']' returned non-zero exit status 255.
None
Executing python function, producing output(s): C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl_TripUpdates_2022_12_03_7.feather.tmp
Getting C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl_TripUpdates_2022_12_03_8.feather
File or folder already exists, so skipping task: data (datafolder)
File or folder already exists, so skipping task: C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2 (file)
System tool failed: Command '['7z', 



System tool failed: Command '['7z', 'e', '-oC:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03', 'C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2']' returned non-zero exit status 255.
None
Executing python function, producing output(s): C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl_TripUpdates_2022_12_03_9.feather.tmp
Getting C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl_TripUpdates_2022_12_03_10.feather
File or folder already exists, so skipping task: data (datafolder)
File or folder already exists, so skipping task: C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2 (file)
System tool failed: Command '['7z', 'e', '-oC:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03', 'C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2']' returned non-zero exit status 255.
None
Executing python function, producing output(s): C:/Users/asus/AppData/Local/pykoda/pyk



Getting C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl_TripUpdates_2022_12_03_13.feather
File or folder already exists, so skipping task: data (datafolder)
File or folder already exists, so skipping task: C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2 (file)
System tool failed: Command '['7z', 'e', '-oC:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03', 'C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2']' returned non-zero exit status 255.
None
Executing python function, producing output(s): C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl_TripUpdates_2022_12_03_13.feather.tmp
Getting C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl_TripUpdates_2022_12_03_14.feather
File or folder already exists, so skipping task: data (datafolder)
File or folder already exists, so skipping task: C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2 (file)
System tool failed: Command '['7z



File or folder already exists, so skipping task: C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2 (file)
System tool failed: Command '['7z', 'e', '-oC:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03', 'C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2']' returned non-zero exit status 255.
None
Executing python function, producing output(s): C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl_TripUpdates_2022_12_03_16.feather.tmp
Getting C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl_TripUpdates_2022_12_03_17.feather
File or folder already exists, so skipping task: data (datafolder)
File or folder already exists, so skipping task: C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2 (file)
System tool failed: Command '['7z', 'e', '-oC:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03', 'C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-0

Loading data:  88%|███████████████████████████████████████████████████████████▌        | 21/24 [00:58<00:00,  3.16it/s]

System tool failed: Command '['7z', 'e', '-oC:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03', 'C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2']' returned non-zero exit status 255.
None
Executing python function, producing output(s): C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl_TripUpdates_2022_12_03_18.feather.tmp
Getting C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl_TripUpdates_2022_12_03_19.feather
File or folder already exists, so skipping task: data (datafolder)
File or folder already exists, so skipping task: C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2 (file)
System tool failed: Command '['7z', 'e', '-oC:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03', 'C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2']' returned non-zero exit status 255.
None
Executing python function, producing output(s): C:/Users/asus/AppData/Local/pykoda/py

Loading data: 100%|████████████████████████████████████████████████████████████████████| 24/24 [00:59<00:00,  2.46s/it]


File or folder already exists, so skipping task: C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2 (file)
System tool failed: Command '['7z', 'e', '-oC:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03', 'C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2']' returned non-zero exit status 255.
None
Executing python function, producing output(s): C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl_TripUpdates_2022_12_03_22.feather.tmp
Getting C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl_TripUpdates_2022_12_03_23.feather
File or folder already exists, so skipping task: data (datafolder)
File or folder already exists, so skipping task: C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03.bz2 (file)
System tool failed: Command '['7z', 'e', '-oC:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-03', 'C:/Users/asus/AppData/Local/pykoda/pykoda/cache/sl-tripupdates-2022-12-0

Unnamed: 0,id,trip_id,stop_sequence,stop_id,arrival_delay,arrival_time,departure_delay,departure_time
0,14010514477669904,14010000550076808,21,9022001005191001,-48.0,2022-12-01 22:49:12,21.0,2022-12-01 22:50:21
1,14010514477292193,14010000550098468,23,9022001005221005,69.0,2022-12-01 22:48:09,8.0,2022-12-01 22:50:08
2,14010514402530242,14010000611588680,20,9022001004537001,11.0,2022-12-01 22:49:41,42.0,2022-12-01 22:50:12
3,14010514538169516,14010000607261968,28,9022001040463003,277.0,2022-12-01 22:50:11,277.0,2022-12-01 22:50:11
4,14010514537708561,14010000539915094,11,9022001041309001,195.0,2022-12-01 22:50:27,195.0,2022-12-01 22:50:27


In [6]:
from sklearn.cluster import DBSCAN


# Static GTFS stops table from the local pykoda cache.
# NOTE(review): this is an absolute, machine-specific path; consider deriving
# it from a configurable data directory.
static_data_path = r"C:\Users\asus\AppData\Local\pykoda\pykoda\Cache\sl_static_2022_12_01\stops.txt"

# Keep only the id and coordinates. The explicit .copy() makes stops_data an
# independent frame, so adding 'group_id' below does not trigger pandas'
# SettingWithCopyWarning.
stops_data = pd.read_csv(static_data_path)[['stop_id', 'stop_lat', 'stop_lon']].copy()

# Cluster stops by coordinates. With min_samples=1 every stop is assigned to a
# cluster (no noise label).
# NOTE(review): eps is expressed in degrees and the metric is euclidean, so the
# effective radius differs between the latitude and longitude directions —
# confirm this is acceptable for grouping stops for weather lookups.
coords = stops_data[['stop_lat', 'stop_lon']].to_numpy()
db = DBSCAN(eps=0.01, min_samples=1, metric='euclidean').fit(coords)

# Attach the cluster label to each stop.
stops_data['group_id'] = db.labels_

# Preview the result.
print(stops_data.head())

            stop_id   stop_lat   stop_lon  group_id
0  9021001000101000  59.286405  18.704700         0
1  9021001000102000  59.280184  18.731426         1
2  9021001000103000  59.208731  18.740060         2
3  9021001000104000  59.215843  18.753410         3
4  9021001000105000  59.204309  18.757769         4


In [8]:
# Report how many stops were loaded ...
num_rows = len(stops_data.index)
print(f"数据共有 {num_rows} 行")

# ... and how many distinct clusters they were assigned to.
group_count = stops_data['group_id'].nunique()
print(f"一共有 {group_count} 个 group_id")

数据共有 21508 行
一共有 853 个 group_id


In [9]:
import requests

def get_historical_weather(lat, lon, start_date, end_date,
                           max_retries=3, backoff_seconds=2.0, timeout=30):
    """Fetch hourly weather for one coordinate from the Open-Meteo archive API.

    Args:
        lat: Latitude of the query point.
        lon: Longitude of the query point.
        start_date: First day to fetch, formatted YYYY-MM-DD.
        end_date: Last day to fetch, formatted YYYY-MM-DD.
        max_retries: How many times to retry after an HTTP 429 response.
        backoff_seconds: Base delay between retries; doubled on every attempt
            unless the server sends a numeric Retry-After header.
        timeout: Per-request timeout in seconds passed to requests.get.

    Returns:
        A DataFrame with the columns datetime, temperature, precipitation,
        snowfall and snow_depth, or an empty DataFrame when the request fails
        (the failure is printed, not raised).
    """
    url = "https://archive-api.open-meteo.com/v1/era5"
    params = {
        'latitude': lat,
        'longitude': lon,
        'start_date': start_date,
        'end_date': end_date,
        'hourly': "temperature_2m,precipitation,snowfall,snow_depth",
    }

    status = None
    for attempt in range(max(0, max_retries) + 1):
        try:
            response = requests.get(url, params=params, timeout=timeout)
        except requests.RequestException as exc:
            # Connection errors and timeouts are reported like HTTP errors.
            print(f"Error fetching weather data for {lat}, {lon}: {exc}")
            return pd.DataFrame()

        status = response.status_code
        if status == 200:
            hourly = response.json()['hourly']
            return pd.DataFrame({
                'datetime': hourly['time'],
                'temperature': hourly['temperature_2m'],
                'precipitation': hourly['precipitation'],
                'snowfall': hourly['snowfall'],
                'snow_depth': hourly['snow_depth']
            })

        # Only rate limiting (429) is worth retrying; give up on anything else
        # or once the retry budget is used up.
        if status != 429 or attempt >= max_retries:
            break

        retry_after = str(response.headers.get('Retry-After', ''))
        if retry_after.isdigit():
            delay = float(retry_after)
        else:
            delay = backoff_seconds * (2 ** attempt)
        time.sleep(delay)

    print(f"Error fetching weather data for {lat}, {lon}: {status}")
    return pd.DataFrame()

# Day to query; uses the `date` configured for the realtime download.
query_date = date

# Pause between consecutive API calls so the loop does not fire hundreds of
# requests back to back (the service answers HTTP 429 when called too fast).
# NOTE(review): tune this to the API's documented rate limits.
REQUEST_PAUSE_SECONDS = 0.5

# One representative coordinate (the mean position) per stop cluster.
group_centers = stops_data.groupby('group_id')[['stop_lat', 'stop_lon']].mean().reset_index()

# Fetch the weather once per cluster.
group_weather_frames = []
for center in group_centers.itertuples(index=False):
    # itertuples keeps the integer dtype; iterrows() would upcast the id to
    # float because the row also holds float coordinates.
    group_id = int(center.group_id)
    lat = center.stop_lat
    lon = center.stop_lon
    print(f"Fetching weather data for group_id={group_id}...")
    weather = get_historical_weather(lat, lon, query_date, query_date)
    if not weather.empty:
        group_weather_frames.append(weather.assign(group_id=group_id))
    time.sleep(REQUEST_PAUSE_SECONDS)

# Combine the per-cluster frames. When every request failed, fall back to an
# empty frame whose group_id dtype matches stops_data so the merge below still
# works (weather columns are then all missing).
if group_weather_frames:
    group_weather_data = pd.concat(group_weather_frames, ignore_index=True)
else:
    group_weather_data = pd.DataFrame(
        columns=['datetime', 'temperature', 'precipitation', 'snowfall', 'snow_depth', 'group_id']
    ).astype({'group_id': stops_data['group_id'].dtype})

# Attach the weather of its cluster to every stop (one row per stop and hour).
stops_weather = stops_data.merge(group_weather_data, on='group_id', how='left')

# Save the result.
stops_weather.to_csv("stops_weather_data_grouped.csv", index=False)

# Preview the result.
print(stops_weather.head())


Fetching weather data for group_id=0.0...
Error fetching weather data for 59.286894499999995, 18.704328: 429
Fetching weather data for group_id=1.0...
Error fetching weather data for 59.280184, 18.731426: 429
Fetching weather data for group_id=2.0...
Error fetching weather data for 59.208731, 18.74006: 429
Fetching weather data for group_id=3.0...
Error fetching weather data for 59.215843, 18.75341: 429
Fetching weather data for group_id=4.0...
Error fetching weather data for 59.204309, 18.757769: 429
Fetching weather data for group_id=5.0...
Error fetching weather data for 59.197456, 18.739825: 429
Fetching weather data for group_id=6.0...
Error fetching weather data for 59.191628, 18.719581: 429
Fetching weather data for group_id=7.0...
Error fetching weather data for 59.18379525, 18.700166: 429
Fetching weather data for group_id=8.0...
Error fetching weather data for 59.177537, 18.669835: 429
Fetching weather data for group_id=9.0...
Error fetching weather data for 59.17158966666667

KeyboardInterrupt: 

In [None]:
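# TODO: merge the weather data (stops_weather) into the realtime data (cleaned_data) on stop id + matching time; the time columns need some preprocessing first (e.g. aligning arrival times to the hourly weather timestamps).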