In [5]:
import pandas as pd

def count_city_stats_with_dates(file_path):
    """
    Summarise, per city, how many records exist and the time span they cover.

    Rows whose 'Date_Time' value cannot be parsed are discarded before the
    aggregation, so they contribute neither to the counts nor to the ranges.

    Parameters:
    file_path (str): Path of the CSV file to load.

    Returns:
    tuple: (list of city names, DataFrame with the columns 'Location',
        'Count', 'Start_Date' and 'End_Date'), both ordered by city name.
        (None, None) is returned, after printing an error message, when the
        file does not exist or lacks the 'Location' / 'Date_Time' columns.
    """
    failure = (None, None)

    # Load the raw table; a missing file is reported instead of raised.
    try:
        raw = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"错误: 文件 {file_path} 未找到")
        return failure

    # Both columns are required by the aggregation below.
    if not all(col in raw.columns for col in ('Location', 'Date_Time')):
        print("错误: 数据中未找到 'Location' 或 'Date_Time' 列")
        return failure

    # Unparseable timestamps become NaT and are then filtered out.
    parsed = raw.assign(
        Date_Time=pd.to_datetime(raw['Date_Time'], errors='coerce')
    )
    valid = parsed[parsed['Date_Time'].notna()]

    # One row per city: record count plus earliest and latest timestamp.
    aggregations = {
        'Count': ('Location', 'size'),
        'Start_Date': ('Date_Time', 'min'),
        'End_Date': ('Date_Time', 'max'),
    }
    per_city = valid.groupby('Location').agg(**aggregations)
    city_stats = per_city.reset_index().sort_values('Location')

    unique_cities = city_stats['Location'].unique().tolist()
    return unique_cities, city_stats

# Usage example: list the cities and their statistics, then export them.
if __name__ == "__main__":
    data_file = "../Data/weather_data2.csv"  # replace with your own path

    cities, city_df = count_city_stats_with_dates(data_file)

    # The function returns (None, None) after reporting an error itself.
    if cities is not None:
        # Numbered list of every city found in the data set.
        print("\n数据集中出现的所有城市:", "-" * 30, sep="\n")
        for position, city_name in enumerate(cities, start=1):
            print(f"{position}. {city_name}")

        # Per-city table: record count plus first and last timestamp.
        print("\n各城市数据统计（记录数量 + 时间范围）:", "-" * 50, sep="\n")
        print(city_df.to_string(index=False))

        # Optional: persist the statistics next to the notebook.
        output_name = "city_statistics_with_dates.csv"
        city_df.to_csv(output_name, index=False)
        print("\n统计结果已保存到 city_statistics_with_dates.csv")



数据集中出现的所有城市:
------------------------------
1. Chicago
2. Dallas
3. Houston
4. Los Angeles
5. New York
6. Philadelphia
7. Phoenix
8. San Antonio
9. San Diego
10. San Jose

各城市数据统计（记录数量 + 时间范围）:
--------------------------------------------------
    Location  Count          Start_Date            End_Date
     Chicago 100164 2024-01-01 00:03:25 2024-05-18 19:43:30
      Dallas  99936 2024-01-01 00:00:46 2024-05-18 19:43:13
     Houston 100076 2024-01-01 00:00:06 2024-05-18 19:44:02
 Los Angeles  99922 2024-01-01 00:00:15 2024-05-18 19:42:28
    New York  99972 2024-01-01 00:04:45 2024-05-18 19:43:55
Philadelphia 100122 2024-01-01 00:02:01 2024-05-18 19:43:50
     Phoenix 100209 2024-01-01 00:00:06 2024-05-18 19:43:26
 San Antonio  99962 2024-01-01 00:00:18 2024-05-18 19:44:10
   San Diego  99774 2024-01-01 00:00:31 2024-05-18 19:43:11
    San Jose  99863 2024-01-01 00:03:06 2024-05-18 19:43:21

统计结果已保存到 city_statistics_with_dates.csv
