In [1]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import time

# Major California cities mapped to (latitude, longitude) in decimal degrees.
cities = {
    "Los Angeles": (34.0522, -118.2437),
    "San Francisco": (37.7749, -122.4194),
    "San Diego": (32.7157, -117.1611),
    "Sacramento": (38.5816, -121.4944),
    "San Jose": (37.3382, -121.8863),
    "Fresno": (36.7378, -119.7871),
    "Oakland": (37.8044, -122.2712),
    "Long Beach": (33.7701, -118.1937),
}

# Date range to download (2011-01-29 through 2016-04-23, both inclusive).
start_date = "2011-01-29"
end_date = "2016-04-23"

# Seconds to wait for the API before giving up; without a timeout a stalled
# connection would block this cell indefinitely.
request_timeout = 30

# Daily variables requested from the archive API, joined into the
# comma-separated form the `daily` query parameter expects.
daily_variables = ",".join([
    "temperature_2m_mean",
    "temperature_2m_max",
    "temperature_2m_min",
    "precipitation_sum",
    "windspeed_10m_max",
    "relative_humidity_2m_mean",
])

# One daily DataFrame per city; concatenated after the loop.
all_data = []

for city, (lat, lon) in cities.items():
    print(f"Fetching data for {city}...")

    # Open-Meteo historical archive request for this city's coordinates.
    url = (
        "https://archive-api.open-meteo.com/v1/archive"
        f"?latitude={lat}&longitude={lon}"
        f"&start_date={start_date}&end_date={end_date}"
        f"&daily={daily_variables}"
        "&timezone=America/Los_Angeles"
    )

    response = requests.get(url, timeout=request_timeout)
    # Fail loudly on HTTP errors (e.g. rate limiting) instead of hitting an
    # opaque KeyError when the error payload lacks the "daily" section.
    response.raise_for_status()
    data = response.json()
    if "daily" not in data:
        raise RuntimeError(
            f"Open-Meteo response for {city} has no 'daily' section: {data}"
        )

    # Convert the column-oriented "daily" payload into a DataFrame.
    daily = data["daily"]
    df = pd.DataFrame({
        "date": daily["time"],
        "avg_temp": daily["temperature_2m_mean"],
        "max_temp": daily["temperature_2m_max"],
        "min_temp": daily["temperature_2m_min"],
        "total_precip": daily["precipitation_sum"],
        "wind_speed": daily["windspeed_10m_max"],
        "humidity": daily["relative_humidity_2m_mean"],
    })

    df["date"] = pd.to_datetime(df["date"])
    df["city"] = city

    all_data.append(df)

    # Pause between requests to avoid being rate-limited by the API.
    time.sleep(2)

# Stack all cities; ignore_index avoids a duplicated row index across cities.
full_df = pd.concat(all_data, ignore_index=True)

# Assign 1-based week numbers counted in 7-day blocks from start_date.
# NOTE: the range covers 1912 days (273 full weeks + 1 day), so week 274
# exists and contains only the final day, 2016-04-23.
full_df["week"] = ((full_df["date"] - pd.to_datetime(start_date)).dt.days // 7) + 1

# Aggregate daily values per (week, city). reset_index() already yields the
# columns week, city, avg_temp, max_temp, min_temp, total_precip, wind_speed,
# humidity, so no explicit renaming is needed afterwards.
weekly_data = full_df.groupby(["week", "city"]).agg({
    "avg_temp": "mean",
    "max_temp": "max",
    "min_temp": "min",
    "total_precip": "sum",
    "wind_speed": "mean",
    "humidity": "mean",
}).reset_index()

# Save the weekly table as CSV.
weekly_data.to_csv("california_weekly_weather.csv", index=False)
print("数据已保存为 california_weekly_weather.csv")

Fetching data for Los Angeles...
Fetching data for San Francisco...
Fetching data for San Diego...
Fetching data for Sacramento...
Fetching data for San Jose...
Fetching data for Fresno...
Fetching data for Oakland...
Fetching data for Long Beach...
数据已保存为 california_weekly_weather.csv


In [None]:
import requests
import pandas as pd
from datetime import datetime

# Latitude / longitude of Los Angeles in decimal degrees.
lat, lon = 34.0522, -118.2437

# Date range to download (2011-01-29 through 2016-04-23, both inclusive).
start_date = "2011-01-29"
end_date = "2016-04-23"

# Seconds to wait for the API before giving up; without a timeout a stalled
# connection would block this cell indefinitely.
request_timeout = 30

# Open-Meteo historical archive request (daily variables, local timezone).
url = f"https://archive-api.open-meteo.com/v1/archive?latitude={lat}&longitude={lon}&start_date={start_date}&end_date={end_date}&daily=temperature_2m_mean,temperature_2m_max,temperature_2m_min,precipitation_sum,windspeed_10m_max,relative_humidity_2m_mean&timezone=America/Los_Angeles"

response = requests.get(url, timeout=request_timeout)
# Fail loudly on HTTP errors (e.g. rate limiting) instead of hitting an
# opaque KeyError when the error payload lacks the "daily" section.
response.raise_for_status()
data = response.json()
if "daily" not in data:
    raise RuntimeError(f"Open-Meteo response has no 'daily' section: {data}")

# Convert the column-oriented "daily" payload into a DataFrame.
daily = data["daily"]
df = pd.DataFrame({
    "date": daily["time"],
    "avg_temp": daily["temperature_2m_mean"],
    "max_temp": daily["temperature_2m_max"],
    "min_temp": daily["temperature_2m_min"],
    "precipitation": daily["precipitation_sum"],
    "wind_speed": daily["windspeed_10m_max"],
    "humidity": daily["relative_humidity_2m_mean"],
})

# Assign 1-based week numbers counted in 7-day blocks from start_date.
# NOTE: the range covers 1912 days (273 full weeks + 1 day), so week 274
# exists and contains only the final day, 2016-04-23.
df["date"] = pd.to_datetime(df["date"])
df["week"] = ((df["date"] - pd.to_datetime(start_date)).dt.days // 7) + 1

# Aggregate daily values into one row per week.
weekly_data = df.groupby("week").agg({
    "avg_temp": "mean",
    "max_temp": "max",
    "min_temp": "min",
    "precipitation": "sum",
    "wind_speed": "mean",
    "humidity": "mean",
}).reset_index()

# Save the weekly table as CSV.
weekly_data.to_csv("los_angeles_weekly_weather.csv", index=False)
print("数据已保存为 los_angeles_weekly_weather.csv")

数据已保存为 los_angeles_weekly_weather.csv


In [None]:
# Load the weekly Los Angeles weather table from its CSV file.
weather_data = pd.read_csv("los_angeles_weekly_weather.csv")

# Left-join the weather columns onto struc_data by "week": every row of
# struc_data is kept, and weeks without a weather match get NaN in the
# weather columns.
# NOTE(review): struc_data is not defined in this view — presumably the sales
# table built in an earlier cell; confirm it exists and has a "week" column.
merged_data = pd.merge(struc_data, weather_data, how="left", on="week")

# Inspect the first rows and the resulting column names.
print(merged_data.head())
print(f"合并后的列名：{merged_data.columns.tolist()}")