In [None]:
import pandas as pd
import numpy as np
import networkx as nx
from matplotlib import pyplot as plt
from matplotlib.ticker import MultipleLocator
from matplotlib.ticker import FuncFormatter, MaxNLocator, ScalarFormatter
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
plt.style.use('seaborn-v0_8-paper')
# # 设置全局参数
plt.rcParams['figure.facecolor'] = 'white'  # 设置图形的背景为透明
plt.rcParams['axes.facecolor'] = 'white'    # 设置轴域的背景为透明
plt.rcParams['savefig.facecolor'] = 'white' # 保存图像时背景透明
plt.rcParams['axes.grid'] = False
import seaborn as sns
import joblib
# plt.rcParams['font.family']='Times New Roman,Microsoft YaHei'# 设置字体族，中文为微软雅黑，英文为Times New Roman
plt.rcParams['font.sans-serif'] = 'Times New Roman'
plt.rcParams['mathtext.fontset'] = 'stix'  # 设置数学公式字体为stix
plt.rcParams["text.usetex"] = False
plt.rcParams['axes.unicode_minus'] = False  # 用来正常显示负号
import statsmodels.api as sm

In [42]:
data=pd.read_csv('dataebasnona.csv')

In [43]:
# 确保 'Date' 列为 datetime 类型
data['Date'] = pd.to_datetime(data['Date'])
# 提取月份
data['Month'] = data['Date'].dt.month
# 选择夏季（6月、7月、8月）数据
data = data[data['Month'].isin([6, 7, 8])]
# 去掉辅助的 'Month' 列
data = data.drop(columns=['Month'])

In [44]:
data['isoprene']=data['isoprene']/1000

In [46]:


# 初始化一个空的列表，用于存储每个 id 的处理结果
data_processed_list = []

# 遍历每个 id
for id_value in data['id'].unique():
    # 提取当前 id 的数据
    data_id = data[data['id'] == id_value].copy()
    
    # 获取当前 id 的分辨率
    resolution = data_id['resolution'].iloc[0]
    
    # 根据分辨率确定分组方式
    if resolution in ['1d', '35h', '5d']:
        # 按天（1号到31号）分组，不包含年和月
        data_id['Day'] = data_id['Date'].dt.day
        data_id['Hour'] = 0  # 没有小时信息，设为0
        group_cols = ['Day']
        # 重新构建 Date 列，使用虚拟的年份和月份
        data_id['Date'] = pd.to_datetime({'year': 2023, 'month': 1, 'day': data_id['Day']})
    elif resolution in ['1h', '2659s']:
        # 按小时（0点到24点）分组，不包含年、月和日
        data_id['Day'] = 1  # 没有日期信息，设为1
        data_id['Hour'] = data_id['Date'].dt.hour
        group_cols = ['Hour']
        # 重新构建 Date 列，使用虚拟的年份、月份和日期
        data_id['Date'] = pd.to_datetime({'year': 2023, 'month': 1, 'day': data_id['Day'], 'hour': data_id['Hour']})
    else:
        # 如果分辨率不在已知范围内，跳过该 id
        print(f"未知的分辨率：{resolution}，跳过 id：{id_value}")
        continue
    
    # 提取需要的列
    columns_to_keep = ['Date', 'Day', 'Hour', 'id', 'resolution', 'isoprene', 'temperature', 'longitude', 'latitude']
    data_id = data_id[columns_to_keep]
    
    # 确保 'isoprene' 和 'temperature' 列为数值型
    data_id['isoprene'] = pd.to_numeric(data_id['isoprene'], errors='coerce')
    data_id['temperature'] = pd.to_numeric(data_id['temperature'], errors='coerce')
    
    
    # 按照分组列分组，计算均值和标准差
    grouped = data_id.groupby(group_cols)
    agg_dict = {
        'isoprene': ['mean', 'std'],
        'temperature': ['mean', 'std'],
        'longitude': 'first',
        'latitude': 'first',
        'id': 'first',
        'resolution': 'first',
        'Day': 'first',
        'Hour': 'first'
    }
    data_id_agg = grouped.agg(agg_dict).reset_index()
    
    # 扁平化多级列名
    data_id_agg.columns = ['_'.join(col).strip('_') for col in data_id_agg.columns.values]
    
    # 重新构建 Date 列
    if resolution in ['1d', '35h', '5d']:
        data_id_agg['Date'] = pd.to_datetime({'year': 2023, 'month': 1, 'day': data_id_agg['Day_first']})
    elif resolution in ['1h', '2659s']:
        data_id_agg['Date'] = pd.to_datetime({'year': 2023, 'month': 1, 'day': data_id_agg['Day_first'], 'hour': data_id_agg['Hour_first']})
    
    # 选择需要的列
    columns_to_select = [
        'Date', 'Day_first', 'Hour_first', 'id_first', 'resolution_first',
        'isoprene_mean', 'isoprene_std', 'temperature_mean', 'temperature_std',
        'longitude_first', 'latitude_first'
    ]
    data_id_final = data_id_agg[columns_to_select]
    
    # 重命名列名
    data_id_final.rename(columns={
        'Day_first': 'Day',
        'Hour_first': 'Hour',
        'id_first': 'id',
        'resolution_first': 'resolution',
        'isoprene_mean': 'isoprene',
        'isoprene_std': 'isoprene_std',
        'temperature_mean': 'temperature',
        'temperature_std': 'temperature_std',
        'longitude_first': 'longitude',
        'latitude_first': 'latitude'
    }, inplace=True)
    
    # 将处理后的数据添加到列表中
    data_processed_list.append(data_id_final)
    
# 合并所有 id 的数据
data_final = pd.concat(data_processed_list, ignore_index=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_id_final.rename(columns={
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_id_final.rename(columns={
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_id_final.rename(columns={
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_id_final.rename(columns={
A value is trying to be set on a copy of a s

In [47]:
data['Day'] = data['Date'].dt.day
data['Hour'] = data['Date'].dt.hour

In [48]:
data_final

Unnamed: 0,Date,Day,Hour,id,resolution,isoprene,isoprene_std,temperature,temperature_std,longitude,latitude
0,2023-01-01 00:00:00,1,0,FR0030R,1d,0.064279,,13.392000,,2.964886,45.772223
1,2023-01-02 00:00:00,2,0,FR0030R,5d,0.318000,0.149880,14.588361,3.854793,2.964886,45.772223
2,2023-01-03 00:00:00,3,0,FR0030R,35h,0.063434,0.002485,14.837000,12.604885,2.964886,45.772223
3,2023-01-04 00:00:00,4,0,FR0030R,5d,0.096000,0.003464,14.957300,8.456536,2.964886,45.772223
4,2023-01-06 00:00:00,6,0,FR0030R,5d,0.253441,0.277977,18.878829,4.071279,2.964886,45.772223
...,...,...,...,...,...,...,...,...,...,...,...
125,2023-01-01 19:00:00,1,19,CH0010U,1h,0.064134,0.024972,19.899242,3.061881,8.530419,47.377586
126,2023-01-01 20:00:00,1,20,CH0010U,1h,0.069162,0.046761,18.419697,2.901205,8.530419,47.377586
127,2023-01-01 21:00:00,1,21,CH0010U,1h,0.072903,0.070800,17.300000,2.633142,8.530419,47.377586
128,2023-01-01 22:00:00,1,22,CH0010U,1h,0.068124,0.043187,16.891176,2.675691,8.530419,47.377586


In [49]:
data_final.columns

Index(['Date', 'Day', 'Hour', 'id', 'resolution', 'isoprene', 'isoprene_std',
       'temperature', 'temperature_std', 'longitude', 'latitude'],
      dtype='object')

In [36]:
from scipy.optimize import curve_fit
from scipy.stats import zscore
from matplotlib.ticker import FuncFormatter, MaxNLocator
import matplotlib.dates as mdates
from scipy.stats import t


# 定义与均值的关系函数（二次函数）
def mean_relation(T, Q0, a, v0):
    return Q0 + (a * T**2) / 2 + T * v0

# 定义与方差的关系函数（三次函数）
def std_dev_relation(T, k, sigma0):
    return (k**2 * T**3) / 3 + k * T**2 * sigma0 + T * sigma0**2

# 定义去除离群值的函数（使用 Z-score 方法）
def remove_outliers(x, y,filter=True):
    mask = ~np.isnan(x) & ~np.isnan(y)  # 去除 NaN 值
    x, y = x[mask], y[mask]
    if filter==True:
        z_scores = zscore(y)
        mask = np.abs(z_scores) < 3
        return x[mask], y[mask]
    else:
        return x, y
from scipy.spatial import ConvexHull
from shapely.geometry import Polygon
from shapely.ops import unary_union
from scipy.stats import norm

def compute_area(points):
    # 计算点的凸包区域
    hull = ConvexHull(points)
    polygon = Polygon(points[hull.vertices])
    return polygon.area

def monte_carlo_r_squared_area(T, mean_params, std_dev_params, real_data, num_simulations=1000):
    original_points = np.column_stack((T, real_data))
    original_area = compute_area(original_points)

    simulation_areas = []
    overlap_areas = []

    for _ in range(num_simulations):
        simulated_path = norm.rvs(
            loc=mean_relation(T, *mean_params),
            scale=np.sqrt(std_dev_relation(T, *std_dev_params)),
            size=len(T)
        )
        simulated_points = np.column_stack((T, simulated_path))
        sim_area = compute_area(simulated_points)
        simulation_areas.append(sim_area)

        original_polygon = Polygon(original_points[ConvexHull(original_points).vertices])
        simulated_polygon = Polygon(simulated_points[ConvexHull(simulated_points).vertices])
        intersection_area = original_polygon.intersection(simulated_polygon).area
        overlap_areas.append(intersection_area)
    
    mean_overlap_area = np.mean(overlap_areas)
    mean_simulation_area = np.mean(simulation_areas)

    # Calculate R² based on area overlap
    r_squared_area = mean_overlap_area / original_area
    return min(max(r_squared_area, 0), 1)  # Ensure R² is in [0, 1]

# 定义异常值清理函数
def clean_data(df, columns, threshold=3):
    for col in columns:
        df = df[np.abs(zscore(df[col])) < threshold]
    return df.reset_index(drop=True)

def hour_min_to_float(hour_min_str):
    hour, minute = map(int, hour_min_str.split(":"))
    return hour + minute / 60.0

In [51]:
# # 初始化绘图
# fig, axs = plt.subplots(3, 5, figsize=(16, 10))
# 
# # 获取唯一的 id 列表
# id_list = data_final['id'].unique()
# axtime=[]
# # 遍历每个 id
# for idx, id_value in enumerate(id_list):
#     # 计算子图的位置
#     col = idx % 5  # 列索引
# 
#     # 提取当前 id 的数据
#     data_id = data_final[data_final['id'] == id_value].copy()
#     datao=data[data['id'] == id_value].copy()
#     datao = clean_data(datao, ['temperature', 'isoprene'])
# 
#     # 提取经纬度（假设经纬度在整个 id 中是相同的）
#     longitude = data_id['longitude'].iloc[0].round(2)
#     latitude = data_id['latitude'].iloc[0].round(2)
#     sample_count = data[data['id'] == id_value].shape[0]
# 
#     # ------------------------------
#     # 第一行：温度对异戊二烯均值的拟合（二次函数）
#     # ------------------------------
#     ax_mean = axs[0, col]
#     # 提取温度和异戊二烯均值
#     T_mean = data_id['temperature'].values
#     mean_values = data_id['isoprene'].values
#     # 去除离群值和 NaN
#     T_mean_filtered, mean_values_filtered = remove_outliers(T_mean, mean_values)
#     # 检查数据是否为空
#     if len(T_mean_filtered) > 0 and len(mean_values_filtered) > 0:
#         # 拟合
#         try:
#             params_mean, covariance_mean = curve_fit(mean_relation, T_mean_filtered, mean_values_filtered, maxfev=10000, method='trf')
#             # 生成拟合曲线
#             T_plot = np.linspace(T_mean_filtered.min(), T_mean_filtered.max(), 100)
#             mean_fit = mean_relation(T_plot, *params_mean)
#             # 绘制拟合曲线
#             ax_mean.plot(T_plot, mean_fit, color='#D76364', linewidth=2)
#         except Exception as e:
#             ax_mean.text(0.5, 0.5, f'Fitting Failed:\n{e}', transform=ax_mean.transAxes, fontsize=12, ha='center')
#     else:
#         ax_mean.text(0.5, 0.5, 'No Data', transform=ax_mean.transAxes, fontsize=12, ha='center')
#     # 绘制散点图
#     ax_mean.scatter(T_mean_filtered, mean_values_filtered, alpha=0.6, marker='x', s=50, linewidth=3,color='#D76364')
#     ax_mean.scatter(datao['temperature'], datao['isoprene'], label='Original Observations', color='gray', alpha=0.1, marker='o', s=30)
#     
#     # 设置标题和标签
#     ax_mean.set_xlabel('Temperature (°C)', fontsize=12)
#     ax_mean.set_ylabel(r"$\boldsymbol{μ(T)}$ $\boldsymbol{μg/m^3}$", fontsize=12)
#     idname=data_id['resolution'].iloc[0]
#     
#     ax_mean.set_title(
#     f'ID: {id_value}\n(Lon: {longitude}, Lat: {latitude})\nResolution: {idname}\nSample Count: {sample_count}',
#     fontsize=14,
#     weight='bold')
#     
#     
#     ax_mean.tick_params(axis='both', labelsize=10)
#     # 设置刻度格式
#     ax_mean.xaxis.set_major_formatter(FuncFormatter(lambda x, _: f'{x:.1f}'))
#     ax_mean.yaxis.set_major_formatter(FuncFormatter(lambda y, _: f'{y:.2f}'))
#     
#     std_errors_mean = np.sqrt(np.diag(covariance_mean))
#     
#     # 计算残差
#     residuals = mean_values_filtered - mean_relation(T_mean_filtered, *params_mean)
#     
#     # 计算残差平方和 (SS_res) 和总平方和 (SS_tot)
#     ss_res = np.sum(residuals**2)
#     ss_tot = np.sum((mean_values_filtered - np.mean(mean_values_filtered))**2)
#     
#     # 计算 R² 值
#     r_squared = 1 - (ss_res / ss_tot)
#     
#     # 计算 P 值
#     n = len(T_mean_filtered)  # 样本数量
#     p = len(params_mean)      # 参数数量
#     dof = max(0, n - p)       # 自由度
#     t_vals = params_mean / std_errors_mean  # t 统计量
#     p_values = 2 * (1 - t.cdf(np.abs(t_vals), dof))  # P 值
#     
#     
#     # 在图上添加拟合参数和标准误差
#     # ax_mean.text(0.05, 0.95, f'$Q_0$={params_mean[0]:.3f}, $a$={params_mean[1]:.3f}, $v_0$={params_mean[2]:.3f}\n'
#     #               f'P-values: {p_values[0]:.3f}, {p_values[1]:.3f}, {p_values[2]:.3f}\n'
#     #              f'$R^2$= {r_squared:.3f}',
#     #              transform=ax_mean.transAxes, verticalalignment='top', fontsize=13)
#     ax_mean.text(0.05, 0.95, f'$Q_0$={params_mean[0]:.3f},\n$a$={params_mean[1]:.3f}\n $v_0$={params_mean[2]:.3f}\n'
#              f'$R^2$= {r_squared:.3f}',
#              transform=ax_mean.transAxes, verticalalignment='top', fontsize=13,zorder=5)
# 
#     # ------------------------------
#     # 第二行：温度对异戊二烯标准差的拟合（三次函数)
#     # ------------------------------
#     ax_std = axs[1, col]
#     # 提取温度和异戊二烯标准差
#     T_std = data_id['temperature'].values
#     std_values = data_id['isoprene_std'].values
#     # 去除离群值和 NaN
#     T_std_filtered, std_values_filtered = remove_outliers(T_std, std_values)
#     # 检查数据是否为空
#     if len(T_std_filtered) > 0 and len(std_values_filtered) > 0:
#         # 拟合
#         try:
#             params_std, covariance_std = curve_fit(std_dev_relation, T_std_filtered, std_values_filtered, maxfev=10000, method='trf',
#     bounds=([0, -np.inf], [np.inf, np.inf]))
#             # 生成拟合曲线
#             T_plot_std = np.linspace(T_std_filtered.min(), T_std_filtered.max(), 100)
#             std_fit = std_dev_relation(T_plot_std, *params_std)
#             # 绘制拟合曲线
#             ax_std.plot(T_plot_std, std_fit, color='#32B897', linewidth=2)
#         except Exception as e:
#             ax_std.text(0.5, 0.5, f'Fitting Failed:\n{e}', transform=ax_std.transAxes, fontsize=12, ha='center')
#     else:
#         ax_std.text(0.5, 0.5, 'No Data', transform=ax_std.transAxes, fontsize=12, ha='center')
#     # 绘制散点图
#     ax_std.scatter(T_std_filtered, std_values_filtered, alpha=0.6, marker='x', s=50, linewidth=3,color='#32B897')
#     # 设置标签
#     ax_std.set_xlabel('Temperature (°C)', fontsize=12)
#     ax_std.set_ylabel(r"$\boldsymbol{σ^2(T)}$", fontsize=12)
#     ax_std.tick_params(axis='both', labelsize=10)
#     # 设置刻度格式
#     ax_std.xaxis.set_major_formatter(FuncFormatter(lambda x, _: f'{x:.1f}'))
#     ax_std.yaxis.set_major_formatter(FuncFormatter(lambda y, _: f'{y:.2f}'))
#     
#     
#     # 定义原始分辨率（以秒为单位）
#     original_resolution = 60*60
#     
#     # 定义一个函数，将分辨率字符串转换为秒
#     def resolution_to_seconds(res):
#         import re
#         match = re.match(r'(\d+)([dhms])', res)
#         if match:
#             value, unit = match.groups()
#             value = int(value)
#             if unit == 'd':
#                 return value * 86400
#             elif unit == 'h':
#                 return value * 3600
#             elif unit == 'm':
#                 return value * 60
#             elif unit == 's':
#                 return value
#         else:
#             raise ValueError(f"无法解析的分辨率格式: {res}")
#     
#     # 计算当前分辨率对应的秒数
#     current_resolution = resolution_to_seconds(resolution)
#     
#     # 计算 N
#     N = current_resolution / original_resolution
#     
#     
#     std_errors_std = np.sqrt(np.diag(covariance_std))
#     p_values_std_dev = np.diag(covariance_std)
#     params_std_o = params_std 
#     params_std = params_std / np.sqrt(N)
#     
#     # 计算标准误差
#     std_errors_std = np.sqrt(np.diag(covariance_std)) / np.sqrt(N)
#     
#   # 计算残差
#     residuals = std_values_filtered - std_dev_relation(T_std_filtered, *params_std)
#     
#     # 计算残差平方和 (SS_res)
#     ss_res = np.sum(residuals**2)
#     
#     # 计算总平方和 (SS_tot)
#     ss_tot = np.sum((std_values_filtered - np.mean(std_values_filtered))**2)
#     
#     # 计算 R² 值
#     r_squared = 1 - (ss_res / ss_tot)
# 
#     
#     
#     # 计算 t 统计量
#     t_vals_std = params_std / std_errors_std
#     
#     # 计算自由度
#     n = len(T_std_filtered)  # 样本数量
#     p = len(params_std)       # 参数数量
#     dof = max(0, n - p)       # 自由度
#     
#     # 计算 P 值
#     p_values = 2 * (1 - t.cdf(np.abs(t_vals_std), dof))
# 
#     # 在图上添加拟合参数和标准误差
#     # ax_std.text(0.05, 0.95, f'$k$={params_std [0]:.3f}, $σ_0$={params_std [1]:.3f}\n'
#     #               f'P-values: {p_values[0]:.3f}, {p_values[1]:.3f}\n',
#     #              transform=ax_std.transAxes, verticalalignment='top', fontsize=13)
#     ax_std.text(0.05, 0.95, f'$k$={params_std [0]:.3f}, $σ_0$={params_std [1]:.3f}\n',
#              transform=ax_std.transAxes, verticalalignment='top', fontsize=13)
# 
# 
#     # ------------------------------
#     # 第三行：异戊二烯随时间的变化（带置信区间）
#     # ------------------------------
#     ax_time = axs[2, col]
#     # 提取当前 id 的分辨率
#     resolution = data_id['resolution'].iloc[0]
#     
#     # 判断分辨率，选择 x 轴变量
#     if resolution in ['1d', '35h', '5d']:
#         x_variable = 'Day'
#         x_label = 'Day'
#     else:
#         x_variable = 'Hour'
#         x_label = 'Hour'
#     
#     # 提取 x 轴变量、异戊二烯均值和标准差
#     time_df = datao[[x_variable, 'isoprene', 'temperature']].dropna()
#     time_df = time_df.sort_values(x_variable)
#     
#     
#     ax_time.scatter(time_df[x_variable], time_df['isoprene'], color='grey', alpha=0.3, marker='o', s=20,zorder=10)
#     total_r_squared = monte_carlo_r_squared_area(
#     time_df['temperature'], params_mean, params_std_o, time_df['isoprene']
#     )
#     
#     # 设置调色板
#     ns=5
#     cmap = sns.light_palette("#5F97D2", as_cmap=True)
#     palette = [cmap(x) for x in np.linspace(0.2, 0.85, ns)]
#     for j in range(ns):
#         simulated_path = norm.rvs(loc=mean_relation(time_df['temperature'], *params_mean),
#                                   scale=np.sqrt(std_dev_relation(time_df['temperature'], *params_std_o)), size=time_df['temperature'].shape[0])
#         ax_time.plot(time_df[x_variable], simulated_path, color=palette[j], alpha=0.5)
# 
# 
# 
#       # 注释参数
#     ax_time.text(0.05, 0.9, f'JH:\nTotal $R^2$= {total_r_squared:.3f}', 
#                 transform=ax_time.transAxes, verticalalignment='top', fontsize=16, bbox=dict(facecolor='none', edgecolor='none', boxstyle='round,pad=0.3'))
#     
# 
#     # 设置标签
#     ax_time.set_xlabel(x_label, fontsize=12)
#     ax_time.set_ylabel(r'Isoprene ($\boldsymbol{μg/m^3}$)', fontsize=12)
#     ax_time.tick_params(axis='both', labelsize=10)
#     # 添加经纬度信息
#     # ax_time.text(0.5, 0.9, f'Lon: {longitude}, Lat: {latitude}', transform=ax_time.transAxes, fontsize=10, ha='center')
#     # 调整 X 轴标签旋转角度
#     for label in ax_time.get_xticklabels():
#         label.set_rotation(0)
#         label.set_horizontalalignment('right')
#         
#     axtime.append(ax_time)
#         
#         
# for text in fig.findobj(match=plt.Text):
#     text.set_fontsize(16)
#     text.set_weight('bold')
#     
# from matplotlib.lines import Line2D
# for i in axtime:
#     # 添加自定义图例
#     legend_elements = [
#     Line2D([0], [0], color=palette[ns-1], lw=2, label='Monte-Carlo\nSimulation'),
#     Line2D([0], [0], marker='o', color='black', label='Data Point', markerfacecolor='grey', markersize=8, alpha=0.7,linewidth=0)
#     ]
#     legend = i.legend(handles=legend_elements, loc='lower right', ncol=1, frameon=False, fontsize=12)
#     legend.set_zorder(50)
# 
# 
# # 调整布局
# plt.tight_layout()
# # 保存图像
# plt.savefig('isoprene_fitting_plots.svg', format='svg', bbox_inches='tight')
# plt.show()

In [18]:
data_id

Unnamed: 0,Date,Day,Hour,id,resolution,isoprene,isoprene_std,temperature,temperature_std,longitude,latitude
106,2023-01-01 00:00:00,1,0,CH0010U,1h,0.063473,0.019023,15.852239,2.540944,8.530419,47.377586
107,2023-01-01 01:00:00,1,1,CH0010U,1h,0.063165,0.016604,15.492754,2.639592,8.530419,47.377586
108,2023-01-01 02:00:00,1,2,CH0010U,1h,0.06369,0.016322,15.202817,2.693854,8.530419,47.377586
109,2023-01-01 03:00:00,1,3,CH0010U,1h,0.061594,0.002854,14.990845,2.908624,8.530419,47.377586
110,2023-01-01 04:00:00,1,4,CH0010U,1h,0.06363,0.015503,15.740769,2.482295,8.530419,47.377586
111,2023-01-01 05:00:00,1,5,CH0010U,1h,0.062804,0.008054,16.619672,2.392545,8.530419,47.377586
112,2023-01-01 06:00:00,1,6,CH0010U,1h,0.065674,0.021456,18.404545,2.767695,8.530419,47.377586
113,2023-01-01 07:00:00,1,7,CH0010U,1h,0.069406,0.037532,19.807813,3.089527,8.530419,47.377586
114,2023-01-01 08:00:00,1,8,CH0010U,1h,0.068418,0.032495,20.869841,3.188418,8.530419,47.377586
115,2023-01-01 09:00:00,1,9,CH0010U,1h,0.065043,0.02292,22.286719,3.740132,8.530419,47.377586


In [None]:
# 初始化绘图
fig, axs = plt.subplots(3, 5, figsize=(16, 10))

# 获取唯一的 id 列表
id_list = data_final['id'].unique()

# 遍历每个 id
for idx, id_value in enumerate(id_list):
    # 计算子图的位置
    col = idx % 5  # 列索引

    # 提取当前 id 的数据
    data_id = data_final[data_final['id'] == id_value].copy()

    # 提取经纬度（假设经纬度在整个 id 中是相同的）
    longitude = data_id['longitude'].iloc[0].round(2)
    latitude = data_id['latitude'].iloc[0].round(2)
    sample_count = data[data['id'] == id_value].shape[0]

    # ------------------------------
    # 第一行：温度对异戊二烯均值的拟合（二次函数）
    # ------------------------------
    ax_mean = axs[0, col]
    # 提取温度和异戊二烯均值
    T_mean = data_id['temperature'].values
    mean_values = data_id['isoprene'].values
    # 去除离群值和 NaN
    T_mean_filtered, mean_values_filtered = remove_outliers(T_mean, mean_values)
    # 检查数据是否为空
    if len(T_mean_filtered) > 0 and len(mean_values_filtered) > 0:
        # 拟合
        try:
            params_mean, covariance_mean = curve_fit(mean_relation, T_mean_filtered, mean_values_filtered, maxfev=10000, method='trf')
            # 生成拟合曲线
            T_plot = np.linspace(T_mean_filtered.min(), T_mean_filtered.max(), 100)
            mean_fit = mean_relation(T_plot, *params_mean)
            # 绘制拟合曲线
            ax_mean.plot(T_plot, mean_fit, color='red', linewidth=2)
        except Exception as e:
            ax_mean.text(0.5, 0.5, f'Fitting Failed:\n{e}', transform=ax_mean.transAxes, fontsize=12, ha='center')
    else:
        ax_mean.text(0.5, 0.5, 'No Data', transform=ax_mean.transAxes, fontsize=12, ha='center')
    # 绘制散点图
    ax_mean.scatter(T_mean_filtered, mean_values_filtered, alpha=0.6, marker='x', s=50, linewidth=3,color='grey')
    # 设置标题和标签
    ax_mean.set_xlabel('Temperature (°C)', fontsize=12)
    ax_mean.set_ylabel(r"$\boldsymbol{μ(T)}$ $\boldsymbol{μg/m^3}$", fontsize=12)
    idname=data_id['resolution'].iloc[0]

    ax_mean.set_title(
    f'ID: {id_value}\n(Lon: {longitude}, Lat: {latitude})\nResolution: {idname}\nSample Count: {sample_count}',
    fontsize=14,
    weight='bold')


    ax_mean.tick_params(axis='both', labelsize=10)
    # 设置刻度格式
    ax_mean.xaxis.set_major_formatter(FuncFormatter(lambda x, _: f'{x:.1f}'))
    ax_mean.yaxis.set_major_formatter(FuncFormatter(lambda y, _: f'{y:.2f}'))

    std_errors_mean = np.sqrt(np.diag(covariance_mean))

    # 计算残差
    residuals = mean_values_filtered - mean_relation(T_mean_filtered, *params_mean)

    # 计算残差平方和 (SS_res) 和总平方和 (SS_tot)
    ss_res = np.sum(residuals**2)
    ss_tot = np.sum((mean_values_filtered - np.mean(mean_values_filtered))**2)

    # 计算 R² 值
    r_squared = 1 - (ss_res / ss_tot)

    # 计算 P 值
    n = len(T_mean_filtered)  # 样本数量
    p = len(params_mean)      # 参数数量
    dof = max(0, n - p)       # 自由度
    t_vals = params_mean / std_errors_mean  # t 统计量
    p_values = 2 * (1 - t.cdf(np.abs(t_vals), dof))  # P 值


    # 在图上添加拟合参数和标准误差
    # ax_mean.text(0.05, 0.95, f'$Q_0$={params_mean[0]:.3f}, $a$={params_mean[1]:.3f}, $v_0$={params_mean[2]:.3f}\n'
    #               f'P-values: {p_values[0]:.3f}, {p_values[1]:.3f}, {p_values[2]:.3f}\n'
    #              f'$R^2$= {r_squared:.3f}',
    #              transform=ax_mean.transAxes, verticalalignment='top', fontsize=13)
    ax_mean.text(0.05, 0.95, f'$Q_0$={params_mean[0]:.3f},\n$a$={params_mean[1]:.3f}\n $v_0$={params_mean[2]:.3f}\n'
             f'$R^2$= {r_squared:.3f}',
             transform=ax_mean.transAxes, verticalalignment='top', fontsize=13,zorder=5)

    # ------------------------------
    # 第二行：温度对异戊二烯标准差的拟合（三次函数)
    # ------------------------------
    ax_std = axs[1, col]
    # 提取温度和异戊二烯标准差
    T_std = data_id['temperature'].values
    std_values = data_id['isoprene_std'].values
    # 去除离群值和 NaN
    T_std_filtered, std_values_filtered = remove_outliers(T_std, std_values)
    # 检查数据是否为空
    if len(T_std_filtered) > 0 and len(std_values_filtered) > 0:
        # 拟合
        try:
            params_std, covariance_std = curve_fit(std_dev_relation, T_std_filtered, std_values_filtered, maxfev=10000, method='trf',
    bounds=([0, -np.inf], [np.inf, np.inf]))
            # 生成拟合曲线
            T_plot_std = np.linspace(T_std_filtered.min(), T_std_filtered.max(), 100)
            std_fit = std_dev_relation(T_plot_std, *params_std)
            # 绘制拟合曲线
            ax_std.plot(T_plot_std, std_fit, color='green', linewidth=2)
        except Exception as e:
            ax_std.text(0.5, 0.5, f'Fitting Failed:\n{e}', transform=ax_std.transAxes, fontsize=12, ha='center')
    else:
        ax_std.text(0.5, 0.5, 'No Data', transform=ax_std.transAxes, fontsize=12, ha='center')
    # 绘制散点图
    ax_std.scatter(T_std_filtered, std_values_filtered, alpha=0.6, marker='x', s=50, linewidth=3,color='grey')
    # 设置标签
    ax_std.set_xlabel('Temperature (°C)', fontsize=12)
    ax_std.set_ylabel(r"$\boldsymbol{σ^2(T)}$", fontsize=12)
    ax_std.tick_params(axis='both', labelsize=10)
    # 设置刻度格式
    ax_std.xaxis.set_major_formatter(FuncFormatter(lambda x, _: f'{x:.1f}'))
    ax_std.yaxis.set_major_formatter(FuncFormatter(lambda y, _: f'{y:.2f}'))


    # 定义原始分辨率（以秒为单位）
    original_resolution = 60*60

    # 定义一个函数，将分辨率字符串转换为秒
    def resolution_to_seconds(res):
        import re
        match = re.match(r'(\d+)([dhms])', res)
        if match:
            value, unit = match.groups()
            value = int(value)
            if unit == 'd':
                return value * 86400
            elif unit == 'h':
                return value * 3600
            elif unit == 'm':
                return value * 60
            elif unit == 's':
                return value
        else:
            raise ValueError(f"无法解析的分辨率格式: {res}")

    # 计算当前分辨率对应的秒数
    current_resolution = resolution_to_seconds(resolution)

    # 计算 N
    N = current_resolution / original_resolution


    std_errors_std = np.sqrt(np.diag(covariance_std))
    p_values_std_dev = np.diag(covariance_std)
    params_std = params_std / np.sqrt(N)

    # 计算标准误差
    std_errors_std = np.sqrt(np.diag(covariance_std)) / np.sqrt(N)

  # 计算残差
    residuals = std_values_filtered - std_dev_relation(T_std_filtered, *params_std)

    # 计算残差平方和 (SS_res)
    ss_res = np.sum(residuals**2)

    # 计算总平方和 (SS_tot)
    ss_tot = np.sum((std_values_filtered - np.mean(std_values_filtered))**2)

    # 计算 R² 值
    r_squared = 1 - (ss_res / ss_tot)



    # 计算 t 统计量
    t_vals_std = params_std / std_errors_std

    # 计算自由度
    n = len(T_std_filtered)  # 样本数量
    p = len(params_std)       # 参数数量
    dof = max(0, n - p)       # 自由度

    # 计算 P 值
    p_values = 2 * (1 - t.cdf(np.abs(t_vals_std), dof))

    # 在图上添加拟合参数和标准误差
    # ax_std.text(0.05, 0.95, f'$k$={params_std [0]:.3f}, $σ_0$={params_std [1]:.3f}\n'
    #               f'P-values: {p_values[0]:.3f}, {p_values[1]:.3f}\n',
    #              transform=ax_std.transAxes, verticalalignment='top', fontsize=13)
    ax_std.text(0.05, 0.95, f'$k$={params_std [0]:.3f}, $σ_0$={params_std [1]:.3f}\n',
             transform=ax_std.transAxes, verticalalignment='top', fontsize=13)


    # ------------------------------
    # 第三行：异戊二烯随时间的变化（带置信区间）
    # ------------------------------
    ax_time = axs[2, col]
    # 提取当前 id 的分辨率
    resolution = data_id['resolution'].iloc[0]

    # 判断分辨率，选择 x 轴变量
    if resolution in ['1d', '35h', '5d']:
        x_variable = 'Day'
        x_label = 'Day'
    else:
        x_variable = 'Hour'
        x_label = 'Hour'

    # 提取 x 轴变量、异戊二烯均值和标准差
    time_df = data_id[[x_variable, 'isoprene', 'isoprene_std']].dropna()
    time_df = time_df.sort_values(x_variable)

    # 计算上下置信区间
    time_df['lower'] = time_df['isoprene'] - time_df['isoprene_std']
    time_df['upper'] = time_df['isoprene'] + time_df['isoprene_std']


     # 绘制均值曲线
    ax_time.plot(time_df[x_variable], time_df['isoprene'], color='#14517C', label='Mean Concentration')
    # 绘制置信区间（标准差）
    ax_time.fill_between(time_df[x_variable], time_df['lower'], time_df['upper'], color='#14517C', alpha=0.2, label='±1 Std Dev')

    # 设置标签
    ax_time.set_xlabel(x_label, fontsize=12)
    ax_time.set_ylabel(r'Isoprene ($\boldsymbol{μg/m^3}$)', fontsize=12)
    ax_time.tick_params(axis='both', labelsize=10)
    # 添加经纬度信息
    # ax_time.text(0.5, 0.9, f'Lon: {longitude}, Lat: {latitude}', transform=ax_time.transAxes, fontsize=10, ha='center')
    # 调整 X 轴标签旋转角度
    for label in ax_time.get_xticklabels():
        label.set_rotation(0)
        label.set_horizontalalignment('right')


for text in fig.findobj(match=plt.Text):
    text.set_fontsize(16)
    text.set_weight('bold')

# 调整布局
plt.tight_layout()
# 保存图像
plt.savefig('isoprene_fitting_plots.svg', format='svg', bbox_inches='tight')
plt.show()