In [60]:
import pandas as pd
import os
import numpy as np



def average_every_n_points(df, chunk_size=60, output_path=None, target_rows=24):
    """
    Down-sample a DataFrame by averaging consecutive blocks of rows, then
    force the result to a fixed number of rows.

    Parameters:
        df (DataFrame): Input data. Every column is averaged with
            ``DataFrame.mean()``, so columns are expected to be numeric.
        chunk_size (int): Number of consecutive rows averaged into one output
            row. The final block may be shorter. Defaults to 60.
        output_path (str): If truthy, the result is also written to this CSV
            path (without the index). Defaults to None (nothing is written).
        target_rows (int): Number of rows the result is trimmed or padded to.
            Defaults to 24.

    Returns:
        averaged_df (DataFrame): One row per block, indexed 0..n-1. Extra rows
        beyond ``target_rows`` are dropped; a short result is padded by
        repeating its last row. An empty input yields an empty DataFrame,
        because there is no row to pad with.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``target_rows`` is
            negative.
    """
    # A zero step makes range() fail with an unrelated message and a negative
    # one silently produces no blocks, so reject both up front.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if target_rows < 0:
        raise ValueError("target_rows must not be negative")

    # Mean of each consecutive block of chunk_size rows.
    block_means = [
        df.iloc[start:start + chunk_size].mean()
        for start in range(0, len(df), chunk_size)
    ]
    averaged_df = pd.DataFrame(block_means)

    block_count = len(averaged_df)
    if block_count > target_rows:
        # Too many blocks: keep only the first target_rows.
        averaged_df = averaged_df.iloc[:target_rows]
    elif 0 < block_count < target_rows:
        # Too few blocks: repeat the last averaged row until the target
        # length is reached.
        last_row = averaged_df.iloc[-1:]
        missing = target_rows - block_count
        averaged_df = pd.concat([averaged_df] + [last_row] * missing, ignore_index=True)

    # Optionally persist the result.
    if output_path:
        averaged_df.to_csv(output_path, index=False)

    return averaged_df




# Folder layout for the trajectory-completion step: simulated AIS tracks
# (input), the library of original trajectories (lookup source) and the
# destination for the completed tracks.
simulation_ais_folder = "/workspaces/SFP/test_trajectory/Simulation_AIS"
trajectorylib_folder = "/workspaces/SFP/test_trajectory/trajectorylib"
output_folder = "/workspaces/SFP/test_trajectory/completed_trajectory"

# Create the output folder. exist_ok=True keeps the cell idempotent on re-run
# and avoids the race between a separate existence check and the creation.
os.makedirs(output_folder, exist_ok=True)

# Complete every simulated AIS track with the environmental columns of the
# nearest point of its original trajectory, then save it to output_folder.

# Columns copied from the original trajectory, and the coordinates used to
# find the nearest original point.
required_columns = [
    'draught', 'wind_val', 'wind_direction',
    'wave_val', 'wave_direction', 'stream_val', 'stream_direction'
]
position_columns = ['lon', 'lat']

# List the trajectory library once (it does not change inside the loop) and
# sort it, so the file picked for a given prefix is the same on every run.
trajectory_candidates = sorted(
    name for name in os.listdir(trajectorylib_folder) if name.endswith(".csv")
)

# os.listdir returns names in arbitrary order; sorting makes the processing
# order (and the printed log) reproducible.
for simulation_file in sorted(os.listdir(simulation_ais_folder)):
    if not simulation_file.endswith(".csv"):
        continue

    # The file stem (e.g. "5" for "5.csv") identifies the track.
    file_num = os.path.splitext(simulation_file)[0]

    # Load the simulated track and reduce it to block averages
    # (60 rows per block, fixed to 24 rows by average_every_n_points).
    simulation_path = os.path.join(simulation_ais_folder, simulation_file)
    simulation_data = pd.read_csv(simulation_path)
    simulation_data = average_every_n_points(simulation_data, chunk_size=60, output_path=None)

    # The original trajectory is the first library file named "<stem>_*.csv".
    trajectory_file = f"{file_num}_"  # prefix to match
    trajectory_path = next(
        (
            os.path.join(trajectorylib_folder, name)
            for name in trajectory_candidates
            if name.startswith(trajectory_file)
        ),
        None,
    )

    if trajectory_path is None:
        print(f"未找到与 {simulation_file} 对应的原始轨迹文件。")
        continue

    trajectory_data = pd.read_csv(trajectory_path)

    # The trajectory must provide the columns to copy and the coordinates
    # used for matching; otherwise skip it instead of failing with a
    # KeyError in the middle of the batch.
    needed = required_columns + position_columns
    if not all(col in trajectory_data.columns for col in needed):
        print(f"原始轨迹文件 {trajectory_path} 缺少必要的列。")
        continue

    # For each averaged point, find the label of the nearest original point.
    # NOTE(review): the distance is plain Euclidean distance on lon/lat
    # values, not a geodesic distance — confirm this is accurate enough for
    # the latitudes involved.
    closest_labels = []
    for _, row in simulation_data.iterrows():
        distances = np.sqrt((trajectory_data['lon'] - row['lon'])**2 +
                            (trajectory_data['lat'] - row['lat'])**2)
        closest_labels.append(distances.idxmin())

    # Copy the matched rows column by column. Positional assignment
    # (to_numpy) avoids index alignment between the two frames and replaces
    # one scalar .loc write per cell.
    completed_data = simulation_data.copy()
    matched = trajectory_data.loc[closest_labels, required_columns]
    for col in required_columns:
        completed_data[col] = matched[col].to_numpy()

    # Save the completed track under the same stem.
    output_path = os.path.join(output_folder, f"{file_num}.csv")
    completed_data.to_csv(output_path, index=False)

    print(f"完成文件 {file_num}.csv 的补全并保存到 {output_folder}")


完成文件 2.csv 的补全并保存到 /workspaces/SFP/test_trajectory/completed_trajectory
完成文件 5.csv 的补全并保存到 /workspaces/SFP/test_trajectory/completed_trajectory
完成文件 1.csv 的补全并保存到 /workspaces/SFP/test_trajectory/completed_trajectory
完成文件 6.csv 的补全并保存到 /workspaces/SFP/test_trajectory/completed_trajectory
完成文件 0.csv 的补全并保存到 /workspaces/SFP/test_trajectory/completed_trajectory
完成文件 3.csv 的补全并保存到 /workspaces/SFP/test_trajectory/completed_trajectory
完成文件 4.csv 的补全并保存到 /workspaces/SFP/test_trajectory/completed_trajectory


## 定义辅助函数

In [51]:
from class_calm_water_resistance_estimatoin import *
import pickle
from sklearn.preprocessing import StandardScaler

def Cal_R_calm(row):
    """
    Compute the calm-water resistance for one trajectory record.

    Input:
        row - record providing 'heading', 'stream_val', 'stream_direction'
              and 'SOG'.
    Output:
        The value returned by calm_water_resistance() divided by 1000
        (presumably N -> kN; the caller labels the column kN — confirm
        against the helper's definition).
    """
    heading = row['heading']

    # Split the current into two components from its magnitude and direction
    # (direction is given in degrees).
    current_speed = row['stream_val']
    current_angle = np.deg2rad(row['stream_direction'])
    current_u = current_speed * np.sin(current_angle)
    current_v = current_speed * np.cos(current_angle)

    # 0.5144 converts the speed over ground from knots to m/s.
    sog_ms = row['SOG'] * 0.5144

    # speedGPS2Water presumably converts speed over ground into speed through
    # water — confirm against its definition. No shallow-water correction is
    # applied: the water speed is passed on unchanged.
    water_speed = speedGPS2Water(sog_ms, heading, current_u, current_v)
    resistance = calm_water_resistance(water_speed)

    return resistance / 1000

def Rtotal2Pe(row):
    """
    Compute the power for one record as water speed times total resistance.

    Input:
        row - record providing 'stream_val', 'stream_direction', 'SOG',
              'R_t_pre' (predicted total resistance) and 'heading'.
    Output:
        Pe - V_water * R_t_pre. Presumably kW if R_t_pre is in kN and
        V_water in m/s — confirm the units of the inputs.
    """
    # Split the current into two components from its magnitude and direction
    # (direction is given in degrees).
    current_speed = row['stream_val']
    current_angle = np.deg2rad(row['stream_direction'])
    current_u = current_speed * np.sin(current_angle)
    current_v = current_speed * np.cos(current_angle)

    sog_ms = row['SOG'] * 0.5144  # knots to m/s
    total_resistance = row['R_t_pre']
    heading = row['heading']

    # speedGPS2Water presumably converts speed over ground into speed through
    # water — confirm against its definition.
    water_speed = speedGPS2Water(sog_ms, heading, current_u, current_v)

    return water_speed * total_resistance

def calFuelHour(row):
    """
    Compute the hourly fuel consumption for one record.

    Input:
        row - record providing 'P_pre' (power, presumably in kW).
    Output:
        P_pre / eta * 200 / 1000000, where eta is the product of the four
        fixed efficiency factors below. With power in kW and the consumption
        rate in g/kWh this is tonnes per hour.
    """
    # Empirical efficiency factors (fixed values, not computed per record).
    eta_r = 1.0   # hard to compute exactly but varies little; empirical 1-1.07 or 0.98
    eta_o = 0.60  # computable only with V_A and thrust T; empirical 0.55-0.7
    eta_s = 0.99  # no formula; empirical 0.99, 0.98 or 0.95-0.96 depending on the case
    eta_h = 1.1   # (1-t)/(1-w); needs the force/speed relationship to compute t and w
    fuel_rate = 200  # specific fuel consumption, g/kWh

    overall_efficiency = eta_r * eta_o * eta_s * eta_h
    power = row['P_pre']

    # The divisor 1000000 converts grams to tonnes.
    return power / overall_efficiency * fuel_rate / 1000000


def calFuelMinute(row):
    """
    Compute the per-minute fuel consumption for one record.

    Input:
        row - record providing 'P_pre' (power in kW).
    Output:
        fuel_minute - fuel consumption in tonnes per minute: the hourly
        figure P_pre / eta * 200 / 1000000 divided by 60.
    """
    # Fixed efficiency factors. NOTE(review): the original comments name
    # these differently from calFuelHour (e.g. etaH as a "wave influence"
    # factor); the values are identical in both — confirm the intended
    # meaning of each factor.
    eta_r = 1.0
    eta_o = 0.60
    eta_s = 0.99
    eta_h = 1.1
    fuel_rate = 200  # specific fuel consumption, g/kWh

    overall_efficiency = eta_r * eta_o * eta_s * eta_h
    power = row['P_pre']

    # Tonnes per hour (the divisor 1000000 converts grams to tonnes) ...
    tonnes_per_hour = power / overall_efficiency * fuel_rate / 1000000

    # ... then tonnes per minute.
    return tonnes_per_hour / 60



## 读取数字船的数据并计算各种阻力

In [65]:

# Load one completed trajectory (track 5) written by the completion step.
df = pd.read_csv('/workspaces/SFP/test_trajectory/completed_trajectory/5.csv')

# Block averaging is left disabled here: the completed_trajectory files were
# already averaged before they were saved.
# df = average_every_n_points(df, chunk_size=60, output_path=None)

# Calm-water resistance per record.
df['R_calm'] = df.apply(Cal_R_calm, axis=1) # kN

# Feature columns passed to the model, in this order.
columns_to_use = [
    'SOG', 'heading', 'draught', 'wind_val', 'wind_direction',
    'wave_val', 'wave_direction', 'stream_val', 'stream_direction'
]
input_df = df[columns_to_use]

# Load the pre-trained ensemble model.
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# model files from a trusted source.
with open('ensemble_model.pkl', 'rb') as file:
    ensemble_model = pickle.load(file)

# Standardise the features.
# NOTE(review): the scaler is fit on this inference data, so the features are
# scaled with this file's own mean/std rather than the statistics used when
# the model was trained. If the model was trained on standardised features,
# the training scaler should be persisted and applied with transform() here —
# confirm against the training code.
feature_scaler = StandardScaler()
input_features_scaled = feature_scaler.fit_transform(input_df)

# Predict R_a_pre with the model (presumably the added resistance, given that
# it is summed with the calm-water resistance below — confirm).
df['R_a_pre'] = ensemble_model.predict(input_features_scaled)

# Total predicted resistance: R_a_pre plus R_calm.
df['R_t_pre'] = df['R_a_pre'] + df['R_calm']

# Predicted power per record.
df['P_pre'] = df.apply(Rtotal2Pe, axis=1)

# Hourly fuel consumption per record (calFuelHour, not the per-minute variant).
df['Fuel_hour'] = df.apply(calFuelHour, axis=1)

# Sum of the per-record hourly figures; this is a total in tonnes only if
# each record stands for one hour — TODO confirm the sampling interval.
print(df['Fuel_hour'].sum())

57.60864205292896


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
