In [16]:
import pandas as pd
import numpy as np
import math

In [17]:
def calculate_distance(lat1, lon1, lat2, lon2, radius=6371):
    """
    Great-circle distance between two points, using the haversine formula.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in decimal degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in decimal degrees.
    radius : float, optional
        Radius of the sphere; the result is expressed in the same unit.
        Defaults to 6371 (Earth radius in kilometres).

    Returns
    -------
    float
        Distance along the surface of the sphere, in the unit of ``radius``.
        NaN inputs propagate to a NaN result.
    """
    lat1_rad = math.radians(lat1)
    lon1_rad = math.radians(lon1)
    lat2_rad = math.radians(lat2)
    lon2_rad = math.radians(lon2)

    dlon = lon2_rad - lon1_rad
    dlat = lat2_rad - lat1_rad

    a = math.sin(dlat / 2) ** 2 + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(dlon / 2) ** 2

    # Floating-point rounding can leave `a` a hair above 1 for (near-)antipodal
    # points, which would make sqrt(1 - a) raise ValueError. Only the upper
    # bound is clamped: the comparison is False for NaN, so NaN still
    # propagates, and a negative `a` (only possible for out-of-range
    # latitudes) still fails loudly instead of being masked.
    if a > 1.0:
        a = 1.0

    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    return radius * c

def generate_cost_data(islands_df, lng_facilities_df, rng=None):
    """
    Generate an All_Total_Cost value for every island, using the actual LNG
    facility locations for the distance component.

    Each cost is the sum of a fixed base cost, a latitude term, a western
    Pacific premium, a far-north premium and a term proportional to the
    distance to the nearest LNG facility. The sum is scaled by a random
    factor in [0.9, 1.1), multiplied further for extreme latitudes, and
    floored at 100000.

    Parameters
    ----------
    islands_df : pandas.DataFrame
        One row per island; the 'Lat' and 'Long' columns are read (degrees).
    lng_facilities_df : pandas.DataFrame
        One row per LNG facility; the 'Latitude' and 'Longitude' columns are
        read (degrees).
    rng : optional
        Object exposing a ``random()`` method that returns a float in
        [0, 1), e.g. ``numpy.random.default_rng(seed)``. When None (the
        default) NumPy's global generator is used via ``np.random.random()``.

    Returns
    -------
    list
        Costs rounded to 2 decimals, in the row order of ``islands_df``.

    Raises
    ------
    ValueError
        If there is at least one island but ``lng_facilities_df`` has no
        rows; the nearest-facility distance would otherwise be infinite.
    """
    if len(islands_df) == 0:
        return []

    # Read the facility coordinates once; they are identical for every island,
    # so there is no need to re-iterate the facility frame inside the loop.
    facility_coords = list(zip(lng_facilities_df['Latitude'],
                               lng_facilities_df['Longitude']))
    if not facility_coords:
        raise ValueError(
            "lng_facilities_df contains no facilities; cannot compute the "
            "distance to the nearest LNG facility"
        )

    # One draw per island, in row order, from either the supplied generator
    # or NumPy's global state.
    next_random = np.random.random if rng is None else rng.random

    costs = []

    for lat, lon in zip(islands_df['Lat'], islands_df['Long']):
        # Base cost
        base_cost = 100000

        # Latitude term: higher absolute latitude means higher cost
        latitude_factor = (abs(lat) / 10) ** 1.2 * 10000

        # Western Pacific regional premium
        region_premium = 50000 if (120 < lon < 160 and -10 < lat < 30) else 0

        # Northern premium, growing linearly above 50 degrees north
        northern_premium = (lat - 50) * 10000 if lat > 50 else 0

        # Distance to the nearest LNG facility. A NaN distance never replaces
        # the running minimum because `nan < x` is False.
        min_distance = float('inf')
        for facility_lat, facility_lon in facility_coords:
            distance = calculate_distance(lat, lon, facility_lat, facility_lon)
            min_distance = min(min_distance, distance)

        lng_distance_factor = min_distance * 300

        # Combine all components
        total_cost = base_cost + latitude_factor + region_premium + northern_premium + lng_distance_factor

        # Add some randomness (about +/-10%)
        random_factor = 0.9 + next_random() * 0.2
        total_cost *= random_factor

        # Extra multipliers for extreme latitudes. They compound: above 75
        # degrees both apply, for a combined factor of 1.5 * 1.8.
        if abs(lat) > 65:
            total_cost *= 1.5
        if abs(lat) > 75:
            total_cost *= 1.8

        # Never report less than the base cost
        total_cost = max(total_cost, 100000)

        costs.append(round(total_cost, 2))

    return costs

    


In [18]:
# Read the island data
islands_df = pd.read_csv('island_data1.csv')

# Read the LNG facility data
lng_facilities_df = pd.read_excel('LNG_Terminals.xlsx')

# Generate the cost data (one value per island row).
# NOTE(review): generate_cost_data draws from NumPy's global random generator,
# so the costs differ between runs unless the global seed is set beforehand.
costs = generate_cost_data(islands_df, lng_facilities_df)

# Add the cost column (this modifies islands_df in place)
islands_df['All_Total_Cost'] = costs

# Select the columns to output, now including the pop column
output_df = islands_df[['ID', 'Long', 'Lat', 'pop', 'All_Total_Cost']]
output_df


Unnamed: 0,ID,Long,Lat,pop,All_Total_Cost
0,988,-68.960532,-55.246549,129.0,663941.62
1,1028,-67.644515,-55.084701,1682.0,602842.42
2,1152,-64.315773,-54.798325,240.0,566525.66
3,2147,-70.595284,-53.964415,129.0,666966.22
4,3007,-68.677673,-54.054777,167448.0,558696.95
...,...,...,...,...,...
2010,85149,-138.829615,-9.985292,116.0,1611915.84
2011,85150,-5.708494,-15.959406,6039.0,714332.83
2012,100046,-3.110323,59.013431,11469.0,343393.12
2013,100049,-2.912430,58.853893,2038.0,321502.66


In [19]:
# Write the selected columns to CSV; index=False leaves the DataFrame index out of the file
output_df.to_csv('island_cost_data.csv', index=False)