한 꿀벌 군집으로 부터 6km내에 존재하는 꿀벌 군집만 추출

In [1]:
import pandas as pd
import numpy as np
from sklearn.neighbors import BallTree

def find_nearby_indices(df, distance_threshold=6):
    """
    BallTree를 이용하여 각 행의 위도 및 경도를 비교하고, 
    지정된 거리 이내에 있는 다른 행들의 인덱스를 찾아 반환하는 함수.
    
    Args:
        df (pd.DataFrame): Latitude, Longitude 열이 포함된 데이터프레임
        distance_threshold (float): 거리 임계값 (단위: km)

    Returns:
        pd.DataFrame: 각 행에 대해 인근 군집의 인덱스를 담은 DataFrame
    """
    # 위도, 경도를 라디안으로 변환
    coords = np.radians(df[['Latitude', 'Longitude']].values)
    tree = BallTree(coords, metric='haversine')
    radius = distance_threshold / 6371.0  # km → radians

    # 반경 내 인덱스 검색
    indices = tree.query_radius(coords, r=radius)

    nearby_indices = []
    for i, neighbors in enumerate(indices):
        # 자기 자신 제외
        filtered = [j for j in neighbors if j != i]
        nearby_indices.append(filtered)

    # 결과 DataFrame 구성
    result_df = pd.DataFrame(index=df.index)
    for idx, idx_list in enumerate(nearby_indices):
        for n, neighbor_idx in enumerate(idx_list):
            result_df.loc[idx, f'nearby_idx_{n+1}'] = neighbor_idx
    return result_df


In [2]:
# 입력데이터
df = pd.read_csv("../../data/final/4_Apis_cerana.csv")
df1 = pd.read_csv("../../data/final/4_Apis_mellifera.csv")

result = find_nearby_indices(df)
print(result)

result1 = find_nearby_indices(df1)
print(result1)

      nearby_idx_1  nearby_idx_2  nearby_idx_3  nearby_idx_4  nearby_idx_5  \
0           1077.0         781.0         406.0           1.0        1158.0   
1              0.0           NaN           NaN           NaN           NaN   
2            324.0          80.0          82.0         371.0         599.0   
3             79.0         210.0         218.0         418.0         135.0   
4           1077.0         781.0         406.0        1158.0         161.0   
...            ...           ...           ...           ...           ...   
1154         143.0           NaN           NaN           NaN           NaN   
1155           NaN           NaN           NaN           NaN           NaN   
1156         324.0          80.0          82.0           2.0         371.0   
1157           NaN           NaN           NaN           NaN           NaN   
1158        1077.0         781.0         406.0         161.0           0.0   

      nearby_idx_6  nearby_idx_7  nearby_idx_8  nearby_idx_9  n

In [3]:
result.to_csv("../../data/processed/6_6km_nearby_cer.csv")
result1.to_csv("../../data/processed/6_6km_nearby_mel.csv")

밀원

In [12]:
import pandas as pd
import numpy as np
from sklearn.neighbors import BallTree

def find_nearby_rows_flo(df, distance_threshold=6):
    """
    BallTree를 이용하여 각 행의 위도 및 경도를 비교하고,
    지정된 거리 이내에 있는 다른 행들의 ID를 리스트로 반환하는 함수.

    Args:
        df (pd.DataFrame): 'Latitude', 'Longitude', 'ID' 열을 포함한 데이터프레임
        distance_threshold (float): 거리 임계값 (단위: km)

    Returns:
        pd.DataFrame: 각 ID와 반경 내 인접 ID 리스트가 포함된 DataFrame
    """
    coords = np.radians(df[['Latitude', 'Longitude']].values)
    tree = BallTree(coords, metric='haversine')

    radius = distance_threshold / 6371.0

    indices = tree.query_radius(coords, r=radius)

    nearby_ids = []
    for i, neighbors in enumerate(indices):
        filtered = [df.iloc[j]['ID'] for j in neighbors if j != i]
        nearby_ids.append(filtered)

    result_df = pd.DataFrame({
        'ID': df['ID'],
        'Nearby_IDs': nearby_ids
    })

    return result_df


In [13]:
df = pd.read_csv("../../data/processed/3_Floral_Source_ID.csv")
df = df.rename(columns={
    '위도': 'Latitude',
    '경도': 'Longitude',
    '종': 'Species',
    '카운트' : 'Count'
})
df.columns = ['Species', 'Longitude', 'Latitude', 'Count', 'ID']
result = find_nearby_rows_flo(df)

In [14]:
result['Nearby_IDs'] = result['Nearby_IDs'].apply(lambda ids: str([int(i) for i in ids]))

result.to_csv("../../data/processed/6_nearby_floral.csv", index=False, encoding='utf-8-sig')

꿀벌-모든 밀원

In [23]:
import pandas as pd
import numpy as np
from sklearn.neighbors import BallTree


cerana_df = pd.read_csv("../../data/final/3_Apis_cerana.csv")
mellifera_df = pd.read_csv("../../data/final/3_Apis_mellifera.csv")
floral_df = pd.read_csv("../../data/processed/3_Floral_Source_ID.csv")

floral_df = floral_df.rename(columns={'위도': 'Latitude', '경도': 'Longitude', 'ID': 'ID'})


def attach_nearby_floral_ids(bee_df, floral_df, distance_threshold=6):
    """
    각 꿀벌 위치로부터 반경 내 밀원 ID들을 nearby_1, nearby_2, ... 형태로 붙임

    Args:
        bee_df (pd.DataFrame): 꿀벌 발생 데이터 (Latitude, Longitude 포함)
        floral_df (pd.DataFrame): 밀원 위치 데이터 (Latitude, Longitude, ID 포함)
        distance_threshold (float): 거리 임계값 (단위: km)

    Returns:
        pd.DataFrame: nearby_x 열이 추가된 결과
    """
    bee_coords = np.radians(bee_df[['Latitude', 'Longitude']].values)
    floral_coords = np.radians(floral_df[['Latitude', 'Longitude']].values)
    
    tree = BallTree(floral_coords, metric='haversine')
    radius = distance_threshold / 6371.0  # km → radian

    indices = tree.query_radius(bee_coords, r=radius)


    for i, nearby in enumerate(indices):
        for j, idx in enumerate(nearby):
            bee_df.loc[i, f'nearby_{j+1}'] = int(floral_df.iloc[idx]['ID'])

    return bee_df


In [26]:
cerana_with_nearby = attach_nearby_floral_ids(cerana_df.copy(), floral_df)
mellifera_with_nearby = attach_nearby_floral_ids(mellifera_df.copy(), floral_df)

cerana_with_nearby.to_csv("../../data/final/6_Apis_cerana_with_nearby.csv", index=False, encoding='utf-8-sig')
mellifera_with_nearby.to_csv("../../data/final/6_Apis_mellifera_with_nearby.csv", index=False, encoding='utf-8-sig')

  bee_df.loc[i, f'nearby_{j+1}'] = int(floral_df.iloc[idx]['ID'])
  bee_df.loc[i, f'nearby_{j+1}'] = int(floral_df.iloc[idx]['ID'])
  bee_df.loc[i, f'nearby_{j+1}'] = int(floral_df.iloc[idx]['ID'])
  bee_df.loc[i, f'nearby_{j+1}'] = int(floral_df.iloc[idx]['ID'])
  bee_df.loc[i, f'nearby_{j+1}'] = int(floral_df.iloc[idx]['ID'])
  bee_df.loc[i, f'nearby_{j+1}'] = int(floral_df.iloc[idx]['ID'])
  bee_df.loc[i, f'nearby_{j+1}'] = int(floral_df.iloc[idx]['ID'])
  bee_df.loc[i, f'nearby_{j+1}'] = int(floral_df.iloc[idx]['ID'])
  bee_df.loc[i, f'nearby_{j+1}'] = int(floral_df.iloc[idx]['ID'])
  bee_df.loc[i, f'nearby_{j+1}'] = int(floral_df.iloc[idx]['ID'])
  bee_df.loc[i, f'nearby_{j+1}'] = int(floral_df.iloc[idx]['ID'])
  bee_df.loc[i, f'nearby_{j+1}'] = int(floral_df.iloc[idx]['ID'])
  bee_df.loc[i, f'nearby_{j+1}'] = int(floral_df.iloc[idx]['ID'])
  bee_df.loc[i, f'nearby_{j+1}'] = int(floral_df.iloc[idx]['ID'])
  bee_df.loc[i, f'nearby_{j+1}'] = int(floral_df.iloc[idx]['ID'])
  bee_df.l