각 군집의 6km 이내 이웃 군집 탐색

In [1]:
import pandas as pd

In [2]:
def analyze_species_similarity(df, nearby_df):
    """
    Measure how often each cluster shares its '종' value with its neighbours.

    Every non-null cell in a row of ``nearby_df`` is treated as a neighbour ID.
    Neighbours whose ID has no entry in ``df`` are not counted.

    Args:
        df (pd.DataFrame): Source data; must contain the 'ID' and '종' columns.
        nearby_df (pd.DataFrame): Output of find_nearby_rows_optimized. Each
            row's index label is used to look up the matching row of ``df``.

    Returns:
        pd.DataFrame: One row per row of ``nearby_df`` with the columns
            'ID', '종', 'Nearby_Count', 'Same_Species_Count' and
            'Same_Species_Rate' (None when no neighbour was counted).
    """
    species_lookup = df.set_index('ID')['종'].to_dict()
    records = []

    for row_label, neighbor_row in nearby_df.iterrows():
        # The row label of nearby_df selects the base cluster in df.
        own_id = df.loc[row_label, 'ID']
        own_species = species_lookup.get(own_id)

        # Resolve each non-null neighbour cell to a species; IDs that are
        # missing from the lookup resolve to None and are filtered out.
        looked_up = [
            species_lookup.get(neighbor_row[column])
            for column in neighbor_row.dropna().index
        ]
        known = [species for species in looked_up if species is not None]

        neighbor_count = len(known)
        same_count = sum(1 for species in known if species == own_species)

        if neighbor_count > 0:
            same_rate = same_count / neighbor_count
        else:
            same_rate = None

        records.append({
            'ID': own_id,
            '종': own_species,
            'Nearby_Count': neighbor_count,
            'Same_Species_Count': same_count,
            'Same_Species_Rate': same_rate,
        })

    return pd.DataFrame(records)

In [3]:
# Apis cerana: load the source table and its 6 km neighbour table, then
# compute the per-row species-match statistics.
# NOTE(review): assumes both CSVs have the same row order, because
# analyze_species_similarity looks rows up in df by nearby_df's row label.
df = pd.read_csv("../../data/final/4_Apis_cerana.csv")
nearby_df = pd.read_csv("../../data/processed/6_6km_nearby_cer.csv")
result = analyze_species_similarity(df, nearby_df)

# Preview the first rows of the result.
print(result.head())


# Apis mellifera: same steps on the second pair of files.
df1 = pd.read_csv("../../data/final/4_Apis_mellifera.csv")
nearby_df1 = pd.read_csv("../../data/processed/6_6km_nearby_mel.csv")
result1 = analyze_species_similarity(df1, nearby_df1)

# Preview the first rows of the result.
print(result1.head())

     ID      종  Nearby_Count  Same_Species_Count  Same_Species_Rate
0  6441  아까시나무             0                   0                NaN
1  5512  상수리나무             0                   0                NaN
2   730    개다래             0                   0                NaN
3  2536     다래            10                  10                1.0
4   596    개다래             0                   0                NaN
     ID     종  Nearby_Count  Same_Species_Count  Same_Species_Rate
0  6846  애기똥풀             0                   0                NaN
1  2650    다래             0                   0                NaN
2  6867  애기똥풀             0                   0                NaN
3   632   개다래             0                   0                NaN
4  2547    다래             0                   0                NaN


In [4]:
# Write both result tables to CSV without the row index.
# 'utf-8-sig' prepends a UTF-8 byte order mark; presumably chosen so that
# spreadsheet tools detect the encoding of the Korean text. Confirm if changing.
result.to_csv("../../data/final/7_nearby_similar_cer.csv", index=False, encoding='utf-8-sig')
result1.to_csv("../../data/final/7_nearby_similar_mel.csv", index=False, encoding='utf-8-sig')