In [48]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [49]:
# Load the unrestrained interaction table of every pollinator group.
# The variables keep the `<group>_plant` naming because a later cell looks the
# chosen table up by that name.
_read_opts = dict(low_memory=False, index_col=False)

bee_plant = pd.read_csv('Created_Data_Source/Bee_Plant_Unrestrained_New.csv', **_read_opts)
beetle_plant = pd.read_csv('Created_Data_Source/Beetle_Plant_Unrestrained.csv', **_read_opts)
fly_plant = pd.read_csv('Created_Data_Source/Fly_Plant_Unrestrained.csv', **_read_opts)
wasp_plant = pd.read_csv('Created_Data_Source/Wasp_Plant_Unrestrained.csv', **_read_opts)
moth_plant = pd.read_csv('Created_Data_Source/Moth_Plant_Unrestrained.csv', **_read_opts)
butterfly_plant = pd.read_csv('Created_Data_Source/Butterfly_Plant_Unrestrained.csv', **_read_opts)

In [3]:
# Numeric code assigned to each flowering season.
season_map = dict(Spring=1, Summer=2, Autumn=3, Winter=4)


def convert_season(season_str):
    """Translate a '-'-separated season string into a list of season codes.

    Each piece is stripped of surrounding whitespace and looked up in
    ``season_map``; a piece that is not a key of the map raises ``KeyError``.
    """
    return [season_map[name.strip()] for name in season_str.split('-')]

In [52]:
# Distinct Interaction_Type values present in the butterfly table.
set(butterfly_plant["Interaction_Type"].tolist())

{'Herbivory', 'Parasitism', 'Pollination', nan}

In [55]:
# Antagonistic butterfly-plant interactions: herbivory or parasitism rows,
# leaving out those whose Expansion_Type is 'UE_Genus'.
is_antagonistic = butterfly_plant["Interaction_Type"].isin(["Herbivory", "Parasitism"])
not_genus_expanded = butterfly_plant["Expansion_Type"] != "UE_Genus"
butterfly_anta = butterfly_plant[is_antagonistic & not_genus_expanded]
butterfly_anta.to_csv("New_Interaction_Files/butterfly_plant/antagonistic.csv")

In [None]:
# Load the plant trait table and keep only rows with no missing values.
# The directory name previously contained a mis-encoded character ("Raẇ");
# it is spelled with a plain ASCII "w" here.
df = pd.read_csv("Raw_Data_Source/Pollination_Plants_Traits.csv").dropna()

# Flower_Color becomes a list of colour names (split on ", ").
df["Flower_Color"] = df["Flower_Color"].str.split(', ')
# Flower_Season becomes a list of season codes (see convert_season).
df["Flower_Season"] = df["Flower_Season"].apply(convert_season)
df

In [5]:
# Flower colours accepted for each pollinator group; a later cell keeps a plant
# only if one of its (lower-cased) flower colours appears in the group's list.
colors_dict = dict(
    beetle=["green", "white", "yellow"],
    bee=["blue", "purple", "yellow", "white", "violet", "pink", "red", "orange"],
    fly=["white", "yellow", "green", "brown"],
    wasp=["yellow", "white", "blue", "purple", "red"],
    butterfly=["blue", "purple", "yellow", "white", "violet", "pink", "red", "orange"],
    moth=["white", "yellow", "green", "pink", "purple"],
)

# Flowering seasons accepted for each group, using the numeric codes of
# season_map (1 = Spring, 2 = Summer, 3 = Autumn, 4 = Winter).
seasons_dict = dict(
    beetle=[1, 2, 3],
    bee=[1, 2],
    fly=[1, 2, 3],
    wasp=[2, 3],
    butterfly=[1, 2, 3],
    moth=[1, 2, 3],
)

## Initializing Constants and The Working DataFrame
___

> Enter the name of the pollinator group to work with (e.g. `bee`); it must match one of the `<group>_plant` DataFrames loaded above.

In [None]:
# Ask which pollinator group to analyse and bind its interaction table to
# `working_df`. The table is looked up by name, so the answer must correspond
# to one of the `<group>_plant` DataFrames loaded earlier in the notebook.
while True:
    # Normalise case and surrounding whitespace so that e.g. "Bee " resolves
    # to `bee_plant` instead of being rejected.
    group = input("Enter the name of the group you are interested in: ").strip().lower()
    working_df_name = f"{group}_plant"
    # At notebook top level the global namespace holds the loaded tables.
    candidate = globals().get(working_df_name)
    # Accept only real DataFrames; an unrelated variable whose name merely
    # ends in "_plant" is treated as "not found".
    if isinstance(candidate, pd.DataFrame):
        working_df = candidate
        break
    print("Dataframe not found. Please enter name again")

## This is the Static Code

In [None]:
# Trait preferences of the selected group, used by the trait filter below.
desired_colors, desired_seasons = colors_dict[group], seasons_dict[group]

print(
    f"List of colors: {desired_colors}",
    f"List of seasons: {desired_seasons}",
    sep="\n",
)

### Filtering the raw plant trait data dataframe

In [None]:
# Columns of the interaction table that the rest of the notebook relies on.
interaction_df_list = ["Source_Name", "Target_Name", "Interaction_Type", "Type",
                       "Expansion_Type", "Source_Type"]

# Keep only rows whose Interaction_Type contains "Pollination" (missing values
# are treated as empty strings, i.e. non-matching), restricted to the columns
# above. A single selection plus .copy() replaces the repeated column subsetting.
is_pollination = working_df["Interaction_Type"].fillna('').str.contains('Pollination')
working_df = working_df.loc[is_pollination, interaction_df_list].copy()

# Distinct plants (interaction targets) recorded for this group.
plant_list = list(set(working_df["Target_Name"].to_list()))

# Trait rows whose Taxon is one of those plants. isin() performs a hashed
# membership test, replacing a per-row linear scan of plant_list.
df_new = df[df["Taxon"].isin(plant_list)].copy()
# Marker column kept so df_new has the same columns as before.
df_new["Pollinator_Yes"] = "Yes"

print(f"Length of reduced plant with trait data information: {len(df_new)}\n")
print(f"Number of total plant species that are pollinated by this clade: {len(plant_list)}")

In [None]:
# Number of distinct Source_Name values among the pollination interactions.
len(set(working_df["Source_Name"].tolist()))

### Shortening our working df to only contain those interactions for which we have trait data

In [None]:
avail_plants = df_new["Taxon"].to_list()

# Split the interactions by whether the target plant has trait data.
has_traits = working_df["Target_Name"].isin(avail_plants)
removed_entries = working_df[~has_traits]
working_df = working_df[has_traits]

n_kept, n_removed = len(working_df), len(removed_entries)
n_total = n_kept + n_removed
print(f"Number of entries that are kept: {n_kept}, (% = {n_kept/n_total*100})\n")
print(f"Number of removed entries: {n_removed}, (% = {n_removed/n_total*100})")

In [None]:
# Preview the first five remaining interactions.
working_df.head(n=5)

In [None]:
# Number of distinct Source_Name values left once plants without trait data are dropped.
len(set(working_df["Source_Name"].tolist()))

## The Actual Trait Based Removal (Barrier Class)

In [13]:
flower_df = df_new.copy()

# Sets give constant-time membership tests inside the row scan below.
wanted_colors = set(desired_colors)
wanted_seasons = set(desired_seasons)

# A plant passes the barrier when at least one of its flower colours
# (compared in lower case) AND at least one of its flowering seasons is in the
# group's desired lists. This is the same condition as the original nested
# loop, but each plant is evaluated exactly once.
keep_mask = pd.Series(
    [
        any(color.lower() in wanted_colors for color in colors)
        and any(season in wanted_seasons for season in seasons)
        for colors, seasons in zip(flower_df["Flower_Color"], flower_df["Flower_Season"])
    ],
    index=flower_df.index,
    dtype=bool,
)

# Boolean selection replaces row-by-row DataFrame.append, which was removed in
# pandas 2.0 and previously added one copy of a row per matching
# (colour, season) pair. Selecting also keeps the columns (including "Taxon")
# when nothing matches. The index is renumbered, as ignore_index=True did.
filtered_flower_df = flower_df.loc[keep_mask].reset_index(drop=True)

In [14]:
# for _, row in flower_df.iterrows():
#     color_list = row['Flower_Color']
#     season_list = row['Flower_Season']
    
#     for color in color_list:
#         color_lower = color.lower()
#         for season in season_list:
#             print(color_lower in desired_colors or season in desired_seasons)

In [None]:
# Display the trait rows that passed the colour/season filter.
filtered_flower_df

In [None]:
final_plants = filtered_flower_df["Taxon"].to_list()

# Partition the interactions by whether the target plant passed the trait filter.
passed_filter = working_df["Target_Name"].isin(final_plants)
incorrect_entries = working_df[~passed_filter]
correct_entries = working_df[passed_filter]

# print(f"Number of correct entries: {len(correct_entries)}, (% = {(len(correct_entries))/(len(correct_entries) + len(incorrect_entries))*100})\n")
# NOTE(review): the percentage below is relative to kept + removed entries
# (i.e. including interactions dropped for lacking trait data), not to
# correct + incorrect entries - confirm this denominator is intended.
n_incorrect = len(incorrect_entries)
n_all = len(working_df) + len(removed_entries)
print(f"Number of incorrect entries: {n_incorrect}, (% = {n_incorrect/n_all*100})")

In [None]:
# Rejected interactions, ordered by Source_Name for easier inspection.
incorrect_entries.sort_values('Source_Name', ascending=True)

In [None]:
# Distinct Expansion_Type values among the rejected interactions.
set(incorrect_entries["Expansion_Type"].tolist())

In [None]:
# Rejected interactions whose Expansion_Type is missing.
missing_expansion = incorrect_entries['Expansion_Type'].isnull()
nan_df = incorrect_entries.loc[missing_expansion]
nan_df

In [None]:
# Final table: interactions that passed the trait filter, followed by the
# rejected ones that have no Expansion_Type.
final_df = pd.concat([correct_entries, nan_df], axis=0)
final_df

In [None]:
# Share of the final table made up of rows re-added from the rejected set.
false_negative_pct = len(nan_df) / len(final_df) * 100
print(f"Percentage of false negatives: {false_negative_pct}")

In [None]:
# Number of distinct Source_Name values in the final table.
len(set(final_df["Source_Name"].tolist()))

In [None]:
# Write the final table for the selected group to its own CSV file.
x = f"{group}-plant_final"
output_path = f"New_Interaction_Files/{x}.csv"
final_df.to_csv(output_path)

# Final Dataset Creation
> Do not run until all the required per-group `*-plant_final.csv` files have been created

In [41]:
# bee = pd.read_csv('New_Interaction_Files/bee-plant_final.csv')
# beetle = pd.read_csv('New_Interaction_Files/beetle-plant_final.csv')
# butterfly = pd.read_csv('New_Interaction_Files/butterfly-plant_final.csv')
# fly = pd.read_csv('New_Interaction_Files/fly-plant_final.csv')
# moth = pd.read_csv('New_Interaction_Files/moth-plant_final.csv')
# wasp = pd.read_csv('New_Interaction_Files/wasp-plant_final.csv')

In [None]:
# final_df = pd.concat([bee, beetle, butterfly, fly, moth, wasp])
# final_df = final_df.set_index("Unnamed: 0")
# final_df

In [44]:
# final_df.to_csv("New_Interaction_Files/Plant_Pollinator_Final.csv")