# Analysis in Python


In [None]:
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import sys
import h3
import plotly.express as px
import random
import scipy as sp
from statsmodels.stats import multitest
import math
import statannot
from geojson import Feature, Point, FeatureCollection
from shapely.geometry import Polygon


## Converting GPS data into Hex grids
We use the h3 package with the resolution parameter set to 1.

In [None]:
def AddHexGrid(target):
    """Add an H3 cell index column to every cleaned GBIF file of one category.

    For each row of the GBIF metadata sheet whose ``Category`` equals
    ``target``, the file ``./GBIF/<target>/<stem>_cleaned.csv`` is read, a
    ``hex_index`` column is added (or overwritten), and the file is written
    back to the same path.

    Parameters
    ----------
    target : str
        Value of the ``Category`` column to process, e.g. ``'anteater'``.

    Returns
    -------
    None
        The CSV files are modified in place on disk.
    """
    # Resolution 1: we have 842 grids. See https://h3geo.org/docs/core-library/restable
    resolution = 1

    # Which species corresponds to which trickster.
    meta_gbif = pd.read_csv('./GBIF/For_gbif_trickstar.xlsx - Sheet1.csv')
    extract = meta_gbif[meta_gbif['Category'] == target]

    # The third column of the metadata sheet supplies the file-name stem.
    for stem in extract.iloc[:, 2]:
        path = './GBIF/' + target + '/' + stem + '_cleaned.csv'
        df = pd.read_csv(path)
        # The second CSV column is passed as the first coordinate argument of
        # h3.geo_to_h3 and the first CSV column as the second
        # (presumably lat and lng respectively -- confirm against the data).
        df['hex_index'] = [
            h3.geo_to_h3(second_col, first_col, resolution)
            for first_col, second_col in zip(df.iloc[:, 0], df.iloc[:, 1])
        ]
        df.to_csv(path, index=False)
# Example
# AddHexGrid('anteater') # change target species names

In [None]:
# plot the distributions of real animals and trickster animals
def hexagons_dataframe_to_geojson(df_hex, hex_id_field, geometry_field, value_field, file_output=None):
    """Convert a DataFrame of hexagons into a GeoJSON FeatureCollection.

    Each row becomes one ``Feature`` whose geometry is taken from
    ``geometry_field``, whose ``id`` is taken from ``hex_id_field`` and whose
    single property ``"value"`` is taken from ``value_field``.

    Parameters
    ----------
    df_hex : pandas.DataFrame
        One row per hexagon.
    hex_id_field : str
        Column holding the feature id.
    geometry_field : str
        Column holding the geometry object passed to ``Feature``.
    value_field : str
        Column whose value is stored under the ``"value"`` property.
    file_output : str or None, optional
        If given, the collection is written to this path as JSON and nothing
        is returned.

    Returns
    -------
    FeatureCollection or None
        The collection when ``file_output`` is ``None``; otherwise ``None``.
    """
    list_features = [
        Feature(geometry=row[geometry_field],
                id=row[hex_id_field],
                properties={"value": row[value_field]})
        for _, row in df_hex.iterrows()
    ]
    feat_collection = FeatureCollection(list_features)

    if file_output is None:
        return feat_collection

    # The notebook's import cell does not import ``json``; import it here so
    # that writing to a file does not fail with NameError.
    import json

    with open(file_output, "w") as f:
        json.dump(feat_collection, f)
    return None


def _hex_boundary_polygon(hex_id):
    """Return a shapely Polygon for the boundary of the H3 cell ``hex_id``."""
    # The second argument (True) is h3's geo_json flag; the first coordinate
    # of every vertex is treated below as the longitude.
    boundary = h3.h3_to_geo_boundary(hex_id, True)
    coords = np.asanyarray(boundary)
    first_coord = coords[:, 0]
    # Cells with a vertex beyond these limits have their negative first
    # coordinates shifted by 360 so the polygon is drawn in one piece
    # (presumably cells straddling the antimeridian -- thresholds are
    # empirical, confirm before changing).
    if np.any(first_coord < -161) or np.any(first_coord > 170):
        coords[first_coord < 0, 0] += 360
        boundary = tuple(map(tuple, coords))
    return Polygon(boundary)


def Double_distributions(target):
    """Map where a trickster and its corresponding real animals are recorded.

    Every H3 cell that contains the trickster (TS), one of the matching real
    animal taxa (RA), or both is drawn on a world map and coloured by which
    of the two is present. The figure is saved as ``WorldMap_<target>.pdf``
    and then shown.

    Parameters
    ----------
    target : str
        Trickster category; matched against the ``TrickSter`` column of
        ``TrickSter_data3.csv`` and the ``Category`` column of the GBIF
        metadata sheet.

    Returns
    -------
    None
    """
    # Cells in which the trickster is reported.
    df_TS = pd.read_csv('TrickSter_data3.csv')
    df_TS = df_TS[df_TS['TrickSter'] == target]
    TS_index = df_TS['hex_index'].to_list()

    # Cells in which any real animal taxon corresponding to the target occurs.
    df_meta = pd.read_csv('./GBIF/For_gbif_trickstar.xlsx - Sheet1.csv')
    taxa = df_meta.loc[df_meta['Category'] == target, 'Taxa']
    cells_per_taxon = [
        pd.read_csv('./GBIF/' + target + '/' + taxon + '_cleaned.csv')['hex_index'].unique()
        for taxon in taxa
    ]
    if cells_per_taxon:
        hex_ids = np.unique(np.concatenate(cells_per_taxon, axis=0)).tolist()
    else:
        # No taxa listed for this target: only trickster cells are mapped.
        hex_ids = []

    # Classify real-animal cells first, then append trickster-only cells.
    trickster_cells = set(TS_index)
    presence = ['Both' if cell in trickster_cells else 'Only RA' for cell in hex_ids]
    seen = set(hex_ids)
    for cell in TS_index:
        if cell not in seen:
            seen.add(cell)  # also guards against duplicated trickster cells
            hex_ids.append(cell)
            presence.append('Only TS')

    df_animal_hex = pd.DataFrame({'hex_index': hex_ids,
                                  'Presence': presence})
    df_animal_hex['geometry'] = [
        _hex_boundary_polygon(cell) for cell in df_animal_hex['hex_index']
    ]

    geojson_obj = hexagons_dataframe_to_geojson(
        df_animal_hex,
        hex_id_field='hex_index',
        value_field='Presence',
        geometry_field='geometry')
    fig = px.choropleth_mapbox(
        df_animal_hex,
        geojson=geojson_obj,
        locations='hex_index',
        color='Presence',
        color_discrete_map={'Both': '#66c2a5', 'Only RA': '#8da0cb', 'Only TS': '#fc8d62'},
        range_color=([0, 2]),
        mapbox_style='carto-positron',
        zoom=0.5,
        center={"lat": 0.0, "lon": 0.0},
        opacity=0.6,
        width=800, height=400)
    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
    fig.update_traces(showlegend=False)
    fig.write_image("WorldMap_" + target + ".pdf")
    fig.show()
# Example
# Double_distributions('anteater')

## Presence of real animals where trickster animals are reported
Below, we analyze the conditional probability that a real animal is present in a location where the corresponding trickster animal is reported.

In [None]:
def Presence_Rate(df=None):
    """Estimate Prob(real animal present | trickster reported) per trickster.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Must contain a ``TrickSter`` column (group label) and a ``presence``
        column that is summed per group (presumably 0/1 or boolean -- confirm
        against the data). When omitted, the module-level ``df`` is looked up
        at call time.

    Returns
    -------
    pandas.DataFrame
        One row per trickster, in order of first appearance, with columns
        ``Trickster``, ``Presence`` (sum of ``presence``), ``Total`` (number
        of rows), ``Frequency`` (``Presence / Total``) and ``95CI``
        (``1.96 * sqrt(p * (1 - p) / n)``, the half-width of the
        normal-approximation interval).

    Raises
    ------
    ValueError
        If ``df`` is omitted and no module-level ``df`` exists.
    """
    if df is None:
        # Resolve the default when called, not when defined: a ``df=df``
        # default fails if ``df`` does not exist yet and otherwise keeps
        # pointing at a frame that may have been replaced since.
        df = globals().get('df')
        if df is None:
            raise ValueError("Presence_Rate needs a DataFrame: pass df=... explicitly")

    Count = []
    Trickster = []
    Presence = []
    Frequency = []
    Error = []
    # sort=False keeps tricksters in order of first appearance; rows with a
    # missing trickster label are skipped rather than dividing by zero.
    for target, presence in df.groupby('TrickSter', sort=False)['presence']:
        print(target)
        n = len(presence)
        x = presence.sum(skipna=False)
        p = x / n
        Presence.append(x)
        Count.append(n)
        Trickster.append(target)
        Error.append(1.96 * np.sqrt(p * (1 - p) / n))  # .95 confidence interval
        Frequency.append(p)
    return pd.DataFrame({'Trickster': Trickster, 'Presence': Presence, 'Total': Count,
                         'Frequency': Frequency, '95CI': Error})
# Per-trickster presence statistics for the notebook-level data frame.
df_count = Presence_Rate(df=df)
# remove some species that we do not analyze
df1 = df_count[~df_count['Trickster'].isin(['water bird', 'monkey', 'ground squirrel'])]
df1 = df1.sort_values('Trickster')
#------Plot the conditional probability-------------------
ax = sns.barplot(data=df1, y='Trickster', x='Frequency', color="#8dd3c7")
plt.yticks(fontsize=16)
plt.xticks(fontsize=16)
plt.xlabel('Frequency of presence', fontsize=20)
plt.ylabel('')
plt.xlim(0, 1.01)
# The bars are horizontal, so each bar's vertical centre is y + height / 2.
# (The patch *width* is the bar length, i.e. the frequency itself, and must
# not be used to position the error bar.)
y_coords = [p.get_y() + 0.5 * p.get_height() for p in ax.patches]
x_coords = df1['Frequency']
ax.errorbar(x=x_coords, y=y_coords, xerr=df1["95CI"], fmt="none", c="k")
#plt.show()
plt.savefig('Presence_Trickster_Animal.pdf', bbox_inches='tight', pad_inches=0.05)