In [9]:
import numpy as np
import pandas as pd
import plotly.express as px
from shapely.geometry import Polygon
import scipy.stats

import sys
sys.path.append('../')
from helper_functions.gridding import h3_grid
from helper_functions.inference_models import ActualDistribution
from helper_functions.metrics import generate_richness_frame, generate_shannon_frame, generate_simpson_frame
from web_interface.func_import import load_data, add_geometry, hex_to_geojson

In [4]:
# Load the raw observations and keep only the rows that carry both coordinates.
# The chain rebinds `df` instead of calling dropna(inplace=True), so the object
# returned by load_data() is never mutated.
df = (
    load_data()
    .dropna(subset=['Latitude', 'Longitude'])
    # 'Date' is text such as '2001-07-24' or '2000-01-01 - 2000-12-31'; its
    # first four characters are the (start) year. Vectorised .str slicing
    # replaces the per-element lambda and yields NaN for a missing date
    # instead of raising.
    .assign(Year=lambda obs: obs['Date'].str[:4])
)
df

Unnamed: 0,ScientificName,TaxonID,Date,Latitude,Longitude,Municipality,Year
0,Strix aluco,http://tun.fi/MX.28998,2000-01-01 - 2000-12-31,60.340900,25.242500,Sipoo,2000
1,Fringilla coelebs,http://tun.fi/MX.36237,2001-07-24,60.177000,24.202000,Siuntio,2001
2,Certhia familiaris,http://tun.fi/MX.34616,2000-05-14,60.442100,24.519000,Vihti,2000
3,Cyanistes caeruleus,http://tun.fi/MX.34574,2000-04-25,61.593000,27.318800,Mikkeli,2000
4,Corvus monedula,http://tun.fi/MX.37142,2001-04-22,60.177000,24.202000,Siuntio,2001
...,...,...,...,...,...,...,...
987433,Picoides tridactylus,http://tun.fi/MX.30453,2022-06-07,68.925050,26.778422,Inari,2022
987434,Picoides tridactylus,http://tun.fi/MX.30453,2022-06-21,69.018273,26.737819,Inari,2022
987435,Motacilla flava,http://tun.fi/MX.32180,2022-05-19,61.226294,28.811891,Imatra,2022
987436,Delichon urbicum,http://tun.fi/MX.32163,2022-05-19,61.221876,28.797762,Imatra,2022


In [5]:
# Build the project's H3 grid helper and fit it on the observation frame.
# NOTE(review): the next cell keys on an 'h3_cell' column, so fit() presumably
# adds that column to `df` in place — confirm in helper_functions.gridding.
grid_object = h3_grid()
grid_object.fit(df)

In [6]:
# Fit the ActualDistribution model, configured with the 'h3_cell' and
# 'ScientificName' column names, on the observations. The value returned by
# fit() is kept as `actual_dist` and fed to the metric helpers in later cells.
# NOTE(review): presumably one row per grid cell holding per-species shares —
# confirm in helper_functions.inference_models.
actual_model = ActualDistribution('h3_cell', 'ScientificName')
actual_dist = actual_model.fit(df)

In [7]:
def calculate_dominant_percent(row):
    """Return the largest value found in ``row``.

    Used as a row-wise reducer: when ``row`` holds the species shares of one
    grid cell (assumed — the caller decides what the values mean), the result
    is the share of the most common species.
    """
    largest_share = np.max(row)
    return largest_share

def generate_dominant_frame(df):
    """Build a two-column frame holding the row-wise maximum of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric table; each row is reduced to its largest value.

    Returns
    -------
    pandas.DataFrame
        The former index of ``df`` as a regular column (named after the index,
        or ``'index'`` when the index is unnamed) followed by a ``'dominant'``
        column with the per-row maximum, on a fresh 0..n-1 index.
    """
    # DataFrame.max(axis=1) is the vectorised equivalent of applying np.max to
    # every row, without a Python-level call per row.
    return df.max(axis=1).rename('dominant').reset_index()

# Largest per-row value of the fitted distribution, one row per grid cell.
dominant = generate_dominant_frame(actual_dist)

# Histogram of those values across all cells.
fig = px.histogram(
    dominant,
    x="dominant",
    title="Distribution of percentage of dominant species",
)
fig.show()

In [11]:
def load_geojson(df_h3, value_field):
    """Convert a per-cell frame into a GeoJSON object for choropleth plotting.

    Parameters
    ----------
    df_h3 : pandas.DataFrame
        Frame with an ``"h3_cell"`` column. A ``"geometry"`` column is written
        onto this frame in place, so the caller's object is modified.
    value_field : str
        Name of the column forwarded to ``hex_to_geojson`` as the value field.

    Returns
    -------
    Whatever ``hex_to_geojson`` returns for the frame.
    """
    # One geometry per row, computed by the project's add_geometry helper.
    df_h3["geometry"] = df_h3.apply(add_geometry, axis=1)
    return hex_to_geojson(
        df_h3,
        hex_id_field="h3_cell",
        value_field=value_field,
        geometry_field="geometry",
    )


def plot_grid_dominant(data):
    """Draw a mapbox choropleth of the dominant-species share per H3 cell.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with ``'h3_cell'`` and ``'dominant'`` columns. It is passed to
        ``load_geojson``, which adds a ``'geometry'`` column to it in place.

    Returns
    -------
    The Plotly figure produced by ``px.choropleth_mapbox``, with layout tweaks
    applied (legend position, zero margins).
    """
    geojson = load_geojson(data, 'dominant')

    # pd.cut builds right-closed intervals by default, so the string labels are
    # "(0.0, 0.1]", ..., "(0.5, 1.0]". Every key of the colour map below has to
    # match one of those labels exactly, otherwise Plotly falls back to its
    # default colour for that band.
    dominant_band = pd.cut(data['dominant'], bins=[0, 0.1, 0.2, 0.5, 1]).astype(str)

    fig = px.choropleth_mapbox(
        data,
        geojson=geojson,
        locations="h3_cell",
        color=dominant_band,
        color_discrete_map={
            "(0.0, 0.1]": "#1a02b8",
            "(0.1, 0.2]": "#ae22b3",
            "(0.2, 0.5]": "red",
            # Was "(0.5, 1.0)", which never matched the label pd.cut emits.
            "(0.5, 1.0]": "yellow",
        },
        hover_data=['dominant'],
        center=dict(lat=65, lon=24),
        zoom=4,
        width=600,
        height=650,
        opacity=0.35,
        labels={"color": "Dominant percentage", 'dominant': "Dominant percentage"},
        mapbox_style="open-street-map",
    )

    # NOTE(review): update_geos targets `geo` subplots, while this figure is
    # mapbox-based, so this call likely has no visible effect — confirm before
    # removing it.
    fig.update_geos(projection_type="foucaut")

    fig.update_layout(
        legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
        autosize=False,
        margin=dict(l=0, r=0, b=0, t=0, pad=4, autoexpand=True),
    )
    return fig

# Render the dominant-share map, enlarged beyond the 600x650 size that
# plot_grid_dominant sets by default.
fig = plot_grid_dominant(dominant)
fig.update_layout(width=1000, height=800)
fig.show()

In [13]:
# Per-cell diversity metrics computed from the fitted distribution.
richness = generate_richness_frame(actual_dist)
shannon_entropies = generate_shannon_frame(actual_dist)
simpson_indices = generate_simpson_frame(actual_dist)

# Combine everything into one table. With no `on=` argument each merge joins
# on the columns the two frames have in common (inner join by default).
# `dominant` also carries the 'geometry' column that load_geojson wrote onto it
# when the map cell ran, so that column ends up in `metrics` as well.
metrics = (
    richness
    .merge(shannon_entropies)
    .merge(simpson_indices)
    .merge(dominant)
)
metrics

Unnamed: 0,h3_cell,richness,shannon_entropy,simpson_index,dominant,geometry
0,85012603fffffff,61,3.744717,0.869208,0.240000,POLYGON ((26.695886659344264 69.94246236765055...
1,85012613fffffff,71,4.411372,0.920463,0.154506,"POLYGON ((26.61443341995143 69.81869000942822,..."
2,85012617fffffff,25,4.388155,0.944527,0.096154,POLYGON ((26.304126498918087 69.91532638971877...
3,8501261bfffffff,104,5.709731,0.971990,0.078388,POLYGON ((27.004894867595613 69.84504909290897...
4,85012643fffffff,80,5.773537,0.976973,0.044743,"POLYGON ((27.790703442725356 69.8952999863398,..."
...,...,...,...,...,...,...
2063,85112ed3fffffff,127,5.731707,0.964359,0.104699,POLYGON ((28.39941571879935 62.112281924906135...
2064,85112ed7fffffff,107,5.510233,0.954679,0.153249,POLYGON ((28.534115496785855 62.24453765705749...
2065,85112edbfffffff,139,6.061747,0.973637,0.106895,"POLYGON ((28.542256965349033 61.9914443611816,..."
2066,851135a7fffffff,1,-0.000000,0.000000,1.000000,POLYGON ((26.520505012245458 60.12445954409234...


In [17]:
# Pull each metric out of the merged table as a plain NumPy array.
richness_np = np.array(metrics['richness'])
shannon_np = np.array(metrics['shannon_entropy'])
simpson_np = np.array(metrics['simpson_index'])
dominant_np = np.array(metrics['dominant'])

# Axis labels, in the same order as the rows handed to np.corrcoef below.
metric_labels = ["richness", "shannon", "simpson", "dominant"]

# Each array is treated as one variable, giving a 4x4 Pearson matrix.
correlation_matrix = np.corrcoef([richness_np, shannon_np, simpson_np, dominant_np])

fig = px.imshow(
    correlation_matrix,
    x=metric_labels,
    y=metric_labels,
    zmin=-1,  # lower bound of the colour domain
    zmax=1,   # upper bound of the colour domain
    text_auto=True,
    color_continuous_scale=["blue", "white", "red"],
    title="Pearson correlation between metrics and dominant species percentage",
)
fig.show()

It is visible that the percentage of the dominant species has a very strong negative correlation with both Shannon entropy and the Simpson index. This is because a large share held by one dominant species reduces the evenness of the species distribution, which is what Shannon entropy and the Simpson index measure. The percentage of the dominant species, however, does not correlate strongly with species richness.

It is also visible that the percentage of the dominant species does not show a clear trend from north to south, so Shannon entropy and the Simpson index are unlikely to show a clear trend either, despite species richness increasing from north to south.