In [70]:
import datetime as dt
import os
import sqlite3 as db

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.metrics import r2_score,explained_variance_score
from sklearn.cluster import KMeans

Identifying birds that strongly select for one or more habitats

In [71]:
# Notebook-wide configuration.
github_userName = 'Tanag3r'
# NOTE(review): an API token is committed in this notebook. Supply it through the EBIRD_TOKEN
# environment variable instead; the literal is kept only as a fallback so existing runs keep
# working, and it should be rotated and then removed from the file.
ebird_token = os.environ.get('EBIRD_TOKEN','j6c7l80ga2ib')
# SQLite file opened by connectDB().
db_name = 'trailheadDirectBirds_sous.db'

In [72]:
##connect to database
def connectDB():
    """Open a connection to the SQLite file named by the module-level ``db_name``.

    Returns:
        The connection object returned by ``db.connect(db_name)``.

    Raises:
        UserWarning: if opening the connection raises any exception; the original
            error text is embedded in the message.
    """
    try:
        # Hand the connection straight back on success.
        return db.connect(db_name)
    except Exception as cnxError:
        raise UserWarning(f'Unable to connect to database due to: {cnxError}')

In [110]:
##TODO #99 replace the update latestUpdate in the function with a trigger in the database
class notInTable(ValueError):
    """Raised when a locId has no matching row in the Hotspots table.

    Attributes are set in ``__init__``:
        locId: the identifier that was looked up.
        message: explanatory text, also passed to ``ValueError`` as its only argument.
    """

    def __init__(self,locId,message="given locId cannot be found in table Hotspots"):
        # Initialise the ValueError base with the message first, then keep both pieces
        # on the instance so __str__ can combine them.
        super().__init__(message)
        self.message = message
        self.locId = locId

    def __str__(self):
        # Rendered as "<locId> >> <message>".
        return f'{self.locId} >> {self.message}'



In [114]:

def post_hotspotHabitat(locId: str,label: int,cnx=None):
    """Store a habitat label on one row of the Hotspots table.

    Checks that ``locId`` exists in ``Hotspots``, then sets ``habitatLabel`` to ``label`` and
    ``latestUpdate`` to the current local timestamp (as a string) for that row, and commits.

    Args:
        locId: value matched against ``Hotspots.locId``.
        label: value written to ``Hotspots.habitatLabel``.
        cnx: open database connection to use. When omitted, a connection is opened with
            ``connectDB()`` for this call and closed before returning. A connection passed
            in by the caller is left open.

    Returns:
        The ``locId`` that was updated.

    Raises:
        notInTable: if no row in ``Hotspots`` has the given ``locId``.
    """
    # Resolve the connection at call time. A default of connectDB() in the signature would be
    # evaluated once, when the function is defined, and shared by every later call.
    ownsConnection = cnx is None
    if ownsConnection:
        cnx = connectDB()
    try:
        cursor = cnx.cursor()
        try:
            #validate
            cursor.execute("SELECT EXISTS(SELECT 1 FROM Hotspots WHERE locId = ?)",(locId,))
            if cursor.fetchone()[0] == 0:
                raise notInTable(locId)
            updateLabel_script = "UPDATE Hotspots SET habitatLabel = ?, latestUpdate = ? WHERE locId = ?;"
            updatelabel_tuple = (label,str(dt.datetime.today()),locId)
            cursor.execute(updateLabel_script,updatelabel_tuple)
            cnx.commit()
        finally:
            # Release the cursor on every path, including the notInTable one.
            cursor.close()
    finally:
        if ownsConnection:
            cnx.close()
    return locId

In [83]:
#derive habitat cluster labels from FAO values
def kmeans_habitat(distinctHabitats: int,cnx=None,randomState=None):
    """Cluster the rows of the FAO_by_locId table into habitat groups with k-means.

    Reads every row of ``FAO_by_locId``, drops the ``locName`` column, indexes by ``locId``,
    replaces missing values with 0, divides each row by its own largest value and fits
    ``KMeans`` with ``distinctHabitats`` clusters on the result.

    Args:
        distinctHabitats: number of clusters to fit.
        cnx: open database connection handed to ``pd.read_sql``. When omitted, a connection
            is opened with ``connectDB()`` for this call and closed once the table is read.
        randomState: optional seed forwarded to ``KMeans(random_state=...)``. The default
            ``None`` leaves the fit unseeded, so labels can differ between runs.

    Returns:
        A tuple ``(habitatFrame, centerPoints)``: a DataFrame with columns ``locId`` and
        ``clusterLabel`` sorted by ``clusterLabel``, and the fitted ``cluster_centers_``.
    """
    # Resolve the connection at call time. A default of connectDB() in the signature would be
    # evaluated once, when the function is defined, and shared by every later call.
    ownsConnection = cnx is None
    if ownsConnection:
        cnx = connectDB()
    try:
        data = pd.read_sql(sql='SELECT * FROM FAO_by_locId;',con=cnx)
    finally:
        if ownsConnection:
            cnx.close()
    data = data.drop(columns=['locName']).set_index('locId').fillna(0)
    #normalize: scale every row by its own maximum so the row's largest value becomes 1
    rowMax = data.max(axis=1)
    rowMax = rowMax.where(rowMax != 0,1)   # all-zero rows stay 0 instead of turning into NaN (0/0)
    data = data.div(rowMax,axis=0)
    #compute kmeans for each locId
    habitat_kmeans = KMeans(n_clusters=distinctHabitats,init='k-means++',random_state=randomState)
    habitat_kmeans = habitat_kmeans.fit(data.values)
    clusterLabels = habitat_kmeans.labels_
    centerPoints = habitat_kmeans.cluster_centers_
    #define habitats
    habitatFrame = pd.DataFrame(data=clusterLabels,columns=['clusterLabel'],index=data.index).sort_values(by='clusterLabel').reset_index()
    return habitatFrame,centerPoints

In [84]:
# Number of habitat clusters requested from k-means.
# NOTE(review): the reason for choosing 11 is not recorded in this notebook — worth documenting.
N_HABITATS = 11
frame, centers = kmeans_habitat(distinctHabitats=N_HABITATS)
# Display the fitted cluster centres.
centers

array([[ 0.00000000e+00,  7.78046089e-01,  2.98861344e-01,
         3.06492158e-01,  3.08591038e-01,  0.00000000e+00,
         1.98752332e-01,  0.00000000e+00,  5.85937500e-02,
         0.00000000e+00,  0.00000000e+00,  9.92187500e-01,
         3.49460908e-01,  7.58539244e-02],
       [ 0.00000000e+00,  3.66795367e-02,  0.00000000e+00,
         2.38195295e-01,  0.00000000e+00,  6.93889390e-18,
         1.00000000e+00,  8.67361738e-19,  2.41863249e-01,
         5.50102828e-01,  1.73472348e-18,  1.90476190e-02,
         0.00000000e+00, -1.73472348e-18],
       [ 6.93889390e-18,  5.55111512e-17,  0.00000000e+00,
         1.76366843e-02,  3.52733686e-02,  6.93889390e-18,
         1.60635455e-01,  4.16541963e-02,  1.76366843e-02,
         1.00000000e+00,  9.55908289e-02, -5.55111512e-17,
         0.00000000e+00, -1.73472348e-18],
       [ 5.36666667e-01,  1.00000000e+00,  0.00000000e+00,
         1.38777878e-17,  2.77555756e-17,  0.00000000e+00,
         5.55111512e-17,  0.00000000e+00,  2.

In [117]:

# Write every cluster label back to Hotspots over one explicit connection, which is closed
# when the batch ends, even if a row fails.
batchCnx = connectDB()
try:
    # Walk the two columns together instead of looking each value up by index label.
    for rawLocId,rawLabel in zip(frame['locId'],frame['clusterLabel']):
        locId = str(rawLocId)
        label = int(rawLabel)   # plain Python int for the SQL parameter binding
        post_hotspotHabitat(locId=locId,label=label,cnx=batchCnx)
finally:
    batchCnx.close()



In [69]:
# Demonstrates the validation path: when the locId has no row in Hotspots the call raises
# notInTable. Catch it and show the message so the cell does not stop a full notebook run.
try:
    post_hotspotHabitat(locId='L8520',label=3)
except notInTable as missingLocId:
    print(missingLocId)

notInTable: L8520 >> given locId cannot be found in table Hotspots