# Import Required Libraries
Import the libraries used throughout the notebook: requests, pandas, NumPy, and `get_observations` from pyinaturalist.

In [0]:
# Importing the required libraries
import requests
import pandas as pd
from pyinaturalist import get_observations
import numpy as np

# Send GET Request to iNaturalist API
Use pyinaturalist's `get_observations` (a wrapper around the iNaturalist API) to fetch research-grade observations for Laval, Montreal and Gatineau, page by page.

In [0]:
# Place identifiers passed as `place_id` when querying observations for each city.
laval_place_id, montreal_place_id, gatineau_place_id = 27655, 187355, 142292

def get_observations_dataframe(place_id, place_name, max_pages=9, per_page=200):
    """Fetch research-grade observations for one place and return them as a DataFrame.

    Pages are requested one at a time through ``get_observations``. Fetching stops
    at the first page that comes back with no results, so places with few
    observations do not trigger needless requests.

    Args:
        place_id: Place identifier forwarded to ``get_observations``.
        place_name: Label written to the ``location`` column of every row.
        max_pages: Maximum number of pages to request. Defaults to 9, the number
            of pages that was previously hard-coded.
        per_page: Number of observations requested per page. Defaults to 200.

    Returns:
        pandas.DataFrame: One row per observation, with a ``location`` column set
        to ``place_name``. The frame has no rows when nothing was returned.
    """
    records = []
    for page in range(1, max_pages + 1):
        response = get_observations(
            place_id=place_id,
            verifiable=True,
            per_page=per_page,
            page=page,
            quality_grade="research",
        )
        results = response["results"]
        if not results:
            # An empty page means there is nothing further to fetch.
            break
        records.extend(results)

    # Build the frame once from the accumulated records: DataFrame.append was
    # removed in pandas 2.0, and growing a frame inside a loop is quadratic.
    df = pd.DataFrame(records)
    # Set once after loading; this replaces any "location" field that came with
    # the fetched records by the caller-supplied label.
    df["location"] = place_name
    return df

# Fetch the observations of each city, then stack them into a single DataFrame.
data_laval = get_observations_dataframe(laval_place_id, "Laval")
data_montreal = get_observations_dataframe(montreal_place_id, "Montreal")
data_gatineau = get_observations_dataframe(gatineau_place_id, "Gatineau")

df_naturalist = pd.concat([data_laval, data_montreal, data_gatineau], ignore_index=True)

# Nothing is written to disk in this cell, so report what was loaded rather than
# claiming a CSV file was saved.
print(f"Loaded {len(df_naturalist)} observations for Laval, Montreal and Gatineau.")

In [0]:
import pandas as pd

# Parse the observation dates as timezone-aware UTC timestamps before persisting.
observed_on_utc = pd.to_datetime(df_naturalist['observed_on'], utc=True)
df_naturalist['observed_on'] = observed_on_utc

# Persist the combined observations as a Parquet file.
df_naturalist.to_parquet(path="naturalist_sample.parquet")

In [0]:
# Reload the saved sample and turn the observation timestamps into YYYY-MM-DD strings.
df_naturalist = pd.read_parquet(path="naturalist_sample.parquet")
observed_on_text = df_naturalist["observed_on"].dt.strftime("%Y-%m-%d")
df_naturalist["observed_on"] = observed_on_text

In [0]:
import os

# Load 'sentinelle_liste_sp.csv' (path is relative to the working directory)
# into a DataFrame and display it for a visual check.
file_path = 'sentinelle_liste_sp.csv'
df_sentinelle = pd.read_csv(filepath_or_buffer=file_path)

display(df_sentinelle)

In [0]:
import numpy as np

def add_isEnvahissant_column(df_inaturalist, df_sentinelle):
    """Flag observations whose species guess appears in the sentinelle species list.

    The comparison is case-insensitive: ``species_guess`` is matched against
    ``Nom_francais`` after lower-casing both. Neither input frame is modified.

    Args:
        df_inaturalist: DataFrame with a ``species_guess`` column.
        df_sentinelle: DataFrame with a ``Nom_francais`` column listing the
            reference species names.

    Returns:
        pandas.DataFrame: A copy of ``df_inaturalist`` with a fresh 0..n-1 index,
        ``species_guess`` lower-cased, and a boolean ``isEnvahissant`` column
        appended. It has exactly one row per input row, even when the reference
        list contains duplicate names.
    """
    def _lowercase(values):
        # Lower-case as text but keep missing values missing, so that a missing
        # species guess can never match a missing reference name through the
        # literal strings "nan" / "none".
        return values.where(values.isna(), values.astype(str).str.lower())

    # Work on a copy so the caller's frames are left untouched.
    result = df_inaturalist.copy()
    result["species_guess"] = _lowercase(result["species_guess"])

    invasive_names = _lowercase(df_sentinelle["Nom_francais"]).dropna()

    # Membership test instead of a merge: duplicate reference names cannot
    # multiply rows, and columns shared by both frames cannot collide.
    result["isEnvahissant"] = result["species_guess"].isin(invasive_names)
    return result.reset_index(drop=True)

# joined_df = add_isEnvahissant_column(df_naturalist, df_sentinelle)
# joined_df[joined_df["isEnvahissant"] == True]

In [0]:
# Build a slim frame with the lower-cased species guess, the location label and the
# observation date, then flag the rows whose species is on the sentinelle list.
df_simplified = pd.DataFrame({
    'species_guess': df_naturalist['species_guess'].astype(str).str.lower(),
    'location': df_naturalist['location'],
    'observation_date': df_naturalist['observed_on'],
})
df_simplified = add_isEnvahissant_column(df_simplified, df_sentinelle)

# Export one JSON record per row, leaving non-ASCII characters unescaped.
df_simplified.to_json(path_or_buf="laval_with_species.json", orient="records", force_ascii=False)

# Parse API Response
Parse the JSON response from the API and convert it into a pandas DataFrame.

In [0]:
# Flatten each raw observation into the fields kept for the CSV export.
# NOTE(review): `results` is not defined in any visible cell of this notebook. It is
# presumably the list of raw observation dicts from an API response and must exist
# before this cell runs; confirm where it should come from.
def _observation_to_record(result):
    """Return the flat record for one raw observation dict.

    An observation whose ``geojson`` entry is missing or empty gets ``None`` for
    both coordinates instead of aborting the whole conversion.
    """
    geojson = result.get('geojson') or {}
    coordinates = geojson.get('coordinates') or (None, None)
    return {
        'id': result['id'],
        'observed_on': result['observed_on'],
        'description': result['description'],
        # coordinates are read as [longitude, latitude]
        'latitude': coordinates[1],
        'longitude': coordinates[0],
        'location': result['location'],
        'species_guess': result['species_guess']
    }

observations = [_observation_to_record(result) for result in results]

# Convert the list of observations into a pandas DataFrame
df = pd.DataFrame(observations)

# Save the DataFrame to a CSV file
df.to_csv('inaturalist_data.csv', index=False)

print("Data saved to 'inaturalist_data.csv'.")

# Filter Data by City
Filter the DataFrame based on the city or cities of interest.

In [0]:
# Cities to keep; the values below are placeholders.
cities_of_interest = ['city1', 'city2', 'city3']  # replace with your cities

# Keep only the rows whose 'location' is one of the listed cities.
in_selected_city = df['location'].isin(cities_of_interest)
df_filtered = df.loc[in_selected_city]

# Write the selection to CSV without the index column.
df_filtered.to_csv(path_or_buf='filtered_inaturalist_data.csv', index=False)

print("Filtered data saved to 'filtered_inaturalist_data.csv'.")

# Save Data to CSV
Save the filtered data to a CSV file using pandas.

In [0]:
# Write the filtered observations to CSV (index column omitted) and confirm on stdout.
df_filtered.to_csv(path_or_buf='filtered_inaturalist_data.csv', index=False)

print("Filtered data saved to 'filtered_inaturalist_data.csv'.")