# Analytics DataCube

> 👋 Before moving on with this demo, you must first sign up and request your Geosys API credentials here:
> - ⚙️[Try it now](https://earthdailyagro.com/geosys-registration/)

> For more information about the Geosys APIs:
> - 📚 [Geosys APIs to connect with your digital ag application](https://app.geosys.com/#/documentation)


> **Demo Project:** This demo shows how to create an Analytics Datacube of CLEAR images using geosysPy.
The generated output is a Zarr file made available on the selected cloud storage provider (AWS S3 or Azure Blob Storage).



### @author: Geosys



 ## 1️⃣ Import all libraries needed and get an authorization to use Analytics Datacube

In [None]:
import sys
import os
sys.path.append(os.path.join('..','src'))
from geosyspy import Geosys
from geosyspy.utils.constants import *
from dotenv import load_dotenv
from datetime import datetime
from dateutil.relativedelta import relativedelta
from analytics_datacube.processor import AnalyticsDatacube
from analytics_datacube.utils import dataset_to_zarr_format
import datetime as dt
import logging
import xarray
from cloud_storage import cloud_storage_aws,cloud_storage_azure

# Root logger: only WARNING and more severe records are let through.
logger = logging.getLogger()
logger.setLevel(logging.WARNING)

# Read the .env file so the credentials below can be found in the environment.
load_dotenv()

# Geosys API credentials; each one is None when its variable is not set.
API_CLIENT_ID = os.environ.get('API_CLIENT_ID')
API_CLIENT_SECRET = os.environ.get('API_CLIENT_SECRET')
API_USERNAME = os.environ.get('API_USERNAME')
API_PASSWORD = os.environ.get('API_PASSWORD')

# Create the Analytics Datacube client, targeting the PROD environment
# and the NA region.
print("Initializing the client...")
client = AnalyticsDatacube(
    API_CLIENT_ID,
    API_CLIENT_SECRET,
    API_USERNAME,
    API_PASSWORD,
    Env.PROD,
    Region.NA,
)
print("Client initialized !")

 ## 2️⃣ Input data

The following input parameters are used in this demo to generate the Analytics DataCube:


#### polygon:
A polygon string in WKT or GeoJSON format. This polygon defines the boundaries of the area under analysis.

#### start_date: 
A datetime object representing the start date of the generated DataCube.

#### end_date: 
A datetime object representing the end date of the generated DataCube.

#### indicators: 
A string array representing the index indicators on which the analysis is made.
Possible values for an indicator are:
- ndvi
- evi
- gndvi
- ndwi
- cvi
- cvin
- lai

In [None]:
# Area of interest, given here as a WKT string.
# Alternative inputs kept for reference:
#   WKT (rounded coordinates):
#     polygon = "POLYGON((-90.41 41.6663, -90.41 41.6545, -90.3775 41.6541, -90.3778 41.6660, -90.41 41.6663))"
#   GeoJSON:
#     polygon = '{"type": "Polygon","coordinates": [[[-90.41, 41.6663],[-90.41, 41.6545],[-90.3775, 41.6541],[-90.3778, 41.666],[-90.41, 41.6663]]]}'
polygon = "POLYGON((-90.41169914 41.66631642, -90.41178502 41.6545818, -90.37753855 41.65413284, -90.37788188 41.666059940000004, -90.41169914 41.66631642))"

# Indicators requested for the datacube.
indicators = ["NDVI", "NDWI", "EVI"]

# Analysis window: the twelve months that end today.
endDate = dt.date.today()
startDate = endDate - relativedelta(months=12)

## 3️⃣ Generate the analytics datacube
Generate an analytics datacube containing the values of each index, computed per pixel, over the defined period for the specified polygon.

In [None]:
# Ask the client for the datacube covering the polygon, the period and the
# indicators defined in the input-data cell.
analytics_datacube = client.generate_analytics_datacube(
    polygon,
    startDate,
    endDate,
    indicators,
)

# Bare expression on the last line so the notebook displays the result.
analytics_datacube

## 4️⃣ Save the generated analytics DataCube in a Zarr file

In [None]:
# Save the datacube as Zarr; the helper's return value is kept in `zarr_path`
# and reused by the upload cells below.
zarr_path = dataset_to_zarr_format(analytics_datacube)
# Bare expression on the last line so the notebook displays the value.
zarr_path

## 5️⃣ Upload the result to Cloud Storage accounts


### Upload on AWS Cloud Storage

In [None]:
# Upload the Zarr folder to AWS S3; a falsy return value is reported as a failure.
if not cloud_storage_aws.upload_folder_to_aws_s3(zarr_path):
    print("Issue to upload Analytics DataCube on AWS")
else:
    print("Analytics DataCube uploaded to AWS")
    # Show where the uploaded datacube can be found.
    print(f'S3 uri: {cloud_storage_aws.get_s3_uri_path(zarr_path)}')

### Upload on Azure Cloud Storage 

In [None]:
# Upload the Zarr directory to Azure Blob Storage; a falsy return value is
# reported as a failure.
if not cloud_storage_azure.upload_directory_to_azure_blob_storage(zarr_path):
    print("Issue to upload Analytics DataCube on Azure Blob Storage")
else:
    print("Analytics DataCube uploaded to Azure Blob Storage")
    # Show where the uploaded datacube can be found.
    print(f'Azure Blob url: {cloud_storage_azure.get_azure_blob_url_path(zarr_path)}')

##  6️⃣ Display the results
Visualize the results using matplotlib

### Time series for each index

In [None]:
from matplotlib import pyplot as plt
import numpy as np

# Names of the data variables available in the analytics datacube.
bands = list(analytics_datacube.data_vars)

# Draw one figure per data variable: its spatial mean plotted against time.
for band in bands:
    plt.figure(figsize=(10, 6))

    # Select the variable on its upper-cased band label, ordered by time.
    indicator_ds = analytics_datacube.sel(band=band.upper())[band].sortby('time')

    # Spatial mean for each date. No explicit NaN mask is applied:
    # `where(~isnan(...))` only replaces NaN with NaN, and xarray's mean
    # already skips NaN values for float data by default.
    mean_index = indicator_ds.mean(dim=['x', 'y'])

    # Plot the mean against time.
    mean_index.plot.line(x='time')

    # Labels and title (set after plotting so they replace xarray's defaults).
    plt.xlabel('Time')
    plt.ylabel(band)
    plt.title(f'{band.upper()} Time Series')
    plt.grid()
    plt.show()

### Cumulative index indicator values over the defined period.

In [None]:
# Names of the data variables available in the analytics datacube.
bands = list(analytics_datacube.data_vars)

# Draw one figure per data variable: the running total of its spatial mean
# over time.
for band in bands:
    plt.figure(figsize=(10, 6))

    # Select the variable on its upper-cased band label, ordered by time.
    indicator_ds = analytics_datacube.sel(band=band.upper())[band].sortby('time')

    # Spatial mean for each date. No explicit NaN mask is applied:
    # `where(~isnan(...))` only replaces NaN with NaN, and xarray's mean
    # already skips NaN values for float data by default.
    mean_index = indicator_ds.mean(dim=['x', 'y'])

    # Cumulative sum of the per-date means along the time axis.
    cumul_index_ds = mean_index.cumsum(dim='time')

    # Plot the cumulative values against time.
    cumul_index_ds.plot.line(x='time')

    # Labels and title (set after plotting so they replace xarray's defaults).
    plt.xlabel('Time')
    plt.ylabel(band)
    plt.title(f'{band.upper()} cumulative mean values over time')
    plt.grid()
    plt.show()