In [1]:
# Change the working directory to the project root so that the relative data
# paths used further down ("processed_data/...", "ready_to_use_data/...")
# resolve correctly.
import os

# Guard the move: an unconditional os.chdir("../") climbs one more level every
# time this cell is re-run, which silently breaks all relative paths. Only
# step up when the data folder is not already visible from the current
# working directory.
if not os.path.isdir("processed_data"):
    os.chdir("../")

In [2]:
# imports
import numpy as np
import pandas as pd

In [3]:
# imports for plots
from plotly import graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.io as pio

# use the "plotly_white" template as the default for plotly figures created from here on
# NOTE(review): none of the cells visible in this part of the notebook draws a figure — confirm these imports are still needed
pio.templates.default = "plotly_white"

In [4]:
# read the clustering summary CSV into a DataFrame
# (the path is relative to the project root set as working directory above)
summary_clustering = pd.read_csv(
    filepath_or_buffer="processed_data/summary_clustering_3.csv",
)

In [46]:
# preview the loaded table (pandas abbreviates long frames, showing only the first and last rows)
summary_clustering

Unnamed: 0,lat,lon,region,name,year_multi,cluster_multi,year_PRECTOT,cluster_PRECTOT
0,21.491028,-13.571798,Western Sahara,Western Sahara,2004,High,2004,High
1,21.491028,-13.571798,Western Sahara,Western Sahara,2011,High,2011,High
2,21.491028,-13.571798,Western Sahara,Western Sahara,2016,High,2016,High
3,21.491028,-13.571798,Western Sahara,Western Sahara,1988,Medium,1989,Medium
4,21.491028,-13.571798,Western Sahara,Western Sahara,1989,Medium,1990,Medium
...,...,...,...,...,...,...,...,...
27490,35.891028,-5.471798,Tangier-Tetouan,Ferme Abdellah Douasse,2015,Low,1995,Low
27491,35.891028,-5.471798,Tangier-Tetouan,Ferme Abdellah Douasse,2016,Low,1999,Low
27492,35.891028,-5.471798,Tangier-Tetouan,Ferme Abdellah Douasse,2018,Low,2012,Low
27493,35.891028,-5.471798,Tangier-Tetouan,Ferme Abdellah Douasse,2019,Low,2014,Low


In [None]:
# json structure
# Target layout of the exported files:
#   crop year (as a string) -> cluster name ("High" / "Medium" / "Low")
#   -> one list per column ("lat", "lon", "region", "name").
# For a given year and cluster, the four lists are filled from the same
# filtered rows of the summary table.
# This cell only illustrates the layout; the bare literal is not assigned to anything.
{
    "1982": {
        "High": {
            "lat": [],
            "lon": [],
            "region": [],
            "name": []
        },
        "Medium": {
            "lat": [],
            "lon": [],
            "region": [],
            "name": []
        },
        "Low": {
            "lat": [],
            "lon": [],
            "region": [],
            "name": []
        },
    },
    # ... other years follow the same shape
}

In [13]:
# sanity check: "lat" values of the rows labelled "High" by the multi clustering for year 2000
query = summary_clustering["year_multi"].eq(2000) & summary_clustering["cluster_multi"].eq("High")
summary_clustering.loc[query, "lat"].values

array([], dtype=float64)

In [24]:
# general params: NB_CROP_YEARS consecutive crop years, the first one being START_YEAR
START_YEAR = 1982
NB_CROP_YEARS = 39

# every crop year, from first to last
arr_crop_years = list(range(START_YEAR, START_YEAR + NB_CROP_YEARS))
# cluster labels used as second-level keys of the exported dictionaries
arr_cluster_names = ["High", "Medium", "Low"]

# echo both lists, one per line
print(arr_crop_years, arr_cluster_names, sep="\n")

[1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020]
['High', 'Medium', 'Low']


In [57]:
# dict for multi clustering
# Layout: dict_multi[str(year)][cluster_name][column] -> list of values taken
# from the rows whose "year_multi" / "cluster_multi" match that year and cluster.
dict_multi = {}

# loop over crop years
for year in arr_crop_years:
    dict_multi[str(year)] = {}

    # the year mask does not depend on the cluster: compute it once per year
    is_year = summary_clustering["year_multi"] == year

    for cluster_name in arr_cluster_names:
        # select year and cluster (the selection may be empty)
        query = is_year & (summary_clustering["cluster_multi"] == cluster_name)
        data_year = summary_clustering[query]

        # fill data year
        # .tolist() converts NumPy scalars to native Python objects, so the
        # lists stay JSON-serialisable whatever the column dtype is
        dict_multi[str(year)][cluster_name] = {
            col: data_year[col].tolist()
            for col in ["lat", "lon", "region", "name"]
        }

In [59]:
# spot-check one entry: "lat" values stored for the "High" cluster of year 2001
dict_multi['2001']["High"]["lat"]

[34.69102828115297,
 34.69102828115297,
 34.991028281152964,
 34.991028281152964,
 35.29102828115297,
 35.29102828115297,
 35.591028281152965]

In [63]:
# store dict multi as json
import json

json_path_name = "ready_to_use_data/year_clusters/multi_clustering.json"

# Create the output folder when it is missing (e.g. on a fresh checkout);
# without it, open() below fails with FileNotFoundError.
# `os` is imported in the first cell of the notebook.
os.makedirs(os.path.dirname(json_path_name), exist_ok=True)

# indent=2 keeps the exported file human-readable
with open(json_path_name, 'w') as fp:
    json.dump(dict_multi, fp, indent=2)

In [64]:
# dict for the uni clustering on PRECTOT
# Layout: dict_uni_PRECTOT[str(year)][cluster_name][column] -> list of values
# taken from the rows whose "year_PRECTOT" / "cluster_PRECTOT" match that year
# and cluster.
dict_uni_PRECTOT = {}

# loop over crop years
for year in arr_crop_years:
    dict_uni_PRECTOT[str(year)] = {}

    # the year mask does not depend on the cluster: compute it once per year
    is_year = summary_clustering["year_PRECTOT"] == year

    for cluster_name in arr_cluster_names:
        # select year and cluster (the selection may be empty)
        query = is_year & (summary_clustering["cluster_PRECTOT"] == cluster_name)
        data_year = summary_clustering[query]

        # fill data year
        # .tolist() converts NumPy scalars to native Python objects, so the
        # lists stay JSON-serialisable whatever the column dtype is
        dict_uni_PRECTOT[str(year)][cluster_name] = {
            col: data_year[col].tolist()
            for col in ["lat", "lon", "region", "name"]
        }

In [66]:
# inspect the PRECTOT entry for crop year 2000 (all three clusters are shown, so the printed output is long)
dict_uni_PRECTOT["2000"]

{'High': {'lat': [], 'lon': [], 'region': [], 'name': []},
 'Medium': {'lat': [23.29102828115297,
   23.29102828115297,
   23.29102828115297,
   23.29102828115297,
   23.29102828115297,
   23.59102828115296,
   23.89102828115297,
   25.691028281152967,
   25.691028281152967,
   25.991028281152968,
   25.991028281152968,
   26.29102828115297,
   26.29102828115297,
   26.29102828115297,
   26.59102828115296,
   26.59102828115296,
   26.89102828115297,
   27.191028281152967,
   27.79102828115297,
   27.79102828115297,
   27.79102828115297,
   27.79102828115297,
   28.09102828115296,
   28.09102828115296,
   28.39102828115297,
   30.191028281152967,
   30.191028281152967,
   30.491028281152964,
   30.491028281152964,
   30.79102828115297,
   30.79102828115297,
   31.09102828115296,
   31.09102828115296,
   31.39102828115297,
   31.39102828115297,
   31.691028281152967,
   31.691028281152967,
   31.691028281152967,
   31.691028281152967,
   31.691028281152967,
   31.991028281152964,
   31.9

In [67]:
# store dict uni (PRECTOT) as json
import json

json_path_name = "ready_to_use_data/year_clusters/uni_clustering.json"

# Create the output folder when it is missing (e.g. on a fresh checkout);
# without it, open() below fails with FileNotFoundError.
# `os` is imported in the first cell of the notebook.
os.makedirs(os.path.dirname(json_path_name), exist_ok=True)

# indent=2 keeps the exported file human-readable
with open(json_path_name, 'w') as fp:
    json.dump(dict_uni_PRECTOT, fp, indent=2)