In [1]:
# set root directory
import os

# The notebook is started from a sub-folder, while the imports
# (`py_scripts.*`) and the relative output folders used further down are
# resolved from the working directory. Only move up when `py_scripts` is not
# already visible from here: this keeps the cell idempotent, so re-running it
# does not climb one more level each time.
if not os.path.isdir("py_scripts"):
    os.chdir("../")

# displayed as the cell output to confirm where we ended up
os.getcwd()

'c:\\Users\\HP\\Desktop\\clustering-moroccan-weather-data'

In [2]:
# imports
import numpy as np
import pandas as pd

In [3]:
# imports for plots
# (none of these names are used in the cells visible in this part of the notebook)
from plotly import graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.io as pio

# make "plotly_white" the default template for figures created from here on
pio.templates.default = "plotly_white"

In [4]:
# general params
# Reference point for Sidi Slimane: LAT_SIDI_SLIMANE is held fixed while
# arr_lon is swept, and LON_SIDI_SLIMANE is held fixed while arr_lat is swept.
# NOTE(review): Sidi Slimane is usually located around 34.26 N, 5.92 W, which
# would make -5.9240 the longitude and 34.2623 the latitude, i.e. the LAT_/LON_
# names (and the arr_lon / arr_lat sweeps built from them) look swapped.
# Confirm against the argument order LocationWeather expects before renaming.
LAT_SIDI_SLIMANE = -5.9240
LON_SIDI_SLIMANE = 34.2623
# nominal grid step used to decide how many locations each sweep samples
# (presumably in degrees, like the coordinates -- TODO confirm)
SPACIAL_RESOLUTION_SATELLITE = 0.25

# for invariant lat
# Range covered by the first sweep (arr_lon). These two names are re-bound
# further down for the invariant-lon sweep, so re-run this cell before
# recomputing arr_lon.
SUP_BOUNDARY = 35.8
INF_BOUNDARY = 29.65

# number of rows pre-allocated for each per-location data frame; the
# clustering result is expected to cover this many years
nb_crop_years = 39

In [5]:
# Sample the swept coordinate from INF_BOUNDARY to SUP_BOUNDARY, both ends
# included. The number of points is the span divided by the satellite
# resolution, truncated to an integer; since linspace spreads that many points
# over the closed interval, the effective step is span / (nb_locations - 1),
# which is slightly larger than SPACIAL_RESOLUTION_SATELLITE.
nb_locations = int((SUP_BOUNDARY - INF_BOUNDARY) / SPACIAL_RESOLUTION_SATELLITE)
arr_lon = np.linspace(start=INF_BOUNDARY, stop=SUP_BOUNDARY, num=nb_locations)

# show the sampled values
arr_lon

array([29.65      , 29.9173913 , 30.18478261, 30.45217391, 30.71956522,
       30.98695652, 31.25434783, 31.52173913, 31.78913043, 32.05652174,
       32.32391304, 32.59130435, 32.85869565, 33.12608696, 33.39347826,
       33.66086957, 33.92826087, 34.19565217, 34.46304348, 34.73043478,
       34.99782609, 35.26521739, 35.5326087 , 35.8       ])

In [6]:
# import modules
from py_scripts.fetch import LocationWeather
from py_scripts.process import process_data
from py_scripts.clustering import multivariate_clustering

# Sweep every value of arr_lon while "lat" stays fixed at LAT_SIDI_SLIMANE.
# For each location: fetch the weather data, process it, cluster the years and
# write one CSV with the columns lat, lon, year, cluster.

# to_csv does not create missing folders, so make sure the target exists
os.makedirs("data_inv_lat", exist_ok=True)

# constant for the whole sweep, no need to re-bind it on every iteration
lat = LAT_SIDI_SLIMANE

for lon in arr_lon:
    # fetch and process data
    raw_data = LocationWeather(lat, lon).weather_data_frame
    processed_data = process_data(raw_data)

    # cluster data
    dict_clusters = multivariate_clustering(processed_data)

    # flatten {cluster_name: years} into two parallel columns
    dict_year_cluster = {"year": [], "cluster": []}
    for cluster_name, cluster_years in dict_clusters.items():
        cluster_years = list(cluster_years)
        dict_year_cluster["year"] += cluster_years
        dict_year_cluster["cluster"] += [cluster_name] * len(cluster_years)

    # The frame is sized by what the clustering actually returned. Pre-sizing
    # it with nb_crop_years and joining on the index would silently drop years
    # (or pad with NaN) whenever the two counts differ, so only report it.
    nb_years = len(dict_year_cluster["year"])
    if nb_years != nb_crop_years:
        print(
            f"lat={lat}, lon={lon}: clustering returned {nb_years} years, "
            f"expected {nb_crop_years}"
        )

    # broadcast the coordinates over all rows and keep the original column order
    df_location = pd.DataFrame(dict_year_cluster).assign(lat=lat, lon=lon)[
        ["lat", "lon", "year", "cluster"]
    ]

    # save df
    df_location.to_csv(f"data_inv_lat/{lon};{lat}.csv", index=False)

In [7]:
# for invariant lon
# Range covered by the second sweep. This re-binds the shared boundary names,
# so any cell that reads INF_BOUNDARY / SUP_BOUNDARY after this one sees these
# values.
INF_BOUNDARY, SUP_BOUNDARY = -6.65, -1.70

# Same sampling rule as for the first sweep: truncated span / resolution gives
# the number of points, and linspace includes both boundaries.
nb_locations = int((SUP_BOUNDARY - INF_BOUNDARY) / SPACIAL_RESOLUTION_SATELLITE)
arr_lat = np.linspace(start=INF_BOUNDARY, stop=SUP_BOUNDARY, num=nb_locations)

# show the sampled values
arr_lat

array([-6.65 , -6.375, -6.1  , -5.825, -5.55 , -5.275, -5.   , -4.725,
       -4.45 , -4.175, -3.9  , -3.625, -3.35 , -3.075, -2.8  , -2.525,
       -2.25 , -1.975, -1.7  ])

In [8]:
# Sweep every value of arr_lat while "lon" stays fixed at LON_SIDI_SLIMANE.
# For each location: fetch the weather data, process it, cluster the years and
# write one CSV with the columns lat, lon, year, cluster.

# to_csv does not create missing folders, so make sure the target exists
os.makedirs("data_inv_lon", exist_ok=True)

# constant for the whole sweep, no need to re-bind it on every iteration
lon = LON_SIDI_SLIMANE

for lat in arr_lat:
    # fetch and process data
    raw_data = LocationWeather(lat, lon).weather_data_frame
    processed_data = process_data(raw_data)

    # cluster data
    dict_clusters = multivariate_clustering(processed_data)

    # flatten {cluster_name: years} into two parallel columns
    dict_year_cluster = {"year": [], "cluster": []}
    for cluster_name, cluster_years in dict_clusters.items():
        cluster_years = list(cluster_years)
        dict_year_cluster["year"] += cluster_years
        dict_year_cluster["cluster"] += [cluster_name] * len(cluster_years)

    # The frame is sized by what the clustering actually returned. Pre-sizing
    # it with nb_crop_years and joining on the index would silently drop years
    # (or pad with NaN) whenever the two counts differ, so only report it.
    nb_years = len(dict_year_cluster["year"])
    if nb_years != nb_crop_years:
        print(
            f"lat={lat}, lon={lon}: clustering returned {nb_years} years, "
            f"expected {nb_crop_years}"
        )

    # broadcast the coordinates over all rows and keep the original column order
    df_location = pd.DataFrame(dict_year_cluster).assign(lat=lat, lon=lon)[
        ["lat", "lon", "year", "cluster"]
    ]

    # save df
    df_location.to_csv(f"data_inv_lon/{lon};{lat}.csv", index=False)