In [None]:
import pandas as pd
import numpy as np
import sys
from sklearn.preprocessing import StandardScaler, MinMaxScaler

sys.path.append("../../")
from utils.data_processing import _drop_consecutive_nans, add_day_ahead_column
from utils.error_metrics import _calc_mae, _calc_mse, _calc_rmse, _calc_nrmse, _calc_mape, _calc_mase, _calc_msse, _seas_naive_fcst, _calc_metrics
from utils.clustering import mapping_tsfeatures, clustering, sum_until_threshold, mapping_energy_metrics

### ML AZURE
from azureml.core import Workspace, Dataset, Datastore
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
import mlflow

from config import subscription_id, resource_group, workspace_name
# azureml.core handle for the workspace described in config.py; it is used
# right away to look up the datastore named "workspaceblobstore".
workspace = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
)
datastore = Datastore.get(workspace, datastore_name="workspaceblobstore")

# azure.ai.ml client for the same workspace, authenticated through the
# DefaultAzureCredential chain.
credential = DefaultAzureCredential()
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace_name,
)

import warnings
# Warning is the base class of all warning categories, so this filter hides
# every warning raised from here on (deprecations and pandas warnings included).
warnings.simplefilter(action='ignore', category=Warning)

### Plotting
import plotly.graph_objs as go
import plotly.express as px
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Global matplotlib look for every figure in this notebook: serif text, size 13
plt.rcParams.update({'font.family': 'serif', 'font.size': 13})

# Default line-colour cycle: six dark tones first, then six light tones
stanford_colors = [
    '#1f78b5', '#33a12c', '#e41a1c', '#ff7f00', '#6a3d9b', '#b25928',  # dark
    '#a7cfe4', '#b3e08b', '#fc9b9a', '#fec06f', '#cbb3d7', '#ffff9a',  # light
]
plt.rcParams['axes.prop_cycle'] = plt.cycler(color=stanford_colors)

# 1. Data Loading

In [None]:
### Load data
# The CSV is wide: a 'Time' column plus one column per ID. It is reshaped to
# long format with columns ds (timestamp), ID (former column name) and y (value).

uri = "azureml://subscriptions/workspaceblobstore/paths/load_signal.csv"
df_load = (
    pd.read_csv(uri)
    .melt(id_vars=['Time'], var_name='ID', value_name='y')
    .rename(columns={'Time': 'ds'})
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
)

In [None]:
### Mapping
# Node table; the columns used in this notebook are ID, country, latitude, longitude.

uri = "azureml://subscriptions/workspaceblobstore/paths/network_nodes.csv"
# IDs are cast to str so they compare equal to the ID strings that melt()
# produced from the load file's column names.
mapping = pd.read_csv(uri).astype({'ID': str})

# One row per country holding the median latitude / longitude of its nodes
median = (
    mapping
    .groupby('country')[['latitude', 'longitude']]
    .median()
    .reset_index()
    .rename(columns={'latitude': 'latitude_median', 'longitude': 'longitude_median'})
)

In [None]:
### Load country weather data and mapping
# Result: country_weather with columns country, ds, temp, humidity,
# precipitation, cloud, wind.

uri = "azureml://subscriptions/workspaceblobstore/paths/01_open-meteo_countrymedian.csv"
country_weather = pd.read_csv(uri)
uri = "azureml://subscriptions/workspaceblobstore/paths/01_open-meteo_countrymedian_mapping.csv"
weather_mapping = pd.read_csv(uri)
# NOTE(review): this join is purely positional (row i of the location file is
# paired with row i of `median`, which groupby sorted by country). It presumably
# relies on the weather locations having been requested in that same order --
# TODO confirm, since a mismatch would silently assign the wrong country.
weather_mapping = weather_mapping.merge(median, left_index=True, right_index=True, how='left')

# Source column -> short name; the key order is also the output column order
weather_column_names = {
    'temperature_2m (°C)': 'temp',
    'relative_humidity_2m (%)': 'humidity',
    'precipitation (mm)': 'precipitation',
    'cloud_cover (%)': 'cloud',
    'wind_speed_10m (km/h)': 'wind',
}

country_weather = (
    country_weather
    # 'time' is parsed as epoch seconds
    .assign(time=lambda frame: pd.to_datetime(frame['time'], unit='s'))
    # swap the numeric location_id for the country it was paired with above
    .merge(weather_mapping[['location_id', 'country']], on='location_id', how='left')
    .drop(columns=['location_id'])
    .rename(columns={'time': 'ds'})
    [['country', 'ds', *weather_column_names]]
    .rename(columns=weather_column_names)
)

# 2. Bus Data and Weather

In [None]:
### Load bus weather
# Weather table that the following cell matches to the network nodes; that cell
# reads its latitude, longitude, time and t2m columns.

uri = "azureml://subscriptions/workspaceblobstore/paths/weather_data.csv"
bus_weather = pd.read_csv(uri)
# Bare last expression: the notebook renders the frame as this cell's output
bus_weather

In [None]:
### Preprocessing bus weather
# For every node in `mapping`, find the nearest coordinate pair present in
# `bus_weather`, then attach that point's weather to each load row
# (matched on coordinates and timestamp) and save the enriched load table.

from scipy.spatial import cKDTree

coord_cols = ['latitude', 'longitude']

# The weather table is later joined on (latitude, longitude, time), i.e. every
# coordinate pair appears once per timestamp. Index only the distinct pairs:
# the nearest coordinates are the same, but the tree is far smaller and is not
# degraded by thousands of identical points.
weather_points = bus_weather[coord_cols].drop_duplicates().reset_index(drop=True)
tree = cKDTree(weather_points.to_numpy())
# NOTE: "nearest" is plain Euclidean distance on the raw latitude/longitude
# values (cKDTree default), not a great-circle distance.
distances, indices = tree.query(mapping[coord_cols].to_numpy(), k=1)
closest_coordinates = weather_points.iloc[indices].rename(
    columns={'latitude': 'latitude_closest', 'longitude': 'longitude_closest'}
)
# Both frames get a fresh 0..n-1 index so concat pairs row i of `mapping`
# with the i-th query result.
weather_mapping_bus = pd.concat(
    [mapping.reset_index(drop=True), closest_coordinates.reset_index(drop=True)],
    axis=1,
)

### Merge load with weather
temp = weather_mapping_bus[['ID', 'latitude_closest', 'longitude_closest']]
# rename() ignores a missing 'time' column, so this also works on a re-run
bus_weather = bus_weather.rename(columns={'time': 'ds'})
bus_weather['ds'] = pd.to_datetime(bus_weather['ds'])

# Re-run safety: if this cell already ran, df_load still carries the weather
# columns from the previous merge; drop them so the merge below does not
# produce duplicated '_x' / '_y' columns.
weather_value_cols = [col for col in bus_weather.columns if col not in (*coord_cols, 'ds')]
df_load = df_load.drop(columns=weather_value_cols, errors='ignore')

df_load = (
    df_load
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
    # node ID -> coordinates of its nearest weather point
    .merge(temp, on='ID', how='left')
    .rename(columns={'latitude_closest': 'latitude', 'longitude_closest': 'longitude'})
    # the coordinates were copied from bus_weather itself, so the exact float
    # equality used by this join holds
    .merge(bus_weather, on=['latitude', 'longitude', 'ds'], how='left')
    .drop(columns=['latitude', 'longitude'])
)

df_load.to_csv('01_load_bus.csv', index=False)

# 3. Aggregate Country Data

In [None]:
### Preparation
# Collapse node-level load to one row per (timestamp, country) by summing y,
# attach the country-level weather and write the result to disk.

temp = (
    df_load
    .merge(mapping, on='ID', how='left')
    [['ds', 'ID', 'y', 'country']]
    # pandas' sum skips NaN values, so missing node readings do not propagate
    .groupby(['ds', 'country'])['y']
    .sum()
    .reset_index()
)
df_load_country = temp.merge(country_weather, on=['country', 'ds'], how='left')
df_load_country.to_csv('00_load_country.csv', index=False)