In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from scipy import stats

In [None]:
def plot_flight_weight_distrib(flight_matrix, binwidth, title, save_name):
    """Plot, save and display one histogram (with KDE) of all entries of a flight matrix.

    Args:
        flight_matrix: array-like of flight weights. Every entry is pooled into
            a single 1-D sample, whatever the input's shape.
        binwidth: histogram bin width, forwarded to ``sns.histplot``.
        title: title drawn above the plot.
        save_name: destination path handed to ``savefig``.

    Returns:
        None. The figure is written to ``save_name``, shown, then closed.
    """
    # np.ravel also accepts plain lists / nested sequences and, unlike
    # .flatten(), does not force a copy of an already contiguous array.
    values = np.ravel(flight_matrix)

    # Use a dedicated figure so nothing left on the current pyplot axes by an
    # earlier cell can end up in this plot.
    fig, ax = plt.subplots()
    sns.histplot(values, binwidth=binwidth, kde=True, ax=ax)
    ax.set_title(title)
    ax.set_xlabel("Number of Flights")
    fig.savefig(save_name, bbox_inches='tight', facecolor='white')
    plt.show()
    # Release the figure explicitly; harmless if the backend already closed it.
    plt.close(fig)

## Check Adjacency 10x10 Matrix on Several Days

In [None]:
datasetv13 = np.load("./datasets/10_countries_dataset_v13_node_pert.npz")
datasetv13.files

In [None]:
datasetv13['flight_matrix_unscaled'][0]

In [None]:
datasetv13['flight_matrix_unscaled'][2]

In [None]:
datasetv13['flight_matrix_unscaled'].sum(axis=0)[:,2]

In [None]:
# Each datasetv13[...] lookup reads the array from the .npz archive again, so
# fetch it once. The reductions work on the full array; no .flatten() copy needed.
flights_v13_unscaled_summary = datasetv13['flight_matrix_unscaled']
print(flights_v13_unscaled_summary.max())
print(flights_v13_unscaled_summary.mean())
print(flights_v13_unscaled_summary.min())

## Plot Unscaled and Scaled Flights Distribution

In [None]:
skewness = round(stats.skew(datasetv13['flight_matrix_unscaled'].flatten()), 5)
plot_flight_weight_distrib(
    flight_matrix=datasetv13['flight_matrix_unscaled'], 
    binwidth=1,
    title="Dataset v13 Unscaled Flight Weights Distribution (Skewness: {})".format(str(skewness)),
    save_name="./v13_unscaled_all_flight_distrib")

In [None]:
skewness = round(stats.skew(datasetv13['flight_matrix_log10_scaled'].flatten()), 5)
plot_flight_weight_distrib(
    flight_matrix=datasetv13['flight_matrix_log10_scaled'], 
    binwidth=0.1,
    title="Dataset v13 Log10 Scaled Flight Weights Distribution (Skewness: {})".format(str(skewness)),
    save_name="./v13_log10_all_flight_distrib")

In [None]:
max(datasetv13['flight_matrix_unscaled'][:, 9, :].flatten())

### Plot specific country flight distribution in dataset v13

In [None]:
# Save one unscaled flight-weight histogram per country for dataset v13
countries = ["Brazil", "Germany", "Spain", "France", "Britain", "India", "Italy", "Russia", "Turkey", "USA"]
# Indexing the .npz archive re-reads the array on every access, so load it once
# instead of three times per loop iteration.
flights_v13_unscaled = datasetv13['flight_matrix_unscaled']
for country_idx, country in enumerate(countries):
    # All entries with this country at axis 1, pooled into one 1-D sample.
    country_flights = flights_v13_unscaled[:, country_idx, :].flatten()
    max_val = country_flights.max()
    # Aim for about 20 bins, but never use a bin narrower than one flight.
    bin_width = max(max_val // 20, 1)
    skewness = round(stats.skew(country_flights), 5)
    sns.histplot(country_flights, binwidth=bin_width, kde=True)
    plt.title("Dataset v13 {} Unscaled Flight Weights Distribution (Skewness: {})".format(country, skewness))
    plt.xlabel("Number of Flights")
    plt.savefig("./v13_unscaled_" + country + "_flight_distrib", bbox_inches='tight', facecolor='white')
    # Clear the shared figure so the next country starts from empty axes.
    plt.clf()

In [None]:
# Save one log10-scaled flight-weight histogram per country for dataset v13
countries = ["Brazil", "Germany", "Spain", "France", "Britain", "India", "Italy", "Russia", "Turkey", "USA"]
# Indexing the .npz archive re-reads the array on every access, so load it once
# instead of three times per loop iteration.
flights_v13_log10 = datasetv13['flight_matrix_log10_scaled']
for country_idx, country in enumerate(countries):
    # All entries with this country at axis 1, pooled into one 1-D sample.
    country_flights = flights_v13_log10[:, country_idx, :].flatten()
    max_val = country_flights.max()
    # Aim for about 20 bins, with a floor of 0.1 on the bin width.
    bin_width = max(max_val / 20, 0.1)
    skewness = round(stats.skew(country_flights), 5)
    sns.histplot(country_flights, binwidth=bin_width, kde=True)
    plt.title("Dataset v13 {} Log 10 Flight Weights Distribution (Skewness: {})".format(country, skewness))
    # NOTE(review): the values plotted here are log10-scaled, yet the axis label
    # still reads "Number of Flights" — confirm whether the label should say so.
    plt.xlabel("Number of Flights")
    plt.savefig("./v13_log10_" + country + "_flight_distrib", bbox_inches='tight', facecolor='white')
    # Clear the shared figure so the next country starts from empty axes.
    plt.clf()

### Dataset v14

In [None]:
datasetv14 = np.load("./datasets/10_countries_dataset_v14_node_pert.npz")
datasetv14.files

In [None]:
skewness = round(stats.skew(datasetv14['flight_matrix_unscaled'].flatten()), 5)
plot_flight_weight_distrib(
    flight_matrix=datasetv14['flight_matrix_unscaled'], 
    binwidth=20,
    title="Dataset v14 Unscaled Flight Weights Distribution (Skewness: {})".format(str(skewness)),
    save_name="./v14_unscaled_all_flight_distrib")

In [None]:
skewness = round(stats.skew(datasetv14['flight_matrix_log10_scaled'].flatten()), 5)
plot_flight_weight_distrib(
    flight_matrix=datasetv14['flight_matrix_log10_scaled'], 
    binwidth=0.1,
    title="Dataset v14 Log10 Scaled Flight Weights Distribution (Skewness: {})".format(str(skewness)),
    save_name="./v14_log10_all_flight_distrib")

### Specific country flight distribution in dataset v14

In [None]:
datasetv14['flight_matrix_unscaled'][:, 9, :].shape

In [None]:
# Save one unscaled flight-weight histogram per country for dataset v14
countries = ["Brazil", "Germany", "Spain", "France", "Britain", "India", "Italy", "Russia", "Turkey", "USA"]
# Indexing the .npz archive re-reads the array on every access, so load it once
# instead of three times per loop iteration.
flights_v14_unscaled = datasetv14['flight_matrix_unscaled']
for country_idx, country in enumerate(countries):
    # All entries with this country at axis 1, pooled into one 1-D sample.
    country_flights = flights_v14_unscaled[:, country_idx, :].flatten()
    max_val = country_flights.max()
    # Aim for about 20 bins, but never use a bin narrower than one flight.
    bin_width = max(max_val // 20, 1)
    skewness = round(stats.skew(country_flights), 5)
    sns.histplot(country_flights, binwidth=bin_width, kde=True)
    plt.title("Dataset v14 {} Unscaled Flight Weights Distribution (Skewness: {})".format(country, skewness))
    plt.xlabel("Number of Flights")
    plt.savefig("./v14_unscaled_" + country + "_flight_distrib", bbox_inches='tight', facecolor='white')
    # Clear the shared figure so the next country starts from empty axes.
    plt.clf()

In [None]:
# Save one log10-scaled flight-weight histogram per country for dataset v14
countries = ["Brazil", "Germany", "Spain", "France", "Britain", "India", "Italy", "Russia", "Turkey", "USA"]
# Indexing the .npz archive re-reads the array on every access, so load it once
# instead of three times per loop iteration.
flights_v14_log10 = datasetv14['flight_matrix_log10_scaled']
for country_idx, country in enumerate(countries):
    # All entries with this country at axis 1, pooled into one 1-D sample.
    country_flights = flights_v14_log10[:, country_idx, :].flatten()
    max_val = country_flights.max()
    # Aim for about 20 bins, with a floor of 0.1 on the bin width.
    bin_width = max(max_val / 20, 0.1)
    skewness = round(stats.skew(country_flights), 5)
    sns.histplot(country_flights, binwidth=bin_width, kde=True)
    plt.title("Dataset v14 {} Log 10 Flight Weights Distribution (Skewness: {})".format(country, skewness))
    # NOTE(review): the values plotted here are log10-scaled, yet the axis label
    # still reads "Number of Flights" — confirm whether the label should say so.
    plt.xlabel("Number of Flights")
    plt.savefig("./v14_log10_" + country + "_flight_distrib", bbox_inches='tight', facecolor='white')
    # Clear the shared figure so the next country starts from empty axes.
    plt.clf()

# Dataset v15

In [None]:
datasetv15 = np.load("./datasets/_old/10_countries_dataset_v15_node_pert.npz")
datasetv15.files

In [None]:
skewness = round(stats.skew(datasetv15['flight_matrix_unscaled'].flatten()), 5)
plot_flight_weight_distrib(
    flight_matrix=datasetv15['flight_matrix_unscaled'], 
    binwidth=20,
    title="Dataset v15 Unscaled Flight Weights Distribution (Skewness: {})".format(str(skewness)),
    save_name="./v15_unscaled_all_flight_distribution")

In [None]:
skewness = round(stats.skew(datasetv15['flight_matrix_log10_scaled'].flatten()), 5)
plot_flight_weight_distrib(
    flight_matrix=datasetv15['flight_matrix_log10_scaled'], 
    binwidth=0.1,
    title="Dataset v15 Log10 Scaled Flight Weights Distribution (Skewness: {})".format(str(skewness)),
    save_name="./v15_log10_all_flight_distrib")

# Dataset v16 Continents

In [None]:
datasetv16 = np.load("./datasets/10_continents_dataset_v16_node_pert.npz")
datasetv16.files

In [None]:
skewness = round(stats.skew(datasetv16['flight_matrix_unscaled'].flatten()), 5)
plot_flight_weight_distrib(
    flight_matrix=datasetv16['flight_matrix_unscaled'], 
    binwidth=2000,
    title="Dataset v16 Unscaled Flight Weights Distribution (Skewness: {})".format(str(skewness)),
    save_name="./v16_unscaled_all_flight_distribution")

In [None]:
skewness = round(stats.skew(datasetv16['flight_matrix_log10_scaled'].flatten()), 5)
plot_flight_weight_distrib(
    flight_matrix=datasetv16['flight_matrix_log10_scaled'], 
    binwidth=0.2,
    title="Dataset v16 Log10 Scaled Flight Weights Distribution (Skewness: {})".format(str(skewness)),
    save_name="./v16_log10_all_flight_distrib")

# Dataset v17

In [None]:
datasetv17 = np.load("./datasets/10_continents_dataset_v17_node_pert.npz")
datasetv17.files

In [None]:
datasetv17["flight_matrix_unscaled"].shape

In [None]:
# Per-continent flight-weight histograms, drawn on a 2 x 5 grid
continents = ["Africa", "North America", "South America", "Oceania", "Eastern Europe", "Western Europe", "Middle East", "South Asia", "Southeast-East Asia", "Central Asia"]
def plot_continents_zero_distributions(flight_matrix, ymax, title, save_name):
    """Draw one flight-weight histogram (with KDE) per continent on a 2 x 5 grid.

    Panel ``k`` shows every entry of ``flight_matrix[:, k, :]`` and is titled
    with ``continents[k]`` (module-level list) plus the sample skewness.

    Args:
        flight_matrix: 3-D array indexed as ``[:, node, :]``; needs at least as
            many nodes on axis 1 as there are panels.
        ymax: shared upper y-limit applied to every panel.
        title: figure-level title.
        save_name: destination path handed to ``savefig``.

    Returns:
        None. The figure is saved, shown, then closed.
    """
    fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(25,10))
    fig.suptitle(title, fontsize= 30)

    # axes.flat walks the grid row by row, the same order the original nested
    # row/column loops used, so panel k still pairs with continents[k].
    for idx, (panel, continent) in enumerate(zip(axes.flat, continents)):
        # Flatten once and reuse for the bin width, the histogram and the skewness.
        node_flights = flight_matrix[:, idx, :].flatten()
        # About 15 bins per panel, with a floor of 0.1 on the bin width.
        bin_width = max(node_flights.max() / 15., 0.1)
        sns.histplot(ax=panel, data=node_flights, binwidth=bin_width, kde=True)
        panel.set_ylim(0, ymax)
        skewness = round(stats.skew(node_flights), 5)
        panel.set_title("{}\n(Skewness: {})".format(continent, skewness))
        panel.set_xlabel("Number of Flights")
    fig.tight_layout()
    fig.savefig(save_name, bbox_inches='tight', facecolor='white')
    plt.show()
    # Release the large 25x10 figure explicitly; harmless if already closed.
    plt.close(fig)

In [None]:
plot_continents_zero_distributions(
    flight_matrix=datasetv17["flight_matrix_log10_scaled"],
    ymax=4500,
    title="Dataset v17 Log10 Flight Weight Distribution Per Continent",
    save_name="./v17_log10_all_flight_distrib.png"
)

In [None]:
datasetv17_smoothened_flights = np.load("./datasets/10_continents_dataset_v17_node_pert_7day_flight_smoothen.npz")
datasetv17_smoothened_flights.files

In [None]:
plot_continents_zero_distributions(
    flight_matrix=datasetv17_smoothened_flights["flight_matrix_log10_scaled"],
    ymax=4500,
    title="Dataset v17 Log10 7-day Smoothen Flight Weight Distribution Per Continent",
    save_name="./v17_log10_7day_smoothen_all_flight_distrib.png"
)

In [None]:
datasetv17_14day_smoothened_flights = np.load("./datasets/10_continents_dataset_v17_node_pert_14day_flight_cases_smoothen.npz")
datasetv17_14day_smoothened_flights.files

In [None]:
plot_continents_zero_distributions(
    flight_matrix=datasetv17_14day_smoothened_flights["flight_matrix_log10_scaled"],
    ymax=4500,
    title="Dataset v17 Log10 14-day Smoothen Flight Weight Distribution Per Continent",
    save_name="./v17_log10_14day_smoothen_all_flight_distrib.png"
)

In [None]:
datasetv17_no_flight_zeros = np.load("./datasets/10_continents_dataset_v17_node_pert_no_flight_zeros.npz")
datasetv17_no_flight_zeros.files

In [None]:
plot_continents_zero_distributions(
    flight_matrix=datasetv17_no_flight_zeros["flight_matrix_log10_scaled"],
    ymax=4500,
    title="Dataset v17 Log10 No Flight Zeros Flight Weight Distribution Per Continent",
    save_name="./v17_log10_no_flight_zeros_all_flight_distrib.png"
)

In [None]:
def plot_zero_contribution_barplot(flight_matrix):
    """Bar-plot, per continent, how many entries of ``flight_matrix[:, i, :]`` equal zero.

    Continent labels come from the module-level ``continents`` list; the first
    ten nodes on axis 1 are counted. The chart is written to
    ``./continent_zero_counts.png`` and then shown.

    Args:
        flight_matrix: 3-D array indexed as ``[:, node, :]`` with at least ten
            nodes on axis 1.

    Returns:
        None.
    """
    continent_names = []
    heights = []
    for i in range(10):
        # Put each word on its own line so long names fit under their bar.
        continent_names.append(continents[i].replace(" ", "\n"))
        # Count the zeros directly rather than building index arrays with np.where.
        # NOTE(review): this count includes the [:, i, i] entries; an earlier draft
        # subtracted the dataset length here — confirm whether they should be excluded.
        heights.append(int(np.count_nonzero(flight_matrix[:, i, :] == 0)))

    plt.figure(figsize=(12, 4))
    plt.bar(x=continent_names, height=heights)
    plt.title("Number of Zeros in Outgoing Flights Per Continent")
    plt.xlabel("Continent")
    plt.ylabel("Count")
    plt.savefig("./continent_zero_counts.png", bbox_inches="tight", facecolor="white")
    plt.show()

In [None]:
plot_zero_contribution_barplot(datasetv16['flight_matrix_log10_scaled'])

In [None]:
"\n".join(continents[1].split(" "))

# Dataset v18

In [None]:
datasetv18 = np.load("./datasets/10_continents_dataset_v18_node_pert.npz")
datasetv18.files

In [None]:
datasetv18["flight_matrix_unscaled"].shape

In [None]:
# Per-continent flight-weight histograms, drawn on a 2 x 5 grid.
# NOTE(review): this cell redefines `continents` and
# `plot_continents_zero_distributions`, both already defined earlier in this
# notebook (Dataset v17 section); consider keeping a single definition.
continents = ["Africa", "North America", "South America", "Oceania", "Eastern Europe", "Western Europe", "Middle East", "South Asia", "Southeast-East Asia", "Central Asia"]
def plot_continents_zero_distributions(flight_matrix, ymax, title, save_name):
    """Draw one flight-weight histogram (with KDE) per continent on a 2 x 5 grid.

    Panel ``k`` shows every entry of ``flight_matrix[:, k, :]`` and is titled
    with ``continents[k]`` (module-level list) plus the sample skewness.

    Args:
        flight_matrix: 3-D array indexed as ``[:, node, :]``; needs at least as
            many nodes on axis 1 as there are panels.
        ymax: shared upper y-limit applied to every panel.
        title: figure-level title.
        save_name: destination path handed to ``savefig``.

    Returns:
        None. The figure is saved, shown, then closed.
    """
    fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(25,10))
    fig.suptitle(title, fontsize= 30)

    # axes.flat walks the grid row by row, the same order the original nested
    # row/column loops used, so panel k still pairs with continents[k].
    for idx, (panel, continent) in enumerate(zip(axes.flat, continents)):
        # Flatten once and reuse for the bin width, the histogram and the skewness.
        node_flights = flight_matrix[:, idx, :].flatten()
        # About 15 bins per panel, with a floor of 0.1 on the bin width.
        bin_width = max(node_flights.max() / 15., 0.1)
        sns.histplot(ax=panel, data=node_flights, binwidth=bin_width, kde=True)
        panel.set_ylim(0, ymax)
        skewness = round(stats.skew(node_flights), 5)
        panel.set_title("{}\n(Skewness: {})".format(continent, skewness))
        panel.set_xlabel("Number of Flights")
    fig.tight_layout()
    fig.savefig(save_name, bbox_inches='tight', facecolor='white')
    plt.show()
    # Release the large 25x10 figure explicitly; harmless if already closed.
    plt.close(fig)

In [None]:
plot_continents_zero_distributions(
    flight_matrix=datasetv18["flight_matrix_log10_scaled"],
    ymax=5000,
    title="Dataset v18 Log10 Flight Weight Distribution Per Continent",
    save_name="./v18_log10_all_flight_distrib.png"
)

In [None]:
plot_continents_zero_distributions(
    flight_matrix=datasetv18["flight_matrix_unscaled"],
    ymax=5000,
    title="Dataset v18 Unscaled Flight Weight Distribution Per Continent",
    save_name="./v18_unscaled_all_flight_distrib.png"
)

In [None]:
len(np.where(datasetv18["flight_matrix_unscaled"].flatten() == 0)[0])

In [None]:
len(np.where(datasetv18["flight_matrix_log10_scaled"].flatten() == 0)[0])

In [None]:
datasetv18["flight_matrix_log10_scaled"].shape

In [None]:
datasetv18["flight_matrix_unscaled"].shape

In [None]:
567 * 100

In [None]:
4000 / 56700

In [None]:
len(np.where(datasetv18["flight_matrix_unscaled"].flatten() == 1)[0])

In [None]:
np.where(datasetv18["flight_matrix_unscaled"] == 1)

In [None]:
datasetv18["flight_matrix_unscaled"][0]

In [None]:
continents

## Plot Incoming and Outgoing Edges Over Rolling Windows

In [None]:
WINDOW_SIZE = 7
def plot_flights_over_days(dataset_flight_matrix, flight_type, dataset_tag="v15"):
    """Save a line plot of each country's total flights per day.

    Args:
        - dataset_flight_matrix: Shape (dataset_len, 10, 10)
        - flight_type: "Outgoing" (NaN-ignoring sum over axis 1), "Incoming"
          (sum over axis 2) or "Combined" (both sums added together).
        - dataset_tag: prefix of the output file name; the default keeps the
          previously hard-coded "v15".

    Output is written to "./<dataset_tag>_daywise_num_<flight_type>_flights.png";
    nothing is shown or returned.
    """
    assert flight_type in ["Incoming", "Outgoing", "Combined"]
    countries = ["Brazil", "Germany", "Spain", "France", "Britain", "India", "Italy", "Russia", "Turkey", "USA"]

    # NOTE(review): other cells in this notebook treat matrix[:, country, :] as a
    # country's *outgoing* flights. Under that convention, summing over axis 1
    # gives per-destination (incoming) totals — confirm that the "Outgoing" and
    # "Incoming" branches below are not swapped.
    if flight_type == "Outgoing":
        daywise_flights = np.nansum(dataset_flight_matrix, axis=1)
    elif flight_type == "Incoming":
        daywise_flights = np.nansum(dataset_flight_matrix, axis=2)
    else:  # "Combined"
        daywise_flights = np.nansum(dataset_flight_matrix, axis=1) + np.nansum(dataset_flight_matrix, axis=2)

    # Wide frame: one column per country plus the day index.
    visual_df = pd.DataFrame({name: daywise_flights[:, idx] for idx, name in enumerate(countries)})
    visual_df["Day Index"] = list(range(len(daywise_flights)))

    # Build the value label once; it names both the melted column and the y axis.
    value_label = 'Total Number of {} Flights'.format(flight_type)
    long_df = pd.melt(visual_df, ['Day Index'], value_name=value_label, var_name="Country")

    fig = plt.figure(figsize=(18, 6), dpi=80)
    sns.lineplot(x='Day Index', y=value_label, hue='Country', data=long_df)
    plt.title("Total Number of {} Flights Per Day".format(flight_type), fontsize= 16)

    filename = "{}_daywise_num_{}_flights".format(dataset_tag, flight_type)
    plt.savefig("./" + filename + '.png', bbox_inches='tight', facecolor='white')
    # Closing the figure also discards its contents, so no separate clf() is needed.
    plt.close(fig)


In [None]:
plot_flights_over_days(datasetv15['flight_matrix_unscaled'], flight_type="Outgoing")
plot_flights_over_days(datasetv15['flight_matrix_unscaled'], flight_type="Incoming")
plot_flights_over_days(datasetv15['flight_matrix_unscaled'], flight_type="Combined")

In [None]:
WINDOW_SIZE = 7
def plot_flights_over_rolling_windows(dataset_flight_matrix, flight_type, dataset_tag="v15"):
    """Save a line plot of each country's flights averaged over rolling windows.

    Daily per-country totals are computed first. Window ``k`` is then the mean
    of days ``k .. k + 30 + WINDOW_SIZE - 1`` (``WINDOW_SIZE`` is the
    module-level constant).

    Args:
        - dataset_flight_matrix: Shape (dataset_len, 10, 10)
        - flight_type: "Outgoing" (NaN-ignoring sum over axis 1), "Incoming"
          (sum over axis 2) or "Combined" (both sums added together).
        - dataset_tag: prefix of the output file name; the default keeps the
          previously hard-coded "v15".

    Raises:
        ValueError: if the dataset is too short to form a single window.

    Output is written to "./<dataset_tag>_roll_win_num_<flight_type>_flights.png";
    nothing is shown or returned.
    """
    assert flight_type in ["Incoming", "Outgoing", "Combined"]
    countries = ["Brazil", "Germany", "Spain", "France", "Britain", "India", "Italy", "Russia", "Turkey", "USA"]

    # NOTE(review): other cells in this notebook treat matrix[:, country, :] as a
    # country's *outgoing* flights. Under that convention, summing over axis 1
    # gives per-destination (incoming) totals — confirm that the "Outgoing" and
    # "Incoming" branches below are not swapped.
    if flight_type == "Outgoing":
        daywise_flights = np.nansum(dataset_flight_matrix, axis=1)
    elif flight_type == "Incoming":
        daywise_flights = np.nansum(dataset_flight_matrix, axis=2)
    else:  # "Combined"
        daywise_flights = np.nansum(dataset_flight_matrix, axis=1) + np.nansum(dataset_flight_matrix, axis=2)

    # presumably 30 history days plus a WINDOW_SIZE-day horizon, with one further
    # WINDOW_SIZE trimmed from the end — TODO confirm against the model's windowing
    span = 30 + WINDOW_SIZE
    num_windows = len(daywise_flights) - span - WINDOW_SIZE
    if num_windows <= 0:
        raise ValueError(
            "dataset has {} days; more than {} are needed to form a rolling window".format(
                len(daywise_flights), span + WINDOW_SIZE))
    roll_win_flights = np.array(
        [daywise_flights[start: start + span, :].mean(axis=0) for start in range(num_windows)]
    )

    # Wide frame: one column per country plus the window index.
    visual_df = pd.DataFrame({name: roll_win_flights[:, idx] for idx, name in enumerate(countries)})
    visual_df["Rolling Window Index"] = list(range(len(roll_win_flights)))

    # NOTE(review): the plotted values are window *means* of daily totals, although
    # the labels read "Total Number of ... Flights" — confirm the wording.
    value_label = 'Total Number of {} Flights'.format(flight_type)
    long_df = pd.melt(visual_df, ['Rolling Window Index'], value_name=value_label, var_name="Country")

    fig = plt.figure(figsize=(18, 6), dpi=80)
    sns.lineplot(x='Rolling Window Index', y=value_label, hue='Country', data=long_df)
    plt.title("Total Number of {} Flights Per Rolling Window".format(flight_type), fontsize= 16)

    filename = "{}_roll_win_num_{}_flights".format(dataset_tag, flight_type)
    plt.savefig("./" + filename + '.png', bbox_inches='tight', facecolor='white')
    # Closing the figure also discards its contents, so no separate clf() is needed.
    plt.close(fig)


In [None]:
plot_flights_over_rolling_windows(datasetv15['flight_matrix_unscaled'], flight_type="Outgoing")
plot_flights_over_rolling_windows(datasetv15['flight_matrix_unscaled'], flight_type="Incoming")
plot_flights_over_rolling_windows(datasetv15['flight_matrix_unscaled'], flight_type="Combined")

### Scatterplot between USA ranking and num_flights

In [None]:
ANALYSIS_DIR = "2022-03-03-21_17_42"
# Saved predictions of the analysis run identified by ANALYSIS_DIR.
predictions_path = "./analysis-runs-multiple-models/" + ANALYSIS_DIR + "/prediction_saves/DCSAGE_7day_100model_v15_10x10.npy"
roll_win_aggreg_diff_nested_list = np.load(predictions_path)
# Alternative input kept for reference:
# roll_win_aggreg_diff_nested_list = np.load("./sensitivity_MPNN_10x10_info.npy")
print(roll_win_aggreg_diff_nested_list.shape)

# Sensitivity scores: collapse axis 3 with a NaN-ignoring sum.
stacked_predictions = np.array(roll_win_aggreg_diff_nested_list)
sensitivty_score_nested_np = np.nansum(stacked_predictions, axis=3)
print("Shape:", sensitivty_score_nested_np.shape)

In [None]:
# Fit a normal distribution to the scores of every (rolling window, country)
# pair and keep the fitted location and scale separately.
loc_rows = []
scale_rows = []
for window_scores in sensitivty_score_nested_np:
    # One (loc, scale) tuple per country; samples run along axis 0 of the window slice.
    # An earlier draft used stats.gumbel_l.fit here instead of stats.norm.fit.
    fits = [stats.norm.fit(window_scores[:, country_idx]) for country_idx in range(10)]
    loc_rows.append([fit[0] for fit in fits])
    scale_rows.append([fit[1] for fit in fits])

roll_win_loc_params = np.array(loc_rows)
roll_win_scale_params = np.array(scale_rows)
print(roll_win_loc_params.shape)
print(roll_win_scale_params.shape)

In [None]:
# Index 9 is "USA" in the notebook's country list.
idx = 9
# Per-day mean of the log10-scaled weights in matrix[:, idx, :].
# NOTE(review): this reads dataset v14 while the predictions file loaded for this
# analysis is named "...v15..." — confirm the datasets are meant to differ.
daywise_outgoing_flights = np.nanmean(datasetv14['flight_matrix_log10_scaled'][:, idx, :], axis=1)
# 30-day rolling mean; the loop variable is named `start` so it does not shadow `idx`.
window_starts = range(len(daywise_outgoing_flights) - 30)
rollwin_outgoing_flights = np.array(
    [daywise_outgoing_flights[start: start+30].mean() for start in window_starts]
)
# Drop the last 14 windows (presumably to align with the prediction windows — TODO confirm).
rollwin_outgoing_flights = rollwin_outgoing_flights[:-14]

In [None]:
EULER_MASCHERONI_CONSTANT = 0.57721566490153286060651209008240243104215933593992

# Mean formula: Mu - Scale * Euler-Mascheroni constant, i.e. the mean of a
# left-skewed Gumbel distribution with location Mu and the given scale.
# NOTE(review): the cell that builds roll_win_loc_params / roll_win_scale_params
# currently fits stats.norm (the stats.gumbel_l fit is commented out). For a
# normal fit the mean is simply the location parameter — confirm which
# distribution is intended before interpreting these values.
# Computed for all (rolling window, country) pairs at once via broadcasting
# instead of an element-by-element double loop.
roll_win_gumbel_means = roll_win_loc_params - roll_win_scale_params * EULER_MASCHERONI_CONSTANT

In [None]:
plt.scatter(rollwin_outgoing_flights, roll_win_gumbel_means[:,idx])
plt.show()