# CFLP Raw Data Exploration

In [5]:
import json
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import re
import numpy as np

# Configuration
CFLP_DATA_FILE = 'models/CFLP/data/capfacloc_data_10cust_10fac.json'


def _find_project_root(start, marker):
    """Return the closest directory at or above ``start`` that contains ``marker``.

    Args:
        start: Directory where the upward search begins.
        marker: Path, relative to a candidate directory, that must exist there.

    Returns:
        The matching directory as a ``Path``, or ``None`` when neither ``start``
        nor any of its ancestors contains ``marker``.
    """
    start = Path(os.path.abspath(start))
    for candidate in (start, *start.parents):
        if (candidate / marker).exists():
            return candidate
    return None


# `__file__` is not defined inside a notebook; the previously used quoted "__file__"
# was simply resolved against the working directory, so start from the working directory.
notebook_dir = os.getcwd()
analysis_dir = os.path.join(notebook_dir, os.pardir)

# Find the project root by searching upwards for the data file instead of blindly
# moving two levels up: re-running this cell then keeps the same working directory
# rather than climbing two more directories on every execution.
located_root = _find_project_root(notebook_dir, CFLP_DATA_FILE)
if located_root is not None:
    project_root = str(located_root)
else:
    # Previous behaviour, kept as a fallback: two levels above the starting directory.
    project_root = os.path.join(analysis_dir, os.pardir)
    print(f"Warning: {CFLP_DATA_FILE} not found at or above {notebook_dir}; "
          "falling back to two directory levels up.")
os.chdir(project_root)

# You can verify the new working directory
print(f"Current Working Directory: {os.getcwd()}")

# Extract data configuration string (e.g., '10cust_10fac')
data_config_match = re.search(r'capfacloc_data_([\w_]+)\.json', CFLP_DATA_FILE)
data_config_str = data_config_match.group(1) if data_config_match else 'default'

# Plots are grouped per data configuration, relative to the project root set above.
OUTPUT_PLOTS_DIR = Path(f'results/cflp_raw_data_exploration_plots/{data_config_str}')
OUTPUT_PLOTS_DIR.mkdir(parents=True, exist_ok=True)

print(f"Plots will be saved to: {OUTPUT_PLOTS_DIR}")

Current Working Directory: /home/timpi/Projects/thesis/multi_agent_supply_chain_optimization
Plots will be saved to: results/cflp_raw_data_exploration_plots/10cust_10fac


## 1. Load Raw Data

In [6]:
# Read the raw CFLP instance. On any load problem, report it and fall back to an
# empty dict, which the later cells test with `if cflp_raw_data:` before using it.
try:
    # JSON is read as UTF-8 explicitly so the result does not depend on the platform default.
    with open(CFLP_DATA_FILE, 'r', encoding='utf-8') as f:
        loaded_payload = json.load(f)
except FileNotFoundError:
    print(f"Error: Data file not found at {CFLP_DATA_FILE}")
    cflp_raw_data = {}
except json.JSONDecodeError as e:
    print(f"Error decoding JSON from {CFLP_DATA_FILE}: {e}")
    cflp_raw_data = {}
else:
    if isinstance(loaded_payload, dict):
        cflp_raw_data = loaded_payload
        print(f"Successfully loaded raw CFLP data from {CFLP_DATA_FILE}")
        print("Keys available:", cflp_raw_data.keys())
    else:
        # The following cells call `.get(...)` on the payload, so anything other
        # than a JSON object is treated as a failed load instead of crashing later.
        print(f"Error: Expected a JSON object in {CFLP_DATA_FILE}, got {type(loaded_payload).__name__}")
        cflp_raw_data = {}

Successfully loaded raw CFLP data from models/CFLP/data/capfacloc_data_10cust_10fac.json
Keys available: dict_keys(['demands', 'capacities', 'fixed_costs', 'transportation_costs'])


## 2. Explore Data Structure and Contents

In [7]:
# Print counts, summary statistics and a short preview for every field of the raw data.
if not cflp_raw_data:
    print("No raw data loaded for exploration.")
else:
    demands = cflp_raw_data.get('demands', [])
    capacities = cflp_raw_data.get('capacities', [])
    fixed_costs = cflp_raw_data.get('fixed_costs', [])
    transportation_costs = cflp_raw_data.get('transportation_costs', [])

    # Problem size is taken from the lengths of the demand and capacity lists.
    num_customers = len(demands)
    num_facilities = len(capacities)

    print(f"\nNumber of Customers: {num_customers}")
    print(f"Number of Facilities: {num_facilities}")

    def _stats_line(values):
        """Format min / max / mean / standard deviation (numpy defaults) of ``values``."""
        return f"Min: {np.min(values):.2f}, Max: {np.max(values):.2f}, Avg: {np.mean(values):.2f}, Std: {np.std(values):.2f}"

    # The three one-dimensional fields share the same report layout.
    vector_sections = (
        ("\n--- Demands ---", demands),
        ("\n--- Capacities ---", capacities),
        ("\n--- Fixed Costs ---", fixed_costs),
    )
    for section_header, section_values in vector_sections:
        print(section_header)
        print(f"Count: {len(section_values)}")
        if section_values:
            print(_stats_line(section_values))
        print("First 5 values:", section_values[:5])

    print("\n--- Transportation Costs ---")
    row_count = len(transportation_costs)
    # The column count is read from the first row only.
    col_count = len(transportation_costs[0]) if transportation_costs else 0
    print(f"Matrix Shape: ({row_count}x{col_count})")
    if transportation_costs:
        # Only the top-left corner is shown so large matrices stay readable.
        print("First 3x3 sub-matrix:")
        for matrix_row in transportation_costs[:3]:
            print([f'{x:.2f}' for x in matrix_row[:3]])
        # Statistics are computed over all entries of the matrix.
        flat_tc = [item for sublist in transportation_costs for item in sublist]
        if flat_tc:
            print(_stats_line(flat_tc))


Number of Customers: 10
Number of Facilities: 10

--- Demands ---
Count: 10
Min: 9.00, Max: 35.00, Avg: 21.00, Std: 9.64
First 5 values: [15, 32, 9, 35, 33]

--- Capacities ---
Count: 10
Min: 83.00, Max: 118.00, Avg: 102.50, Std: 11.48
First 5 values: [98, 115, 87, 98, 105]

--- Fixed Costs ---
Count: 10
Min: 152.00, Max: 195.00, Avg: 169.30, Std: 14.61
First 5 values: [173, 154, 181, 157, 152]

--- Transportation Costs ---
Matrix Shape: (10x10)
First 3x3 sub-matrix:
['19.00', '17.00', '11.00']
['16.00', '18.00', '15.00']
['17.00', '13.00', '10.00']
Min: 10.00, Max: 19.00, Avg: 14.54, Std: 2.83


## 3. Basic Data Visualizations

In [8]:
# Largest row/column count for which the annotated heatmap is still drawn.
MAX_HEATMAP_DIM = 20


def _save_histogram(values, title, xlabel, filename, color=None):
    """Draw a histogram with a KDE overlay and save it under ``OUTPUT_PLOTS_DIR``.

    Args:
        values: Sequence of numbers to plot.
        title: Figure title.
        xlabel: Label of the x axis.
        filename: File name of the image inside ``OUTPUT_PLOTS_DIR``.
        color: Optional colour passed to seaborn; ``None`` keeps seaborn's default.
    """
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.histplot(values, kde=True, color=color, ax=ax)
    ax.set(title=title, xlabel=xlabel, ylabel='Frequency')
    target_path = OUTPUT_PLOTS_DIR / filename
    fig.savefig(target_path)
    # Close the figure explicitly so figures do not accumulate in the kernel.
    plt.close(fig)
    print(f"Plot saved: {target_path}")


if cflp_raw_data:
    demands = cflp_raw_data.get('demands', [])
    capacities = cflp_raw_data.get('capacities', [])
    fixed_costs = cflp_raw_data.get('fixed_costs', [])
    transportation_costs = cflp_raw_data.get('transportation_costs', [])

    # One histogram per one-dimensional field; empty fields are skipped.
    histogram_specs = [
        (demands, 'Distribution of Demands', 'Demand Value', 'demands_distribution.png', None),
        (capacities, 'Distribution of Capacities', 'Capacity Value', 'capacities_distribution.png', 'orange'),
        (fixed_costs, 'Distribution of Fixed Costs', 'Fixed Cost Value', 'fixed_costs_distribution.png', 'green'),
    ]
    for values, title, xlabel, filename, color in histogram_specs:
        if values:
            _save_histogram(values, title, xlabel, filename, color=color)

    # Transportation Costs heatmap (if not too large)
    if transportation_costs:
        tc_df = pd.DataFrame(transportation_costs)
        if tc_df.shape[0] <= MAX_HEATMAP_DIM and tc_df.shape[1] <= MAX_HEATMAP_DIM:  # Limit for readability
            fig, ax = plt.subplots(figsize=(10, 8))
            sns.heatmap(tc_df, annot=True, cmap='viridis', fmt=".1f", ax=ax)
            # NOTE(review): the labels assume rows are facilities and columns are
            # customers - confirm against the layout produced by the data generator.
            ax.set(title='Transportation Costs Heatmap', xlabel='Customer Index', ylabel='Facility Index')
            heatmap_path = OUTPUT_PLOTS_DIR / 'transportation_costs_heatmap.png'
            fig.savefig(heatmap_path)
            plt.close(fig)
            print(f"Plot saved: {heatmap_path}")
        else:
            print("Transportation costs matrix too large for heatmap visualization. Plotting distribution instead.")
            # Plot distribution of flattened transportation costs
            flat_tc = [item for sublist in transportation_costs for item in sublist]
            if flat_tc:
                _save_histogram(
                    flat_tc,
                    'Distribution of Transportation Costs',
                    'Transportation Cost Value',
                    'transportation_costs_distribution.png',
                    color='purple',
                )
else:
    print("No raw data loaded for visualization.")

Plot saved: results/cflp_raw_data_exploration_plots/10cust_10fac/demands_distribution.png
Plot saved: results/cflp_raw_data_exploration_plots/10cust_10fac/capacities_distribution.png
Plot saved: results/cflp_raw_data_exploration_plots/10cust_10fac/fixed_costs_distribution.png
Plot saved: results/cflp_raw_data_exploration_plots/10cust_10fac/transportation_costs_heatmap.png


## 4. Conclusion

In [9]:
# Final status line: confirm completion and report the directory the figures were written to.
completion_message = "CFLP raw data exploration complete. Plots saved to:"
print(completion_message, OUTPUT_PLOTS_DIR)

CFLP raw data exploration complete. Plots saved to: results/cflp_raw_data_exploration_plots/10cust_10fac
