# CFLP (Capacitated Facility Location Problem) Raw Data Exploration

In [None]:
import json
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Configuration
# Relative path to the raw CFLP instance (JSON). It is handed to open()
# unchanged, so it resolves against the notebook's current working directory.
# NOTE(review): the file name suggests a 10-customer / 10-facility instance — confirm.
CFLP_DATA_FILE = 'models/CFLP/data/capfacloc_data_10cust_10fac.json'

## 1. Load Raw Data

In [None]:
# Load the raw CFLP instance into `cflp_raw_data`. On any handled failure the
# variable is set to an empty dict, so code that checks `if cflp_raw_data:`
# simply does nothing instead of raising.
try:
    # JSON text is UTF-8; pin the encoding so the platform's locale default
    # (e.g. cp1252 on Windows) cannot mis-decode non-ASCII content.
    with open(CFLP_DATA_FILE, 'r', encoding='utf-8') as f:
        cflp_raw_data = json.load(f)
except FileNotFoundError:
    print(f"Error: Data file not found at {CFLP_DATA_FILE}")
    cflp_raw_data = {}
except (json.JSONDecodeError, UnicodeDecodeError) as e:
    # UnicodeDecodeError: the file is not valid UTF-8, which is reported the
    # same way as malformed JSON rather than crashing the cell.
    print(f"Error decoding JSON from {CFLP_DATA_FILE}: {e}")
    cflp_raw_data = {}
else:
    # Success path kept out of the try body so only I/O and parsing are guarded.
    print(f"Successfully loaded raw CFLP data from {CFLP_DATA_FILE}")
    print("Keys available:", cflp_raw_data.keys())

## 2. Explore Data Structure and Contents

In [None]:
# Text summary of the loaded instance. Skipped entirely when `cflp_raw_data`
# is empty (i.e. nothing was loaded).
if cflp_raw_data:
    # Flat fields are printed verbatim; a missing key prints as None.
    for heading, field in (
        ("\nDemands:", 'demands'),
        ("\nCapacities:", 'capacities'),
        ("\nFixed Costs:", 'fixed_costs'),
    ):
        print(heading, cflp_raw_data.get(field))

    print("\nTransportation Costs (first 3x3):")
    # Show only the top-left corner so a large matrix does not flood the output.
    if 'transportation_costs' in cflp_raw_data:
        # Pairing with range(3) caps the loop at the first three rows.
        for _, cost_row in zip(range(3), cflp_raw_data['transportation_costs']):
            print(cost_row[:3])

    # Problem size as this notebook defines it: number of demand entries and
    # number of capacity entries (0 when the key is absent).
    n_customers = len(cflp_raw_data.get('demands', []))
    print(f"\nNumber of Customers: {n_customers}")
    n_facilities = len(cflp_raw_data.get('capacities', []))
    print(f"Number of Facilities: {n_facilities}")

## 3. Basic Data Visualizations

In [None]:
# Basic plots of the loaded instance. Nothing is drawn when `cflp_raw_data`
# is empty.
if cflp_raw_data:
    # One histogram per 1-D field:
    #   (data key, plot title, x-axis label, extra keyword args for histplot).
    # The first entry passes no colour, so seaborn's default is used.
    histogram_specs = (
        ('demands', 'Distribution of Demands', 'Demand Value', {}),
        ('capacities', 'Distribution of Capacities', 'Capacity Value',
         {'color': 'orange'}),
        ('fixed_costs', 'Distribution of Fixed Costs', 'Fixed Cost Value',
         {'color': 'green'}),
    )
    for field, title, x_label, extra_kwargs in histogram_specs:
        values = cflp_raw_data.get(field)
        if not values:
            # Key missing or value empty: no plot for this field.
            continue
        plt.figure(figsize=(8, 5))
        sns.histplot(values, kde=True, **extra_kwargs)
        plt.title(title)
        plt.xlabel(x_label)
        plt.ylabel('Frequency')
        plt.show()

    # Transportation costs as an annotated heatmap, only for small matrices.
    cost_matrix = cflp_raw_data.get('transportation_costs')
    if cost_matrix:
        tc_df = pd.DataFrame(cost_matrix)
        n_rows, n_cols = tc_df.shape
        if n_rows > 20 or n_cols > 20:
            # Beyond 20x20 the per-cell annotations become unreadable.
            print("Transportation costs matrix too large for heatmap visualization.")
        else:
            plt.figure(figsize=(10, 8))
            sns.heatmap(tc_df, annot=True, cmap='viridis', fmt=".1f")
            plt.title('Transportation Costs Heatmap')
            # NOTE(review): DataFrame rows are drawn on the y-axis and columns
            # on the x-axis, so these labels assume the matrix is indexed as
            # [facility][customer] — confirm against the data generator.
            plt.xlabel('Customer Index')
            plt.ylabel('Facility Index')
            plt.show()