In [1]:
import csv
from collections import defaultdict
import os

def parse_values(value_str):
    return tuple(sorted(map(int, value_str.replace(" ", "").split(","))))

def analyze_patterns(file_path):
    patterns = defaultdict(list)
    
    with open(file_path, 'r') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            pattern = (
                parse_values(row['tax']),
                parse_values(row['ssp']),
                parse_values(row['ag']),
                parse_values(row['soc'])
            )
            patterns[pattern].append(row['FileName'])
    
    return patterns

def get_scenarios_from_file(base_folder, filename):
    scenarios = set()
    full_path = os.path.join(base_folder, '93_csv_only', filename + '.csv')
    print(f"Attempting to open: {full_path}")
    if os.path.exists(full_path):
        print(f"File exists: {full_path}")
    else:
        print(f"File does not exist: {full_path}")
    try:
        with open(full_path, 'r') as f:
            reader = csv.reader(f)
            next(reader)  # Skip header
            for row in reader:
                scenarios.add(tuple(row))  # Assuming each row represents a unique scenario
    except FileNotFoundError:
        print(f"Warning: File {full_path} not found.")
    except PermissionError:
        print(f"Permission denied: Unable to read {full_path}")
    except Exception as e:
        print(f"Error reading {full_path}: {str(e)}")
    return scenarios

def check_folder_structure(base_folder):
    csv_folder = os.path.join(base_folder, '93_csv_only')
    if not os.path.exists(csv_folder):
        print(f"Error: Folder not found: {csv_folder}")
        return False
    print(f"CSV folder found: {csv_folder}")
    csv_files = [f for f in os.listdir(csv_folder) if f.endswith('.csv')]
    print(f"Number of CSV files found: {len(csv_files)}")
    print("First few CSV files:")
    for file in csv_files[:5]:
        print(f"  {file}")
    return True

def analyze_overlapping_scenarios(patterns, base_folder):
    overlaps = defaultdict(lambda: defaultdict(int))
    
    combinations = [combo for combo, files in patterns.items() if len(files) > 1]
    
    for i, combo1 in enumerate(combinations):
        files1 = patterns[combo1]
        scenarios1 = set()
        for file in files1:
            scenarios1.update(get_scenarios_from_file(base_folder, file))
        
        for combo2 in combinations[i+1:]:
            files2 = patterns[combo2]
            scenarios2 = set()
            for file in files2:
                scenarios2.update(get_scenarios_from_file(base_folder, file))
            
            overlap = len(scenarios1.intersection(scenarios2))
            overlaps[combo1][combo2] = overlap
            overlaps[combo2][combo1] = overlap
    
    return overlaps

def print_analysis(patterns, overlaps):
    print("Combinations appearing more than once:")
    for combo, files in patterns.items():
        if len(files) > 1:
            print(f"\nCombination: TAX{combo[0]} SSP{combo[1]} AG{combo[2]} SOC{combo[3]}")
            print(f"Frequency: {len(files)}")
            print("Files:")
            for file in files:
                print(f"  - {file}")
    
    print("\nOverlapping scenarios between combinations:")
    for combo1, others in overlaps.items():
        print(f"\nOverlaps for TAX{combo1[0]} SSP{combo1[1]} AG{combo1[2]} SOC{combo1[3]}:")
        for combo2, overlap in others.items():
            if overlap > 0:
                print(f"  With TAX{combo2[0]} SSP{combo2[1]} AG{combo2[2]} SOC{combo2[3]}: {overlap} scenarios")

# Usage
file_path = 'consolidated_sets_scenarios_93.csv'
base_folder = 'C:/Users/HP/Desktop/OSTI_Application_DD_Water_Basins/7. BOI_OSTI_29_June/OS_overlap_analysis'

if not check_folder_structure(base_folder):
    print("Exiting due to folder structure issues.")
    exit()

patterns = analyze_patterns(file_path)
overlaps = analyze_overlapping_scenarios(patterns, base_folder)
print_analysis(patterns, overlaps)

Error: Folder not found: C:/Users/HP/Desktop/OSTI_Application_DD_Water_Basins/7. BOI_OSTI_29_June/OS_overlap_analysis\93_csv_only
Exiting due to folder structure issues.
Attempting to open: C:/Users/HP/Desktop/OSTI_Application_DD_Water_Basins/7. BOI_OSTI_29_June/OS_overlap_analysis\93_csv_only\AfricaNorthern_NileR_W_cluster_7_scenarios.csv
File does not exist: C:/Users/HP/Desktop/OSTI_Application_DD_Water_Basins/7. BOI_OSTI_29_June/OS_overlap_analysis\93_csv_only\AfricaNorthern_NileR_W_cluster_7_scenarios.csv
Attempting to open: C:/Users/HP/Desktop/OSTI_Application_DD_Water_Basins/7. BOI_OSTI_29_June/OS_overlap_analysis\93_csv_only\CA_AmuDaryaR_W_cluster_2_scenarios.csv
File does not exist: C:/Users/HP/Desktop/OSTI_Application_DD_Water_Basins/7. BOI_OSTI_29_June/OS_overlap_analysis\93_csv_only\CA_AmuDaryaR_W_cluster_2_scenarios.csv
Attempting to open: C:/Users/HP/Desktop/OSTI_Application_DD_Water_Basins/7. BOI_OSTI_29_June/OS_overlap_analysis\93_csv_only\India_GangesR_W_cluster_3_scena