In [23]:
import os
import json
from collections import defaultdict
import zipfile
import tarfile
import tempfile
import shutil

def is_valid_uuid_folder(dirname):
    """Return True if `dirname` looks like a dash-less UUID.

    A name qualifies when it is exactly 32 characters long and every
    character is a hexadecimal digit (either case).
    """
    if len(dirname) != 32:
        return False
    hex_digits = set("0123456789abcdefABCDEF")
    return hex_digits.issuperset(dirname)

def extract_info(config_file):
    """Extract the fields used for the analysis from a configuration file.

    Args:
        config_file: Path to a JSON file whose top level is an object.

    Returns:
        A dict with the keys 'graph', 'size', 'op', 'epsilon' and 'trials'.
        'graph' and 'size' are read from the 'network' section ('kind' and
        'size' respectively); the others are read from the top level. Any
        missing value is reported as an empty string.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(config_file, 'r', encoding='utf-8') as f:
        config_data = json.load(f)

    # A missing or null 'network' section is treated like an empty one so
    # that it degrades to '' the same way the top-level fields do, instead
    # of aborting with a KeyError/TypeError.
    network = config_data.get('network')
    if not isinstance(network, dict):
        network = {}

    return {
        'graph': network.get('kind', ''),
        'size': network.get('size', ''),
        'op': config_data.get('op', ''),
        'epsilon': config_data.get('epsilon', ''),
        'trials': config_data.get('trials', ''),
    }

def get_uuid_folders(directory):
    """Retrieve the UUID-named folders found anywhere below `directory`.

    Args:
        directory: Root directory to search; a leading '~' is expanded.

    Returns:
        A list of paths (root joined with the relative location) of every
        sub-directory, at any depth, whose name passes
        `is_valid_uuid_folder`. The list is empty when the root does not
        exist or cannot be read.
    """
    path = os.path.expanduser(directory)

    def _report(err):
        # os.walk silently ignores listing errors unless an `onerror`
        # callback is supplied, so a try/except around the loop never
        # fires; report the problem here and let the walk continue.
        print(f"Error accessing folder: {err}")

    uuid_folders = []
    for dirpath, dirnames, _ in os.walk(path, onerror=_report):
        uuid_folders.extend(
            os.path.join(dirpath, dirname)
            for dirname in dirnames
            if is_valid_uuid_folder(dirname)
        )
    return uuid_folders

def extract_compressed_file(file_path, extract_to):
    """Extract a compressed file to the specified directory.

    Supported archives are selected by file-name suffix: '.zip', and the
    tar family '.tgz', '.tar.gz' and '.tar' (tar compression is detected
    automatically when the archive is opened).

    Args:
        file_path: Path of the archive to extract.
        extract_to: Directory that receives the extracted members.

    Returns:
        True when the archive was extracted, False when the suffix is not
        supported or extraction failed. Failures are reported with a
        printed message rather than raised.
    """
    try:
        if file_path.endswith('.zip'):
            with zipfile.ZipFile(file_path, 'r') as zip_ref:
                zip_ref.extractall(extract_to)
        elif file_path.endswith(('.tgz', '.tar.gz', '.tar')):
            # Use the 'data' extraction filter where the interpreter
            # provides it, so members cannot be written outside
            # `extract_to` (absolute paths, '..' components, escaping
            # links). Older interpreters fall back to the plain call.
            extract_kwargs = {'filter': 'data'} if hasattr(tarfile, 'data_filter') else {}
            try:
                with tarfile.open(file_path, 'r') as tar_ref:
                    tar_ref.extractall(extract_to, **extract_kwargs)
            except tarfile.ReadError as e:
                print(f"Tar read error for '{file_path}': {e}")
                return False
        else:
            print(f"Unsupported file type: {file_path}")
            return False
        return True
    except Exception as e:
        # Best-effort boundary: any other failure (corrupt zip, rejected
        # tar member, I/O error) is reported and turned into False.
        print(f"Error extracting compressed file '{file_path}': {e}")
        return False

def analyze_directory(directory):
    """Tally the configurations found under `directory` and print a report.

    Every UUID folder returned by `get_uuid_folders` that contains a
    'configuration.json' is counted under the combination of its graph
    kind, size, operation, epsilon and trials values. The report is
    printed grouped in that order; nothing is returned.
    """
    folders = get_uuid_folders(directory)
    print(f"UUID folders found: {len(folders)} in {directory}\n")

    # Nested mapping: graph -> size -> op -> epsilon -> trials -> count.
    tally = {}

    for folder in folders:
        config_path = os.path.join(folder, 'configuration.json')
        if not os.path.exists(config_path):
            # Folders without a configuration file are not counted.
            continue

        info = extract_info(config_path)
        by_size = tally.setdefault(info['graph'], {})
        by_op = by_size.setdefault(info['size'], {})
        by_epsilon = by_op.setdefault(info['op'], {})
        by_trials = by_epsilon.setdefault(info['epsilon'], {})
        by_trials[info['trials']] = by_trials.get(info['trials'], 0) + 1

    # Report, one section per graph type, in first-seen order.
    for graph, by_size in tally.items():
        print(f"Graph Type '{graph}':")
        for size, by_op in by_size.items():
            print(f"  Size '{size}':")
            for op, by_epsilon in by_op.items():
                print(f"    Operation '{op}':")
                for epsilon, by_trials in by_epsilon.items():
                    for trial, count in by_trials.items():
                        print(f"      Epsilon '{epsilon}': {count} occurrences with Trials '{trial}'")
        print('_'*81)
        print()
                        
def process_compressed_file(file_path, directory):
    """Extract an archive into a scratch directory and analyze its contents.

    Args:
        file_path: Path of the archive to process.
        directory: Not used by this function; kept in the signature for
            existing callers.

    The scratch directory is removed when the function returns.
    """
    print(f"Found a compressed file: {file_path}")
    with tempfile.TemporaryDirectory() as scratch_dir:
        extracted = extract_compressed_file(file_path, scratch_dir)
        if not extracted:
            print(f"Failed to extract {file_path}.")
        else:
            analyze_directory(scratch_dir)
    

# Root directory whose contents are analyzed.
# directory = '/Users/prudhvivuda/Documents/polygraphs-cache/1/'
directory = '/Users/prudhvivuda/Documents/p/'

# First pass: report on the UUID folders that are already unpacked.
analyze_directory(directory)

# Second pass: report separately on every supported archive found anywhere
# below the root directory.
for root, _, files in os.walk(directory):
    for file in files:
        file_path = os.path.join(root, file)
        if not file_path.endswith(('.zip', '.tgz', '.tar.gz', '.tar')):
            continue
        process_compressed_file(file_path, directory)


UUID folders found: 266 in /Users/prudhvivuda/Documents/p/

Graph Type 'wattsstrogatz':
  Size '8':
    Operation 'OConnorWeatherallOp':
      Epsilon '0.001': 5 occurrences with Trials '64'
  Size '16':
    Operation 'OConnorWeatherallOp':
      Epsilon '0.001': 5 occurrences with Trials '64'
_________________________________________________________________________________

Graph Type 'barabasialbert':
  Size '8':
    Operation 'OConnorWeatherallOp':
      Epsilon '0.001': 5 occurrences with Trials '64'
  Size '16':
    Operation 'OConnorWeatherallOp':
      Epsilon '0.001': 10 occurrences with Trials '64'
_________________________________________________________________________________

Graph Type 'snap':
  Size 'None':
    Operation 'BalaGoyalOp':
      Epsilon '0.001': 5 occurrences with Trials '10'
_________________________________________________________________________________

Found a compressed file: /Users/prudhvivuda/Documents/p/Polarization/2022-02-17-003.zip
UUID folders f