**Consolidate CSV & JSON Files from Many Folders to One**

In [None]:
# Whoop Data - CSV & JSON from RawWhoopData -> ConsolidatedWhoopData

import os
import shutil
import hashlib

# ----------- Configuration Section -----------
# These are folder NAMES, not paths: each one is located by searching the
# directory tree beneath the current working directory.
# Folder whose whole sub-tree is scanned for CSV/JSON files to move.
source_folder_name = "RawWhoopData"
# Folder that receives every CSV/JSON file, flattened into a single level.
destination_folder_name = "ConsolidatedWhoopData"
# ----------------------------------------------

def calculate_file_hash(file_path):
    """Calculate the SHA256 hash of a file.

    The file is read in fixed-size chunks so that a large export never has to
    fit in memory all at once.

    Args:
        file_path: Path of the file to hash.

    Returns:
        The hex-encoded SHA256 digest of the file's bytes.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    hasher = hashlib.sha256()
    with open(file_path, 'rb') as f:
        # iter() with a sentinel stops on the empty bytes object read at EOF.
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            hasher.update(chunk)
    return hasher.hexdigest()

def find_folder(base_folder, target_folder_name):
    """Locate a directory called ``target_folder_name`` beneath ``base_folder``.

    The tree is traversed with ``os.walk`` and the first directory whose
    immediate children include the target name wins. ``base_folder`` itself
    is never returned, only directories found below it.

    Args:
        base_folder: Directory at which the search starts.
        target_folder_name: Exact directory name to look for.

    Returns:
        The path of the first match in walk order, or ``None`` if there is none.
    """
    matches = (
        os.path.join(parent, target_folder_name)
        for parent, child_dirs, _ in os.walk(base_folder)
        if target_folder_name in child_dirs
    )
    # The generator is lazy, so the walk stops as soon as one match is produced.
    return next(matches, None)

def move_csv_and_json_files(source, destination):
    """Move every CSV and JSON file found under ``source`` into ``destination``.

    The whole tree below ``source`` is searched and matching files are placed
    directly in ``destination`` (no sub-folders are recreated). Extensions are
    matched case-insensitively, ignoring surrounding whitespace in the name.
    When the target name is already taken, ``_1``, ``_2``, ... is appended to
    the base name until a free name is found.

    ``destination`` is excluded from the search, so when it is located inside
    ``source`` (or is ``source`` itself) the files it already holds are not
    renamed and re-counted.

    Args:
        source: Root directory to search.
        destination: Directory that receives the files. It is created when
            the first matching file is found and it does not exist yet.

    Returns:
        A tuple ``(csv_files_moved, json_files_moved)``.
    """
    csv_files_moved = 0
    json_files_moved = 0
    destination_key = os.path.normcase(os.path.abspath(destination))

    for root, dirs, files in os.walk(source):
        # Pruning ``dirs`` in place stops os.walk from descending into the
        # destination folder.
        dirs[:] = [
            d for d in dirs
            if os.path.normcase(os.path.abspath(os.path.join(root, d))) != destination_key
        ]
        # Files that already sit directly in the destination stay where they are.
        if os.path.normcase(os.path.abspath(root)) == destination_key:
            continue

        for file in files:
            file_clean = file.strip().lower()
            is_csv = file_clean.endswith('.csv')
            if not (is_csv or file_clean.endswith('.json')):
                continue

            # Created lazily: nothing is created when there is nothing to move.
            os.makedirs(destination, exist_ok=True)

            source_path = os.path.join(root, file)
            destination_path = os.path.join(destination, file)

            # If a file with the same name exists, pick the next free suffix.
            base, ext = os.path.splitext(file)
            counter = 1
            while os.path.exists(destination_path):
                destination_path = os.path.join(destination, f"{base}_{counter}{ext}")
                counter += 1

            shutil.move(source_path, destination_path)

            if is_csv:
                csv_files_moved += 1
            else:
                json_files_moved += 1

    return csv_files_moved, json_files_moved

def delete_subfolders_in_source(source):
    """Remove every directory that sits directly inside ``source``.

    Each sub-directory is deleted together with everything it contains;
    files placed directly in ``source`` are not touched.

    Args:
        source: Directory whose immediate sub-directories are removed.

    Returns:
        The number of sub-directories that were removed.
    """
    entries = (os.path.join(source, name) for name in os.listdir(source))
    # The full list is built before anything is deleted.
    doomed = list(filter(os.path.isdir, entries))
    for path in doomed:
        shutil.rmtree(path)
    return len(doomed)

def remove_duplicate_files(destination):
    """Delete CSV/JSON files in ``destination`` whose content duplicates another.

    Only regular files directly inside ``destination`` are examined. Files are
    compared by the hash returned from ``calculate_file_hash``. They are
    visited in sorted name order, so among identical files the one whose name
    sorts first is kept and the later ones are deleted. The outcome therefore
    does not depend on the operating system's directory listing order.

    Args:
        destination: Directory to de-duplicate.

    Returns:
        A tuple ``(duplicate_csv, duplicate_json)`` with the number of
        deleted files of each type.
    """
    seen_hashes = set()
    duplicate_csv = 0
    duplicate_json = 0

    # os.listdir() returns names in arbitrary order; sort for a stable result.
    for file in sorted(os.listdir(destination)):
        file_path = os.path.join(destination, file)
        file_clean = file.strip().lower()
        is_csv = file_clean.endswith('.csv')

        if not (os.path.isfile(file_path) and (is_csv or file_clean.endswith('.json'))):
            continue

        file_hash = calculate_file_hash(file_path)
        if file_hash not in seen_hashes:
            seen_hashes.add(file_hash)
            continue

        # Duplicate found
        os.remove(file_path)
        # Classified with the same cleaned name used by the filter above, so
        # every deleted file is counted.
        if is_csv:
            duplicate_csv += 1
        else:
            duplicate_json += 1

    return duplicate_csv, duplicate_json

# ----------- Main Execution -----------
# Get the current working directory; both folders are searched for beneath it.
current_dir = os.getcwd()

# Find source and destination folders dynamically
source_folder = find_folder(current_dir, source_folder_name)
destination_folder = find_folder(current_dir, destination_folder_name)

# Both folders must already exist; otherwise report the missing one and do nothing.
if not source_folder:
    print(f"Source folder '{source_folder_name}' not found.")
elif not destination_folder:
    print(f"Destination folder '{destination_folder_name}' not found.")
else:
    # Move files
    csv_moved, json_moved = move_csv_and_json_files(source_folder, destination_folder)

    # Delete subfolders. This runs after the move and removes each subfolder
    # with everything still inside it (i.e. files that were not CSV/JSON).
    subfolders_deleted = delete_subfolders_in_source(source_folder)

    # Remove duplicate files (compared by content hash) from the destination
    duplicate_csv_deleted, duplicate_json_deleted = remove_duplicate_files(destination_folder)

    # Final Output
    print(f"CSV Files Moved: {csv_moved}")
    print(f"JSON Files Moved: {json_moved}")
    print(f"Source Sub Folders Deleted: {subfolders_deleted}")
    print(f"Duplicate CSVs Deleted: {duplicate_csv_deleted}")
    print(f"Duplicate JSONs Deleted: {duplicate_json_deleted}")


In [None]:
# Garmin Data - CSV & JSON from RawGarminData -> ConsolidatedGarminData

import os
import shutil
import hashlib

# ----------- Configuration Section -----------
# These are folder NAMES, not paths: each one is located by searching the
# directory tree beneath the current working directory.
# Folder whose whole sub-tree is scanned for CSV/JSON files to move.
source_folder_name = "RawGarminData"
# Folder that receives every CSV/JSON file, flattened into a single level.
destination_folder_name = "ConsolidatedGarminData"
# ----------------------------------------------

def calculate_file_hash(file_path):
    """Calculate the SHA256 hash of a file.

    The file is read in fixed-size chunks so that a large export never has to
    fit in memory all at once.

    Args:
        file_path: Path of the file to hash.

    Returns:
        The hex-encoded SHA256 digest of the file's bytes.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    hasher = hashlib.sha256()
    with open(file_path, 'rb') as f:
        # iter() with a sentinel stops on the empty bytes object read at EOF.
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            hasher.update(chunk)
    return hasher.hexdigest()

def find_folder(base_folder, target_folder_name):
    """Locate a directory called ``target_folder_name`` beneath ``base_folder``.

    The tree is traversed with ``os.walk`` and the first directory whose
    immediate children include the target name wins. ``base_folder`` itself
    is never returned, only directories found below it.

    Args:
        base_folder: Directory at which the search starts.
        target_folder_name: Exact directory name to look for.

    Returns:
        The path of the first match in walk order, or ``None`` if there is none.
    """
    matches = (
        os.path.join(parent, target_folder_name)
        for parent, child_dirs, _ in os.walk(base_folder)
        if target_folder_name in child_dirs
    )
    # The generator is lazy, so the walk stops as soon as one match is produced.
    return next(matches, None)

def move_csv_and_json_files(source, destination):
    """Move every CSV and JSON file found under ``source`` into ``destination``.

    The whole tree below ``source`` is searched and matching files are placed
    directly in ``destination`` (no sub-folders are recreated). Extensions are
    matched case-insensitively, ignoring surrounding whitespace in the name.
    When the target name is already taken, ``_1``, ``_2``, ... is appended to
    the base name until a free name is found.

    ``destination`` is excluded from the search, so when it is located inside
    ``source`` (or is ``source`` itself) the files it already holds are not
    renamed and re-counted.

    Args:
        source: Root directory to search.
        destination: Directory that receives the files. It is created when
            the first matching file is found and it does not exist yet.

    Returns:
        A tuple ``(csv_files_moved, json_files_moved)``.
    """
    csv_files_moved = 0
    json_files_moved = 0
    destination_key = os.path.normcase(os.path.abspath(destination))

    for root, dirs, files in os.walk(source):
        # Pruning ``dirs`` in place stops os.walk from descending into the
        # destination folder.
        dirs[:] = [
            d for d in dirs
            if os.path.normcase(os.path.abspath(os.path.join(root, d))) != destination_key
        ]
        # Files that already sit directly in the destination stay where they are.
        if os.path.normcase(os.path.abspath(root)) == destination_key:
            continue

        for file in files:
            file_clean = file.strip().lower()
            is_csv = file_clean.endswith('.csv')
            if not (is_csv or file_clean.endswith('.json')):
                continue

            # Created lazily: nothing is created when there is nothing to move.
            os.makedirs(destination, exist_ok=True)

            source_path = os.path.join(root, file)
            destination_path = os.path.join(destination, file)

            # If a file with the same name exists, pick the next free suffix.
            base, ext = os.path.splitext(file)
            counter = 1
            while os.path.exists(destination_path):
                destination_path = os.path.join(destination, f"{base}_{counter}{ext}")
                counter += 1

            shutil.move(source_path, destination_path)

            if is_csv:
                csv_files_moved += 1
            else:
                json_files_moved += 1

    return csv_files_moved, json_files_moved

def delete_subfolders_in_source(source):
    """Remove every directory that sits directly inside ``source``.

    Each sub-directory is deleted together with everything it contains;
    files placed directly in ``source`` are not touched.

    Args:
        source: Directory whose immediate sub-directories are removed.

    Returns:
        The number of sub-directories that were removed.
    """
    entries = (os.path.join(source, name) for name in os.listdir(source))
    # The full list is built before anything is deleted.
    doomed = list(filter(os.path.isdir, entries))
    for path in doomed:
        shutil.rmtree(path)
    return len(doomed)

def remove_duplicate_files(destination):
    """Delete CSV/JSON files in ``destination`` whose content duplicates another.

    Only regular files directly inside ``destination`` are examined. Files are
    compared by the hash returned from ``calculate_file_hash``. They are
    visited in sorted name order, so among identical files the one whose name
    sorts first is kept and the later ones are deleted. The outcome therefore
    does not depend on the operating system's directory listing order.

    Args:
        destination: Directory to de-duplicate.

    Returns:
        A tuple ``(duplicate_csv, duplicate_json)`` with the number of
        deleted files of each type.
    """
    seen_hashes = set()
    duplicate_csv = 0
    duplicate_json = 0

    # os.listdir() returns names in arbitrary order; sort for a stable result.
    for file in sorted(os.listdir(destination)):
        file_path = os.path.join(destination, file)
        file_clean = file.strip().lower()
        is_csv = file_clean.endswith('.csv')

        if not (os.path.isfile(file_path) and (is_csv or file_clean.endswith('.json'))):
            continue

        file_hash = calculate_file_hash(file_path)
        if file_hash not in seen_hashes:
            seen_hashes.add(file_hash)
            continue

        # Duplicate found
        os.remove(file_path)
        # Classified with the same cleaned name used by the filter above, so
        # every deleted file is counted.
        if is_csv:
            duplicate_csv += 1
        else:
            duplicate_json += 1

    return duplicate_csv, duplicate_json

# ----------- Main Execution -----------
# Get the current working directory; both folders are searched for beneath it.
current_dir = os.getcwd()

# Find source and destination folders dynamically
source_folder = find_folder(current_dir, source_folder_name)
destination_folder = find_folder(current_dir, destination_folder_name)

# Both folders must already exist; otherwise report the missing one and do nothing.
if not source_folder:
    print(f"Source folder '{source_folder_name}' not found.")
elif not destination_folder:
    print(f"Destination folder '{destination_folder_name}' not found.")
else:
    # Move files
    csv_moved, json_moved = move_csv_and_json_files(source_folder, destination_folder)

    # Delete subfolders. This runs after the move and removes each subfolder
    # with everything still inside it (i.e. files that were not CSV/JSON).
    subfolders_deleted = delete_subfolders_in_source(source_folder)

    # Remove duplicate files (compared by content hash) from the destination
    duplicate_csv_deleted, duplicate_json_deleted = remove_duplicate_files(destination_folder)

    # Final Output
    print(f"CSV Files Moved: {csv_moved}")
    print(f"JSON Files Moved: {json_moved}")
    print(f"Source Sub Folders Deleted: {subfolders_deleted}")
    print(f"Duplicate CSVs Deleted: {duplicate_csv_deleted}")
    print(f"Duplicate JSONs Deleted: {duplicate_json_deleted}")


**Consolidate CSV & JSON Files with Similar Values**

In [None]:
# Garmin UDSFile Consolidation - Many JSON to One - ConsolidatedGarminData -> CleanGarminData

import os
import json
import shutil

# ----------- Configuration Section -----------
# Folder names (not paths); each is located by searching beneath the current
# working directory.
# Folder whose tree is searched for the JSON files to merge.
source_folder_name = "ConsolidatedGarminData"
# Folder in which the merged JSON file is written.
destination_folder_name = "CleanGarminData"
# Despite the variable name, this is matched against the file NAME only
# (case-insensitively), not against the directory part of the path.
keyword_in_path = "UDSFile"  # 🔹 Only JSON files with this keyword in the FILENAME
# Name of the single merged file written into the destination folder.
output_consolidated_filename = "Consolidated_UDSFile.json"
# ----------------------------------------------

def find_folder(base_folder, target_folder_name):
    """Locate a directory called ``target_folder_name`` beneath ``base_folder``.

    The tree is traversed with ``os.walk`` and the first directory whose
    immediate children include the target name wins. ``base_folder`` itself
    is never returned, only directories found below it.

    Args:
        base_folder: Directory at which the search starts.
        target_folder_name: Exact directory name to look for.

    Returns:
        The path of the first match in walk order, or ``None`` if there is none.
    """
    matches = (
        os.path.join(parent, target_folder_name)
        for parent, child_dirs, _ in os.walk(base_folder)
        if target_folder_name in child_dirs
    )
    # The generator is lazy, so the walk stops as soon as one match is produced.
    return next(matches, None)

def consolidate_json_files(source_folder, keyword):
    """Collect the records of every JSON file whose name contains ``keyword``.

    ``source_folder`` is searched recursively. A file is selected when its
    name ends with ``.json`` and contains ``keyword`` (both compared
    case-insensitively). A top-level JSON object contributes one record; a
    top-level JSON array contributes each of its elements.

    A file that cannot be read or parsed, or whose top-level value is neither
    an object nor an array, is reported on stdout and left out of the result.
    A file is therefore only listed for deletion once its content has been
    captured in the returned records.

    Args:
        source_folder: Root directory to search.
        keyword: Substring that must appear in the file name.

    Returns:
        A tuple ``(all_records, files_to_delete, jsons_consolidated)``: the
        collected records, the paths of the files they came from, and the
        number of those files.
    """
    all_records = []
    files_to_delete = []
    keyword_lower = keyword.lower()

    for root, dirs, files in os.walk(source_folder):
        # os.walk yields names in arbitrary order; sorting both lists makes
        # the traversal, and with it the record order, reproducible.
        dirs.sort()
        for file in sorted(files):
            name_lower = file.lower()
            if not (name_lower.endswith('.json') and keyword_lower in name_lower):
                continue

            file_path = os.path.join(root, file)
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except Exception as e:
                # Best effort: report the unreadable file and keep going.
                print(f"Error reading {file_path}: {e}")
                continue

            if isinstance(data, dict):
                all_records.append(data)
            elif isinstance(data, list):
                all_records.extend(data)
            else:
                # Nothing was captured, so the file must not be deleted later.
                print(f"Skipping {file_path}: top-level JSON value is not an object or array")
                continue

            files_to_delete.append(file_path)

    return all_records, files_to_delete, len(files_to_delete)

def remove_duplicate_records(records):
    """Drop records that repeat an earlier one, keeping first occurrences.

    Two records count as equal when they serialize to the same JSON text with
    keys sorted, so the key order inside an object does not matter.

    Args:
        records: Iterable of JSON-serializable values.

    Returns:
        A tuple ``(unique_records, duplicates_removed)``; the unique records
        keep the order in which they were first seen.
    """
    first_by_key = {}
    total = 0
    for total, item in enumerate(records, start=1):
        # setdefault stores only the first record seen for each serialization.
        first_by_key.setdefault(json.dumps(item, sort_keys=True), item)

    kept = list(first_by_key.values())
    return kept, total - len(kept)

def move_final_json(destination_folder, output_filename, consolidated_records):
    """Write ``consolidated_records`` as JSON into the destination folder.

    The folder is created when it does not exist. A file of the same name is
    overwritten. The output is UTF-8 text indented by four spaces.

    Args:
        destination_folder: Directory that receives the file.
        output_filename: Name of the JSON file to write.
        consolidated_records: JSON-serializable data to store.
    """
    folder_missing = not os.path.exists(destination_folder)
    if folder_missing:
        os.makedirs(destination_folder)

    with open(os.path.join(destination_folder, output_filename), 'w', encoding='utf-8') as out:
        json.dump(consolidated_records, out, indent=4)

def delete_source_jsons(files_to_delete):
    """Remove the source files whose records have already been merged.

    Args:
        files_to_delete: Paths of the files to remove.

    Returns:
        The number of paths in ``files_to_delete``.
    """
    for consumed_path in files_to_delete:
        os.remove(consumed_path)

    removed_total = len(files_to_delete)
    return removed_total

# ----------- Main Execution -----------
# Both folders are looked up by name beneath the current working directory.
current_dir = os.getcwd()

source_folder = find_folder(current_dir, source_folder_name)
destination_folder = find_folder(current_dir, destination_folder_name)

# Both folders must already exist; otherwise report the missing one and do nothing.
if not source_folder:
    print(f"Source folder '{source_folder_name}' not found.")
elif not destination_folder:
    print(f"Destination folder '{destination_folder_name}' not found.")
else:
    all_records, files_to_delete, jsons_consolidated = consolidate_json_files(source_folder, keyword_in_path)

    consolidated_records, duplicates_removed = remove_duplicate_records(all_records)
    total_records = len(consolidated_records)

    if files_to_delete:
        # Write the merged file first; the sources are deleted only afterwards.
        move_final_json(destination_folder, output_consolidated_filename, consolidated_records)
        jsons_deleted = delete_source_jsons(files_to_delete)
    else:
        # Nothing matched (for example on a re-run after the sources were
        # already merged and deleted). Writing now would replace an existing
        # consolidated file with an empty list, so the output is left alone.
        jsons_deleted = 0
        print(f"No JSON files matching '{keyword_in_path}' found; '{output_consolidated_filename}' was not written.")

    # Final Output
    print(f"JSONs Consolidated: {jsons_consolidated}")
    print(f"Source JSONs Deleted: {jsons_deleted}")
    print(f"Duplicate Records Removed: {duplicates_removed}")
    print(f"Total Records Consolidated: {total_records}")


In [9]:
# Garmin TrainingHistory Consolidation - Many JSON to One - ConsolidatedGarminData -> CleanGarminData

import os
import json
import shutil

# ----------- Configuration Section -----------
# Folder names (not paths); each is located by searching beneath the current
# working directory.
# Folder whose tree is searched for the JSON files to merge.
source_folder_name = "ConsolidatedGarminData"
# Folder in which the merged JSON file is written.
destination_folder_name = "CleanGarminData"
# Despite the variable name, this is matched against the file NAME only
# (case-insensitively), not against the directory part of the path.
keyword_in_path = "TrainingHistory"  # 🔹 Only JSON files with this keyword in the FILENAME
# Name of the single merged file written into the destination folder.
output_consolidated_filename = "Consolidated_TrainingHistory.json"
# ----------------------------------------------

def find_folder(base_folder, target_folder_name):
    """Locate a directory called ``target_folder_name`` beneath ``base_folder``.

    The tree is traversed with ``os.walk`` and the first directory whose
    immediate children include the target name wins. ``base_folder`` itself
    is never returned, only directories found below it.

    Args:
        base_folder: Directory at which the search starts.
        target_folder_name: Exact directory name to look for.

    Returns:
        The path of the first match in walk order, or ``None`` if there is none.
    """
    matches = (
        os.path.join(parent, target_folder_name)
        for parent, child_dirs, _ in os.walk(base_folder)
        if target_folder_name in child_dirs
    )
    # The generator is lazy, so the walk stops as soon as one match is produced.
    return next(matches, None)

def consolidate_json_files(source_folder, keyword):
    """Collect the records of every JSON file whose name contains ``keyword``.

    ``source_folder`` is searched recursively. A file is selected when its
    name ends with ``.json`` and contains ``keyword`` (both compared
    case-insensitively). A top-level JSON object contributes one record; a
    top-level JSON array contributes each of its elements.

    A file that cannot be read or parsed, or whose top-level value is neither
    an object nor an array, is reported on stdout and left out of the result.
    A file is therefore only listed for deletion once its content has been
    captured in the returned records.

    Args:
        source_folder: Root directory to search.
        keyword: Substring that must appear in the file name.

    Returns:
        A tuple ``(all_records, files_to_delete, jsons_consolidated)``: the
        collected records, the paths of the files they came from, and the
        number of those files.
    """
    all_records = []
    files_to_delete = []
    keyword_lower = keyword.lower()

    for root, dirs, files in os.walk(source_folder):
        # os.walk yields names in arbitrary order; sorting both lists makes
        # the traversal, and with it the record order, reproducible.
        dirs.sort()
        for file in sorted(files):
            name_lower = file.lower()
            if not (name_lower.endswith('.json') and keyword_lower in name_lower):
                continue

            file_path = os.path.join(root, file)
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except Exception as e:
                # Best effort: report the unreadable file and keep going.
                print(f"Error reading {file_path}: {e}")
                continue

            if isinstance(data, dict):
                all_records.append(data)
            elif isinstance(data, list):
                all_records.extend(data)
            else:
                # Nothing was captured, so the file must not be deleted later.
                print(f"Skipping {file_path}: top-level JSON value is not an object or array")
                continue

            files_to_delete.append(file_path)

    return all_records, files_to_delete, len(files_to_delete)

def remove_duplicate_records(records):
    """Drop records that repeat an earlier one, keeping first occurrences.

    Two records count as equal when they serialize to the same JSON text with
    keys sorted, so the key order inside an object does not matter.

    Args:
        records: Iterable of JSON-serializable values.

    Returns:
        A tuple ``(unique_records, duplicates_removed)``; the unique records
        keep the order in which they were first seen.
    """
    first_by_key = {}
    total = 0
    for total, item in enumerate(records, start=1):
        # setdefault stores only the first record seen for each serialization.
        first_by_key.setdefault(json.dumps(item, sort_keys=True), item)

    kept = list(first_by_key.values())
    return kept, total - len(kept)

def move_final_json(destination_folder, output_filename, consolidated_records):
    """Write ``consolidated_records`` as JSON into the destination folder.

    The folder is created when it does not exist. A file of the same name is
    overwritten. The output is UTF-8 text indented by four spaces.

    Args:
        destination_folder: Directory that receives the file.
        output_filename: Name of the JSON file to write.
        consolidated_records: JSON-serializable data to store.
    """
    folder_missing = not os.path.exists(destination_folder)
    if folder_missing:
        os.makedirs(destination_folder)

    with open(os.path.join(destination_folder, output_filename), 'w', encoding='utf-8') as out:
        json.dump(consolidated_records, out, indent=4)

def delete_source_jsons(files_to_delete):
    """Remove the source files whose records have already been merged.

    Args:
        files_to_delete: Paths of the files to remove.

    Returns:
        The number of paths in ``files_to_delete``.
    """
    for consumed_path in files_to_delete:
        os.remove(consumed_path)

    removed_total = len(files_to_delete)
    return removed_total

# ----------- Main Execution -----------
# Both folders are looked up by name beneath the current working directory.
current_dir = os.getcwd()

source_folder = find_folder(current_dir, source_folder_name)
destination_folder = find_folder(current_dir, destination_folder_name)

# Both folders must already exist; otherwise report the missing one and do nothing.
if not source_folder:
    print(f"Source folder '{source_folder_name}' not found.")
elif not destination_folder:
    print(f"Destination folder '{destination_folder_name}' not found.")
else:
    all_records, files_to_delete, jsons_consolidated = consolidate_json_files(source_folder, keyword_in_path)

    consolidated_records, duplicates_removed = remove_duplicate_records(all_records)
    total_records = len(consolidated_records)

    if files_to_delete:
        # Write the merged file first; the sources are deleted only afterwards.
        move_final_json(destination_folder, output_consolidated_filename, consolidated_records)
        jsons_deleted = delete_source_jsons(files_to_delete)
    else:
        # Nothing matched (for example on a re-run after the sources were
        # already merged and deleted). Writing now would replace an existing
        # consolidated file with an empty list, so the output is left alone.
        jsons_deleted = 0
        print(f"No JSON files matching '{keyword_in_path}' found; '{output_consolidated_filename}' was not written.")

    # Final Output
    print(f"JSONs Consolidated: {jsons_consolidated}")
    print(f"Source JSONs Deleted: {jsons_deleted}")
    print(f"Duplicate Records Removed: {duplicates_removed}")
    print(f"Total Records Consolidated: {total_records}")


JSONs Consolidated: 15
Source JSONs Deleted: 15
Duplicate Records Removed: 21
Total Records Consolidated: 2346
