# Delete Operations Demo

This notebook demonstrates how SyftWatcher records file deletions, and walks through manually inspecting its append-only log and restoring a deleted file from it.

## Setup

In [None]:
import sys
import os
import time
import json
import tempfile
import shutil
from pathlib import Path
from datetime import datetime

# Add parent directory to path
sys.path.insert(0, str(Path().absolute().parent))

from syft_sync import SyftWatcher
from tests.test_utils import (
    extract_syft_archive,
    read_file_from_archive,
    get_archive_metadata,
    verify_syft_message_content,
    get_version_archive
)

## Create Test Directories

In [None]:
# Build a throwaway sandbox under a fresh temp directory: one folder that
# will be watched and one folder that will receive the log
temp_dir = Path(tempfile.mkdtemp())
watch_dir, log_dir = temp_dir / "watched", temp_dir / "logs"

for sandbox_dir in (watch_dir, log_dir):
    sandbox_dir.mkdir(parents=True)

print(f"Watch directory: {watch_dir}", f"Log directory: {log_dir}", sep="\n")

## Initialize Watcher

In [None]:
# Create and start the watcher.
# The watched and log directories are handed over as plain strings, and
# verbose mode is switched on.
watcher = SyftWatcher(
    watch_path=str(watch_dir),
    log_path=str(log_dir),
    verbose=True
)

watcher.start()
print("Watcher started!")
# Short pause before later cells touch the watched directory — presumably to
# let the watcher finish starting up; confirm against SyftWatcher.start()
time.sleep(0.5)

## Create Some Test Files

In [None]:
# Sample files for the demo, kept as (filename, content) pairs
test_files = list({
    "important_doc.txt": "This document contains important information that should not be lost.",
    "config.json": json.dumps({"app": "demo", "version": "1.0", "debug": True}, indent=2),
    "script.py": "#!/usr/bin/env python3\nprint('Hello from delete demo')\n",
}.items())

# Write them into the watched directory one at a time, pausing after each
for name, body in test_files:
    (watch_dir / name).write_text(body)
    print(f"Created: {name}")
    time.sleep(0.5)

## Inspect Current Log Structure

In [None]:
def show_log_structure(log_dir):
    """Print an indented tree of every directory and file under ``log_dir``.

    Args:
        log_dir: Root directory to display, as a ``str`` or ``pathlib.Path``.
            A trailing path separator is tolerated.

    Each directory is printed as ``name/`` and each file on its own line,
    indented two spaces per level below ``log_dir``. Directories and files
    are listed in sorted order so repeated runs print the same thing.
    """
    top = os.fspath(log_dir)
    print("\nLog directory structure:")
    for root, dirs, files in os.walk(top):
        # Sorting in place fixes the order in which os.walk descends
        dirs.sort()
        # Depth comes from the path relative to the top, so a trailing
        # separator or a repeated path fragment cannot skew it
        rel = os.path.relpath(root, top)
        level = 0 if rel == os.curdir else rel.count(os.sep) + 1
        indent = ' ' * 2 * level
        # normpath drops a trailing separator so the top directory keeps its name
        print(f'{indent}{os.path.basename(os.path.normpath(root))}/')
        sub_indent = ' ' * 2 * (level + 1)
        for file in sorted(files):
            print(f'{sub_indent}{file}')

# Print the log directory tree as it stands after the test files were created
show_log_structure(log_dir)

In [None]:
# List the .tar.gz archives sitting directly in the log directory, by name
archives = list(log_dir.glob('*.tar.gz'))
archives.sort()
print(f"\nFound {len(archives)} archives:")
for archive_path in archives:
    print(f"  - {archive_path.name}")

## Delete a File

In [None]:
# Delete one of the files that was created in the watched directory earlier
file_to_delete = watch_dir / "important_doc.txt"
print(f"Deleting: {file_to_delete.name}")
# unlink() removes the file from disk; it raises FileNotFoundError if the
# file is already gone (e.g. when this cell is run a second time)
file_to_delete.unlink()

# Wait for the delete event to be processed
# NOTE(review): one second is a fixed guess, not a confirmation from the watcher
time.sleep(1.0)

print("File deleted!")

## Inspect Log After Deletion

In [None]:
# Show the log directory tree again now that important_doc.txt has been
# deleted, for comparison with the listing printed before the deletion
show_log_structure(log_dir)

In [None]:
# Re-scan the log directory so `archives` reflects the state after the delete
archives = list(log_dir.glob('*.tar.gz'))
archives.sort()
print(f"\nNow have {len(archives)} archives:")
for archive_path in archives:
    print(f"  - {archive_path.name}")

## Examine Archive Contents

In [None]:
# Build one summary record per metadata entry found in the archives.
# Entries that lack 'version_id' or 'event_type' are skipped.
all_events = []
for archive_path in archives:
    metadata_list = get_archive_metadata(archive_path)
    for metadata in metadata_list:
        if 'version_id' in metadata and 'event_type' in metadata:
            event_info = {
                'archive': archive_path.name,
                'version_id': metadata['version_id'],
                'event_type': metadata['event_type'],
                'file_name': metadata.get('file_name', 'N/A'),
                'timestamp': metadata.get('timestamp', 'N/A')
            }
            all_events.append(event_info)


def _event_sort_key(event):
    """Order events by timestamp, placing events without a timestamp last.

    The 'N/A' placeholder is never compared against a real timestamp, so the
    sort cannot raise TypeError when real timestamps are not strings.
    """
    timestamp = event['timestamp']
    if timestamp == 'N/A':
        return (1, 0)
    return (0, timestamp)


# Sort by timestamp (stable, so events with equal keys keep archive order)
all_events.sort(key=_event_sort_key)

# Display events
print("\nAll events in chronological order:")
for event in all_events:
    print(f"\nArchive: {event['archive']}")
    print(f"  Event: {event['event_type']}")
    print(f"  File: {event['file_name']}")
    print(f"  Version: {event['version_id']}")
    print(f"  Time: {event['timestamp']}")

## Find and Restore Deleted File

In [None]:
# Look up the earliest listed 'file_created' event for the file we deleted;
# create_event stays None when there is no such event
deleted_file_name = "important_doc.txt"
create_event = next(
    (
        candidate
        for candidate in all_events
        if candidate['event_type'] == 'file_created'
        and candidate['file_name'] == deleted_file_name
    ),
    None,
)

if not create_event:
    print(f"No create event found for {deleted_file_name}")
else:
    print(f"Found create event for {deleted_file_name}:")
    print(f"  Version ID: {create_event['version_id']}")
    print(f"  Archive: {create_event['archive']}")

In [None]:
# Print the deleted file's content as read back from the archive that holds
# its creation version
if create_event:
    archive = get_version_archive(log_dir, create_event['version_id'])
    if not archive:
        print("Archive not found!")
    else:
        try:
            content = read_file_from_archive(archive, deleted_file_name)
            # Header, rule, content, rule — each on its own line
            print(
                f"\nRecovered content from archive:",
                "-" * 50,
                content,
                "-" * 50,
                sep="\n",
            )
        except Exception as e:
            print(f"Error reading from archive: {e}")

In [None]:
# Restore the deleted file from the archive that holds its creation version
if create_event:
    archive = get_version_archive(log_dir, create_event['version_id'])
    if archive:
        # The copy goes into the watched directory under a "restored_" name
        restore_path = watch_dir / f"restored_{deleted_file_name}"

        # extract_syft_archive returns a mapping; this cell uses its
        # "data_dir" and "temp_dir" entries
        info = extract_syft_archive(archive)
        try:
            source_file = info["data_dir"] / deleted_file_name
            if source_file.exists():
                shutil.copy2(source_file, restore_path)
                print(f"\nFile restored to: {restore_path}")
                print(f"Content: {restore_path.read_text()}")
            else:
                print(f"File not found in archive data directory")
        finally:
            # Always remove the extraction directory, even if the copy failed
            shutil.rmtree(info["temp_dir"], ignore_errors=True)
    else:
        # Report the failed lookup instead of silently restoring nothing
        print("Archive not found!")

## Delete Events Analysis

In [None]:
# Keep only the events whose type is 'file_deleted' and list them
delete_events = list(
    filter(lambda entry: entry['event_type'] == 'file_deleted', all_events)
)
print(f"\nFound {len(delete_events)} delete events:")
for deleted in delete_events:
    print(f"\n  File: {deleted['file_name']}")
    print(f"  Time: {deleted['timestamp']}")
    print(f"  Archive: {deleted['archive']}")

## Examine a Specific Archive

In [None]:
# Inspect one archive in detail: the first entry of the sorted archive list
if archives:
    archive_to_examine = archives[0]
    print(f"\nExamining archive: {archive_to_examine.name}")

    # Unpack it; the extraction directory is removed in the finally below
    info = extract_syft_archive(archive_to_examine)
    try:
        print(f"\nArchive structure:")
        print(f"  Message directory: {info['message_dir'].name}")
        print(f"  Has data directory: {info['data_dir'].exists()}")

        metadata_paths = info['metadata_files']
        print(f"  Metadata files: {len(metadata_paths)}")
        for metadata_path in metadata_paths:
            print(f"    - {metadata_path.name}")

        data_paths = info['data_files']
        print(f"  Data files: {len(data_paths)}")
        for data_path in data_paths:
            print(f"    - {data_path.name}")

        # Dump the first metadata file as pretty-printed JSON
        if metadata_paths:
            with open(metadata_paths[0], 'r') as handle:
                metadata = json.load(handle)
            print(f"\nMetadata content:")
            print(json.dumps(metadata, indent=2))
    finally:
        shutil.rmtree(info["temp_dir"], ignore_errors=True)

## Stop Watcher

In [None]:
# Shut the watcher down
watcher.stop()
print("Watcher stopped.")

# Report the counters returned by the watcher, one per line
stats = watcher.get_stats()
print(f"\nFinal statistics:")
for label, stat_key in (
    ("Files created", 'files_created'),
    ("Files modified", 'files_modified'),
    ("Files deleted", 'files_deleted'),
    ("Total versions", 'total_versions'),
):
    print(f"  {label}: {stats[stat_key]}")

## Cleanup

In [None]:
# Optionally remove the temporary directories created for this demo.
# input() cannot be answered when the notebook is executed without an
# interactive frontend; in that case keep the directories rather than abort.
try:
    cleanup = input("\nCleanup temporary directories? (y/n): ")
except (EOFError, NotImplementedError):
    # IPython's StdinNotImplementedError derives from NotImplementedError
    cleanup = 'n'

# Tolerate surrounding whitespace in the answer, e.g. "y "
if cleanup.strip().lower() == 'y':
    shutil.rmtree(temp_dir)
    print("Cleaned up!")
else:
    print(f"\nDirectories preserved at:")
    print(f"  Watch: {watch_dir}")
    print(f"  Log: {log_dir}")