# 🛠️ QualiVault: Setup & Scan
**Goal:** Configure your regex patterns to correctly identify your interviews and audio files.

1. Edit `config.yml` in the project root to point to your audio folder.
2. Use this notebook to test if your regex patterns find the files correctly.
3. Initialize the Git repository for this project.
4. Generate the `processing_recipe.yaml` file.

In [None]:
%load_ext autoreload
%autoreload 2
import yaml
import re
import os
from pathlib import Path
from qualivault.core import scan_audio_folder, init_git_project

def load_config():
    """Load the project configuration from ``../config.yml``.

    The path is relative to the current working directory, i.e. one level
    above the directory this notebook runs in.

    Returns:
        The document parsed by ``yaml.safe_load``, or ``None`` when the file
        does not exist (a message is printed in that case).
    """
    config_path = Path('../config.yml')
    if not config_path.exists():
        print('‚ùå config.yml not found in project root.')
        return None
    # Decode explicitly as UTF-8: without it the locale's default encoding is
    # used, which can garble non-ASCII folder names on some platforms.
    with open(config_path, 'r', encoding='utf-8') as f:
        return yaml.safe_load(f)

# Load the configuration once; the cells below read `config` as a
# notebook-wide global.
config = load_config()
if config:
    # Echo the audio folder so a wrong path is spotted before scanning.
    print(
        "‚úÖ Loaded config. Scanning: "
        f"{config['paths']['org_audio_folder']}"
    )

In [None]:
# Step 3 of the intro: initialize the Git repository for this project.
# ".." is the project root, the same directory config.yml is loaded from.
init_git_project("..")

In [None]:
# Regex playground: the patterns come from config.yml, but you can overwrite
# the variables in this cell to experiment. Copy the final values back into
# config.yml to make them permanent.

# The folder pattern is mandatory; the two audio patterns fall back to
# built-in defaults when config.yml does not define them.
folder_regex = config['patterns']['folder_id_regex']
all_audio_regex = config['patterns'].get(
    'all_audio_regex',
    r".*\.(wav|mp3|m4a|flac)$",
)
target_audio_regex = config['patterns'].get(
    'target_audio_regex',
    r".*Trlr\.wav$",
)

# Show the effective patterns used by the audit cells below.
print(f"Folder Regex: {folder_regex}")
print(f"All Audio:    {all_audio_regex}")
print(f"Target Audio: {target_audio_regex}")

In [None]:
# 🔍 Audit Folders
# Lists the immediate subdirectories of the audio root and reports, for each
# one, whether its name matches `folder_regex` and which ID was captured.
base_path = Path(config['paths']['org_audio_folder'])
print(f"üìÇ Scanning: {base_path}\n")

# Always define `subdirs`, even when the folder is missing, so the file-audit
# cell that iterates over it sees an empty list instead of raising NameError.
subdirs = []

# is_dir() also rejects a path that points at a regular file, for which
# iterdir() would raise NotADirectoryError.
if not base_path.is_dir():
    print("‚ùå Folder not found!")
else:
    # Immediate children only (no recursion), sorted for stable output.
    subdirs = sorted(d for d in base_path.iterdir() if d.is_dir())

    for d in subdirs:
        match = re.search(folder_regex, d.name)
        if not match:
            print(f"‚ùå SKIP:  '{d.name}' (No regex match)")
            continue
        try:
            i_id = match.group('id')
        except IndexError:
            # The pattern matched but defines no named group 'id'.
            i_id = "(No 'id' group)"
        print(f"‚úÖ MATCH: '{d.name}' -> ID: {i_id}")

In [None]:
# 🔍 Audit Files (in matched folders)
# For every folder whose name matches `folder_regex`, split its files into
# "target" audio (matches `target_audio_regex`) and "other" audio (matches
# `all_audio_regex` only) and print both groups.
# NOTE(review): matching here uses re.IGNORECASE, while the later
# scan_audio_folder(...) call is given no flags -- confirm it matches the same
# way, otherwise this audit can list targets the real scan will not pick up.
print("--- File Analysis ---")

for d in subdirs:
    if not re.search(folder_regex, d.name):
        continue

    print(f"\nüìÅ {d.name}:")
    target_files = []
    other_files = []

    # iterdir() yields entries in arbitrary order; sort them so the printed
    # lists are stable between runs.
    for f in sorted(d.iterdir()):
        if not f.is_file():
            continue
        # A file that matches both patterns is counted as a target only.
        if re.match(target_audio_regex, f.name, re.IGNORECASE):
            target_files.append(f.name)
        elif re.match(all_audio_regex, f.name, re.IGNORECASE):
            other_files.append(f.name)

    if target_files:
        print(f"  üéØ Target ({len(target_files)}): {target_files}")
    else:
        print("  ‚ö†Ô∏è  NO TARGET AUDIO FOUND")

    if other_files:
        print(f"  üîä Other  ({len(other_files)}): {other_files}")

In [None]:
from itertools import islice

# Run the project scanner with the folder and target-audio patterns from the
# cells above. The result is used below as a mapping of interview ID to files.
interviews = scan_audio_folder(
    config['paths']['org_audio_folder'],
    folder_regex,
    target_audio_regex,
)

print(f"Found {len(interviews)} interviews.")
# Preview at most five entries. islice stops after the fifth item instead of
# copying every item into a throwaway list first.
for i_id, files in islice(interviews.items(), 5):
    print(f"ID: {i_id} -> {len(files)} files")

### Generate Recipe
If the scan above looks correct, run the cell below to generate the `processing_recipe.yaml` file.
You can then manually reorder files in the YAML if necessary.

In [None]:
# Build one recipe entry per scanned interview. Every entry starts as
# 'pending' and names its output file "Interview_<id>.flac".
# NOTE(review): yaml.dump (not safe_dump) is used below; if scan_audio_folder
# returns Path objects rather than strings they would be written with
# Python-specific tags -- confirm the element type.
recipe = [
    {
        'id': i_id,
        'status': 'pending',
        'output_name': f"Interview_{i_id}.flac",
        # Default order is a plain sort. sorted() returns a new list, so the
        # lists held in `interviews` are left untouched.
        'files': sorted(files),
    }
    for i_id, files in interviews.items()
]

recipe_path = Path('../processing_recipe.yaml')
try:
    # Mode 'x' creates the file only if it does not exist yet, so an existing
    # (possibly hand-edited) recipe is never overwritten. This also removes
    # the gap between a separate exists() check and the write.
    with open(recipe_path, 'x', encoding='utf-8') as f:
        yaml.dump(recipe, f, sort_keys=False)
except FileExistsError:
    print("‚ö†Ô∏è processing_recipe.yaml already exists. Delete it to regenerate.")
else:
    print(f"‚úÖ Generated recipe with {len(recipe)} items at {recipe_path}")