# Setup: Clone Repository and Install Dependencies

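This notebook sets up a Google Colab environment for running experiments: it clones the repository, installs its dependencies, and configures the code and data paths used by the project modules.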

## Steps:
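1. Clone the repository from GitHub
2. Install dependencies
3. Mount Google Drive (optional, for large data storage)
4. Set up the code and data paths
5. Verify the installation by importing the project modules
6. Initialize the storage manager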


In [None]:
# Clone repository
!git clone https://github.com/EonTechie/semeval-context-tree-modular.git
%cd semeval-context-tree-modular


In [None]:
# Install dependencies
!pip install -r requirements.txt


In [None]:
# Mount Google Drive (optional - for large data storage)
# Drive is attached under /content/drive; running this cell triggers Colab's
# authorization prompt.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


In [None]:
# Setup paths
import sys
from pathlib import Path

# Root of the cloned repository (code and small metadata files).
BASE_PATH = Path('/content/semeval-context-tree-modular')

# Large files go to Google Drive when it is mounted. Mounting is optional, so
# when Drive is absent fall back to a local directory instead of silently
# creating a fake '/content/drive/MyDrive/...' tree on the ephemeral disk
# (a non-empty '/content/drive' can also prevent mounting Drive later).
DRIVE_ROOT = Path('/content/drive/MyDrive')
if DRIVE_ROOT.is_dir():
    DATA_PATH = DRIVE_ROOT / 'semeval_data'  # For large files
else:
    DATA_PATH = Path('/content/semeval_data')
    print("⚠️ Google Drive is not mounted - using a local data path that is lost when the runtime is recycled")

# Add to Python path so `src.*` modules resolve; skip if already present so
# re-running the cell does not pile up duplicate entries.
if str(BASE_PATH) not in sys.path:
    sys.path.insert(0, str(BASE_PATH))

# Create data directory if it doesn't exist
DATA_PATH.mkdir(parents=True, exist_ok=True)

print(f"✅ Base path: {BASE_PATH}")
print(f"✅ Data path: {DATA_PATH}")
print("✅ Python path updated")


In [None]:
# Verify imports
# Smoke test: importing the project modules fails here if the repository is
# not on the Python path or a dependency is missing.
from src.data.loader import load_dataset
from src.data.splitter import split_dataset
from src.features.extraction import (
    extract_batch_features_v2,
    featurize_hf_dataset_in_batches_v2,
)
from src.storage.manager import StorageManager

print("✅ All imports successful!")


In [None]:
# Initialize storage manager
# The repository root is passed as both `base_path` and `github_path`;
# `data_path` points at the separate large-data directory.
storage = StorageManager(base_path=str(BASE_PATH), data_path=str(DATA_PATH), github_path=str(BASE_PATH))

# Summarize where code/metadata and large data are stored.
print(
    "✅ Storage manager initialized!",
    f"   Code/Metadata: {BASE_PATH}",
    f"   Large Data: {DATA_PATH}",
    sep="\n",
)
