# Archaeological Site Discovery Pipeline

This notebook demonstrates the complete pipeline for discovering archaeological sites in the Upper Xingu region using AI and satellite imagery.

## Setup and Initialization

In [None]:
import os
from pathlib import Path
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from src.pipeline import Pipeline
import logging
import json

# Configure logging
# Set the root logger to INFO before the pipeline is constructed.
logging.basicConfig(level=logging.INFO)

# Initialize pipeline
# The config path is relative to the kernel's working directory.
# NOTE(review): a later cell builds the pipeline from '../configs/config.json' instead —
# confirm which working directory this notebook is meant to run from.
pipeline = Pipeline('configs/config.json')

# Xingu Archaeological Site Detection Pipeline

This notebook implements the archaeological site detection pipeline using satellite imagery and historical data analysis.

In [None]:
import sys
import os
from pathlib import Path
sys.path.append('..')

import torch
import numpy as np
import geopandas as gpd
from src.pipeline import Pipeline
from src.visualization import ArchaeologicalVisualizer
import matplotlib.pyplot as plt
%matplotlib inline

## 1. Initialize Pipeline

Load configuration and set up the pipeline components.

In [None]:
# Initialize pipeline
# The config path is relative to the kernel's working directory.
pipeline = Pipeline('configs/config.json')
# Visualizer built with its default arguments.
# NOTE(review): `visualizer` is not referenced again in the visible cells — confirm it is still needed.
visualizer = ArchaeologicalVisualizer()

## 2. Data Collection and Processing

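Run the full pipeline, which collects satellite imagery and historical data for the study area and returns the validated sites together with the visualization results.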

In [None]:
# Run the pipeline
# run() hands back two values, unpacked here as the validated sites and the visualization results.
# NOTE(review): presumably this executes every stage end to end — confirm against Pipeline.run.
validated_sites, visualization_results = pipeline.run()

## 3. Visualize Results

Create interactive visualizations of detected archaeological sites.

In [None]:
# Display Kepler.gl map
# Leaving the lookup as the cell's final expression lets Jupyter render it as the cell output.
visualization_results['kepler_map']

## 4. Analyze Site Distribution

Analyze the spatial distribution of detected sites.

In [None]:
# Map of the validated sites, shaded by the value in their 'probability' column.
fig, ax = plt.subplots(figsize=(12, 8))
validated_sites.plot(
    column='probability',
    cmap='viridis',
    legend=True,
    legend_kwds=dict(label='Detection Probability'),
    ax=ax,
)
ax.set_title('Archaeological Site Distribution')
plt.show()

# Echoes of the Xingu: AI-Powered Discovery of Lost Amazonian Urbanism

This notebook demonstrates the complete workflow for discovering archaeological sites in the Upper Xingu River Basin using satellite imagery, LIDAR data, and historical records.

In [None]:
import os
import sys
from pathlib import Path

# Add src directory to path
# Resolve to an absolute path so the entry does not depend on later working-directory changes.
src_dir = Path('../src').resolve()
# Guard the append so re-running this cell does not pile up duplicate sys.path entries.
if str(src_dir) not in sys.path:
    sys.path.append(str(src_dir))

# Import project modules
from pipeline import Pipeline
from config import ConfigManager
from visualization import ArchaeologicalVisualizer

## 1. Configuration and Setup

Load the configuration and initialize the pipeline.

In [None]:
# Initialize pipeline
# The config path is relative to the kernel's working directory (one level above it).
config_path = '../configs/config.json'
pipeline = Pipeline(config_path)

# Print the study area reported by the pipeline's config manager.
print("Pipeline initialized with study area:")
print(pipeline.config_manager.get_study_area())

## 2. Data Collection

Fetch satellite imagery and historical data.

In [None]:
# Gather satellite data, then list one summary line per source.
satellite_data = pipeline._collect_satellite_data()

print("\nSatellite data sources:")
for source_name, payload in satellite_data.items():
    # Each entry carries a 'metadata' mapping holding the 'date' and 'cloud_cover' values.
    meta = payload['metadata']
    print(f"{source_name}: {meta['date']} (Cloud cover: {meta['cloud_cover']}%)")

In [None]:
# Process the historical sources, then report how many items each data type produced.
historical_data = pipeline._process_historical_data()

print("\nProcessed historical data:")
for category, records in historical_data.items():
    n_records = len(records)
    print(f"{category}: {n_records} items processed")

## 3. Data Preprocessing

Prepare the collected data for model input.

In [None]:
# Combine the satellite and historical inputs into model-ready data.
processed_data = pipeline._preprocess_data(satellite_data, historical_data)

print("\nPreprocessed data shapes:")
for source_name, entry in processed_data['satellite'].items():
    # Report the shape of the object stored under each source's 'data' key.
    array_shape = entry['data'].shape
    print(f"{source_name}: {array_shape}")

## 4. Model Predictions

Run the archaeological site detection model.

In [None]:
# Run model predictions
# NOTE(review): this calls an underscore-prefixed pipeline method directly — confirm there is
# no public entry point intended for this stage.
site_predictions = pipeline._run_model_predictions(processed_data)

print(f"\nIdentified {len(site_predictions)} potential archaeological sites")
# Final expression of the cell, so Jupyter renders the result of head() as the cell output.
# Presumably site_predictions is a (Geo)DataFrame — verify against the pipeline.
site_predictions.head()

## 5. Validation

Validate the predictions using hydrological and settlement pattern analysis.

In [None]:
# Validate predictions
# Validation takes the raw predictions and returns the subset (or variant) kept as validated sites.
# NOTE(review): the validation criteria are not visible in this notebook — see Pipeline._validate_predictions.
validated_sites = pipeline._validate_predictions(site_predictions)

# Report how many entries remain after validation next to the number of raw predictions.
print(f"\nValidated {len(validated_sites)} sites out of {len(site_predictions)} predictions")
# Final expression of the cell, so Jupyter renders the result of head() as the cell output.
validated_sites.head()

## 6. Visualization

Create interactive visualizations of the results.

In [None]:
# Create visualizations
# The visualization step receives the validated sites plus the processed and historical data.
visualization_results = pipeline._create_visualizations(
    validated_sites,
    processed_data,
    historical_data
)

# Display Kepler.gl map
# The result is indexed by name; leaving the lookup as the final expression renders it as the cell output.
visualization_results['kepler_map']

In [None]:
# Display temporal visualization
# Reads the 'temporal_vis' entry of the visualization results; as the cell's final expression
# it is rendered by Jupyter as the cell output.
visualization_results['temporal_vis']

## 7. Save Results

Save the validated sites and visualizations.

In [None]:
# Save all results
pipeline._save_results(validated_sites, visualization_results)

# NOTE(review): the locations printed below are literals in this cell, not values returned by
# _save_results — confirm they match where the pipeline actually writes its outputs.
print("\nResults saved to:")
print(f"- Validated sites: {Path('../data/outputs/validated_sites.geojson')}")
print(f"- Visualizations: {Path('../data/outputs/visualizations/')}")

## Data Collection and Preprocessing

First, let's collect and preprocess the satellite imagery and historical data.

In [None]:
# Process data sources
# The three stages run back to back; preprocessing consumes the results of the two calls before it.
# NOTE(review): the same three calls appear in earlier cells, so on a top-to-bottom run they
# execute again and rebind the same names — confirm the repetition is intended.
satellite_data = pipeline._collect_satellite_data()
historical_data = pipeline._process_historical_data()
processed_data = pipeline._preprocess_data(satellite_data, historical_data)

print("Data collection and preprocessing completed!")

## Model Training

Now we'll train the archaeological site detection model using the preprocessed data.

In [None]:
# Train the model
# NOTE(review): presumably train_model() is what writes results/training_metrics.json —
# confirm, since the next step reads that file.
pipeline.train_model()

# Load and plot training metrics
# The encoding is stated explicitly so the read does not depend on the platform's default
# locale encoding.
# NOTE(review): this path is relative to the kernel's working directory, while other cells
# use '../'-prefixed paths — confirm the intended location.
with open('results/training_metrics.json', 'r', encoding='utf-8') as f:
    metrics = json.load(f)

# Plot training curves
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# One panel per metric: (axes, key suffix used in the metrics file, display name).
# Both panels share the same layout, so a single loop draws them instead of two copies
# of the same calls.
for panel, metric_key, metric_name in ((ax1, 'loss', 'Loss'), (ax2, 'iou', 'IoU')):
    panel.plot(metrics[f'train_{metric_key}'], label=f'Train {metric_name}')
    panel.plot(metrics[f'val_{metric_key}'], label=f'Validation {metric_name}')
    panel.set_title(f'Training and Validation {metric_name}')
    panel.set_xlabel('Epoch')  # assumes one recorded value per epoch — TODO confirm
    panel.set_ylabel(metric_name)
    panel.legend()

plt.tight_layout()
plt.show()

## Site Prediction and Validation

With our trained model, we can now predict potential archaeological sites and validate them using multiple criteria.

In [None]:
# Run predictions
# Uses the preprocessed data produced by the data-collection cell.
site_predictions = pipeline._run_model_predictions(processed_data)

# Validate predictions
# Validation takes the raw predictions as its only input.
validated_sites = pipeline._validate_predictions(site_predictions)

# Report the number of entries that remain after validation.
print(f"Found {len(validated_sites)} validated archaeological sites!")

## Visualization

Finally, let's create interactive visualizations of our discoveries.

In [None]:
# Create visualizations
# The visualization step receives the validated sites plus the processed and historical data.
visualizations = pipeline._create_visualizations(
    validated_sites,
    processed_data,
    historical_data
)

# Display Kepler.gl map
# An explicit display() call is used because this lookup is not the cell's last expression.
display(visualizations['kepler_map'])

# Save results
# NOTE(review): the output location is decided inside _save_results and is not visible here.
pipeline._save_results(validated_sites, visualizations)