# Critical Minerals Data Tools - Demo

This notebook demonstrates how to use the Critical Minerals Data Tools APIs to access:
- **CLAIMM (NETL EDX)**: US Critical Minerals datasets
- **BGS World Mineral Statistics**: Global production data (1970-2023)

## Setup

First, start the unified API server in a terminal (this notebook expects it to be reachable at `http://localhost:8000`):
```bash
cd CMM_API && uv run cmm-api
```

In [None]:
import httpx
import pandas as pd
import json

# Root URL of the locally running unified API server.
API_BASE = "http://localhost:8000"

def api_get(endpoint, params=None):
    """Issue a GET request against the API and return the decoded JSON body.

    Args:
        endpoint: Path appended verbatim to ``API_BASE`` (e.g. ``"/overview"``).
        params: Optional query-string parameters forwarded to ``httpx.get``.

    Returns:
        Whatever ``response.json()`` produces for the reply.

    Note:
        The HTTP status code is not inspected here, so the parsed body is
        returned as-is regardless of the status the server replied with.
    """
    url = "{}{}".format(API_BASE, endpoint)
    reply = httpx.get(url, params=params, timeout=60.0)
    return reply.json()

## 1. Data Overview

Get an overview of all available data sources.

In [None]:
# Fetch the combined summary of both data sources.
overview = api_get("/overview")

# CLAIMM section: description plus its category listing.
print("=== CLAIMM (NETL EDX) ===")
claimm_overview = overview["sources"]["CLAIMM"]
print("Description: {}".format(claimm_overview["description"]))
print("Categories: {}".format(claimm_overview["categories"]))

# BGS section: description, covered period, and how many commodities are listed.
print("\n=== BGS World Mineral Statistics ===")
bgs_overview = overview["sources"]["BGS"]
print("Description: {}".format(bgs_overview["description"]))
print("Time Range: {}".format(bgs_overview["time_range"]))
bgs_commodity_count = len(bgs_overview["commodities"])
print("Critical Minerals: {} commodities".format(bgs_commodity_count))

## 2. BGS Production Data

### 2.1 List Available Commodities

In [None]:
# Request the commodity list with the critical_only filter switched on.
commodities = api_get("/bgs/commodities", params={"critical_only": True})

print("Critical Minerals Available:")
for commodity_name in commodities["commodities"]:
    # One bullet line per commodity.
    print("  - {}".format(commodity_name))

### 2.2 Top Lithium Producers

In [None]:
# Ask the ranking endpoint for the top ten entries for "lithium minerals".
lithium = api_get("/bgs/ranking/lithium minerals", params={"top_n": 10})

print("Top Lithium Producers ({})".format(lithium["year"]))
print(50 * "=")

# Tabulate the ranking rows; the bare name on the last line renders the table.
df_lithium = pd.DataFrame(data=lithium["ranking"])
df_lithium

In [None]:
# Horizontal bar chart of the lithium ranking table built in the previous cell.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 6))

# Scale raw quantities down by one million to match the axis label.
quantity_in_millions = df_lithium["quantity"] / 1e6
ax.barh(df_lithium["country"], quantity_in_millions)

ax.set_title(f"Top Lithium Producers ({lithium['year']})")
ax.set_xlabel("Production (Million Tonnes)")

# Flip the y-axis so the first row of the table is drawn at the top.
ax.invert_yaxis()
plt.tight_layout()
plt.show()

### 2.3 Cobalt Supply Chain Analysis

In [None]:
# Rank cobalt mine producers and summarize how concentrated supply is.
cobalt = api_get("/bgs/ranking/cobalt, mine", {"top_n": 10})

df_cobalt = pd.DataFrame(cobalt["ranking"])
print(f"Top Cobalt Producers ({cobalt['year']})")
print("\nSupply Concentration Risk:")

# Take the leading producer's name from the first ranking row instead of
# hard-coding it, so the label always matches the share printed next to it.
top_producer = df_cobalt.iloc[0]
print(f"  Top 1 ({top_producer['country']}): {top_producer['share_percent']:.1f}%")

# Cumulative share held by the first three and first five rows.
print(f"  Top 3: {df_cobalt.head(3)['share_percent'].sum():.1f}%")
print(f"  Top 5: {df_cobalt.head(5)['share_percent'].sum():.1f}%")

df_cobalt[["rank", "country", "quantity", "share_percent"]]

### 2.4 Rare Earth Production

In [None]:
# Top ten entries of the "rare earth minerals" ranking.
ree = api_get("/bgs/ranking/rare earth minerals", params={"top_n": 10})

df_ree = pd.DataFrame(data=ree["ranking"])
print("Top Rare Earth Producers ({})".format(ree["year"]))

# Show only the columns of interest, including the reported units.
ree_columns = ["rank", "country", "quantity", "units", "share_percent"]
df_ree[ree_columns]

### 2.5 Compare Multiple Commodities

In [None]:
# Compare the share of the leading producer (and of the top three combined)
# across several battery-related commodities.
battery_minerals = ["lithium minerals", "cobalt, mine", "nickel, mine", "graphite", "manganese ore"]

concentration_data = []
for mineral in battery_minerals:
    data = api_get("/bgs/ranking/{}".format(mineral), params={"top_n": 3})
    ranking_rows = data.get("ranking")
    if not ranking_rows:
        # Missing or empty ranking: leave this mineral out of the table.
        continue
    leader = ranking_rows[0]
    concentration_data.append(
        {
            "mineral": mineral,
            "top_producer": leader["country"],
            "top_share": leader["share_percent"],
            "top3_share": sum(row["share_percent"] for row in ranking_rows[:3]),
        }
    )

df_concentration = pd.DataFrame(data=concentration_data)
print("Battery Minerals Supply Concentration")
df_concentration

## 3. CLAIMM Datasets

### 3.1 Search for Datasets

In [None]:
# Search for rare earth datasets
ree_datasets = api_get("/claimm/datasets", {"q": "rare earth", "limit": 5})

print(f"Found {ree_datasets['count']} rare earth datasets:\n")
for ds in ree_datasets["datasets"]:
    # The folder emoji (U+1F4C1) is written as an escape so the source stays
    # ASCII-only; the literal had previously been mis-decoded into mojibake.
    print(f"\U0001F4C1 {ds['title']}")
    print(f"   ID: {ds['id']}")
    print(f"   Resources: {len(ds['resources'])} files")
    # Show at most the first five tags to keep each entry on one line.
    print(f"   Tags: {', '.join(ds['tags'][:5])}")
    print()

### 3.2 Get Dataset Details

In [None]:
# Get details for a specific dataset (the first hit of the search above)
if ree_datasets["datasets"]:
    dataset_id = ree_datasets["datasets"][0]["id"]
    details = api_get(f"/claimm/datasets/{dataset_id}")

    print(f"Dataset: {details['title']}")
    # Truncate long descriptions to 300 characters; fall back to a placeholder
    # when the description is missing or empty.
    print(f"\nDescription: {details['description'][:300]}..." if details.get('description') else "No description")
    print("\nResources:")
    for r in details["resources"]:
        # `.get('size', 0)` still yields None when the key is present but null,
        # which would make the division raise TypeError; treat that case as 0.
        size_kb = (r.get('size') or 0) / 1024
        print(f"  - {r['name']} ({r['format']}, {size_kb:.1f} KB)")
        print(f"    URL: {r['url']}")

### 3.3 Dataset Categories

In [None]:
categories = api_get("/claimm/categories")

# Order the (category, count) pairs from the largest count to the smallest.
ranked_categories = sorted(categories.items(), key=lambda pair: pair[1], reverse=True)

# One record per category, then a single DataFrame construction.
category_rows = []
for category_name, dataset_count in ranked_categories:
    category_rows.append({"category": category_name, "count": dataset_count})
df_categories = pd.DataFrame(category_rows)

print("CLAIMM Dataset Categories")
df_categories

In [None]:
# Horizontal bar chart of dataset counts per CLAIMM category.
fig, ax = plt.subplots(figsize=(10, 6))

category_labels = df_categories["category"]
dataset_counts = df_categories["count"]
ax.barh(category_labels, dataset_counts)

ax.set_title("CLAIMM Dataset Categories")
ax.set_xlabel("Number of Datasets")

# Flip the y-axis so the first table row is drawn at the top.
ax.invert_yaxis()
plt.tight_layout()
plt.show()

## 4. Unified Search

Search across both data sources with a single query.

In [None]:
# Run one query against both sources through the unified /search endpoint.
results = api_get("/search", params={"q": "lithium", "limit": 5})

print("=== CLAIMM Results ===")
if "CLAIMM" in results["sources"]:
    claimm = results["sources"]["CLAIMM"]
    print("Found {} datasets".format(claimm["count"]))
    # List at most three dataset titles, each cut to 60 characters.
    preview = claimm.get("datasets", [])[:3]
    for ds in preview:
        short_title = ds["title"][:60]
        print("  - {}...".format(short_title))

print("\n=== BGS Results ===")
if "BGS" in results["sources"]:
    bgs = results["sources"]["BGS"]
    # Both fields are optional in the reply, so fall back to placeholders.
    print("Commodity: {}".format(bgs.get("commodity", "N/A")))
    print("Records: {}".format(bgs.get("count", 0)))

## 5. Working with Downloaded BGS Data

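If you've downloaded the BGS data archive from the GitHub release and extracted it so that `CLaiMM/bgs_data/bgs_critical_minerals_production.csv` exists relative to the notebook's working directory, you can work with it directly.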

In [None]:
# Read the locally downloaded BGS CSV when present; otherwise point to the download.
import os

bgs_data_path = "CLaiMM/bgs_data/bgs_critical_minerals_production.csv"

if not os.path.exists(bgs_data_path):
    print("BGS data not found. Download from:")
    print("https://github.com/Redliana/critical-minerals-data-tools/releases/download/v0.1.0/bgs_data.tar.gz")
else:
    df_bgs = pd.read_csv(bgs_data_path)

    # Quick shape summary: row count, column names, year span, distinct values.
    print("Loaded {:,} records".format(len(df_bgs)))
    print("Columns: {}".format(list(df_bgs.columns)))
    print("\nYear range: {} - {}".format(df_bgs["year"].min(), df_bgs["year"].max()))
    print("Commodities: {}".format(df_bgs["commodity"].nunique()))
    print("Countries: {}".format(df_bgs["country"].nunique()))

## 6. Using with LLMs

### 6.1 Get OpenAI Function Definitions

In [None]:
# Retrieve the function definitions exposed by the /openai/functions endpoint.
functions = api_get("/openai/functions")

print("Available Functions for LLM Integration:")
print(50 * "=")
for f in functions["functions"]:
    # Name on its own line, followed by the first 80 characters of the description.
    summary = f["description"][:80]
    print("\n{}".format(f["name"]))
    print("  {}...".format(summary))

### 6.2 Example: OpenAI Integration

In [None]:
# Example OpenAI integration (requires OPENAI_API_KEY)
# Uncomment to run. The sketch below fetches the function definitions from
# /openai/functions and passes each one to the chat completion call as a tool.

# from openai import OpenAI
# client = OpenAI()
#
# functions = api_get("/openai/functions")["functions"]
#
# response = client.chat.completions.create(
#     model="gpt-4o",
#     messages=[{"role": "user", "content": "What are the top cobalt producing countries?"}],
#     tools=[{"type": "function", "function": f} for f in functions],
# )
#
# print(response.choices[0].message)

## Summary

This demo showed how to:

1. **Get data overview** - See available sources and categories
2. **Query BGS data** - Production rankings, supply chain analysis
3. **Search CLAIMM** - Find US critical minerals datasets
4. **Unified search** - Query both sources at once
5. **Work with downloaded data** - Analyze locally
6. **Integrate with LLMs** - Use OpenAI function definitions

### Next Steps

- Explore more commodities: `api_get("/bgs/commodities")`
- Download specific datasets from CLAIMM
- Set up Claude Desktop with MCP servers
- Build custom analysis workflows