# Tree Scout
In this notebook we will use the Global Forest Watch API of the past decade to predict future deforestation

In [3]:
# Import necessary packages
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import json
import warnings
warnings.filterwarnings('ignore')

# For environment variables
from dotenv import load_dotenv
import os

# For machine learning
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load environment variables from .env file
load_dotenv()

print("All packages imported successfully!")

All packages imported successfully!


In [6]:
import csv 
import numpy as np



with open('US_County_Boundingboxes.csv', mode='r') as file:
    us_county_bounds_csv = csv.DictReader(file)

    county_data = {}
    
    for line in us_county_bounds_csv:
        county_name = line['COUNTY_NAME']
        
        # Try to extract state and county FIPS codes if available in your CSV
        # Common column names: 'STATEFP', 'COUNTYFP', 'GEOID', 'FIPS'
        state_fips = line.get('STATEFP', None)  # Adjust column name as needed
        county_fips = line.get('COUNTYFP', None)  # Adjust column name as needed
        
        county_data[county_name] = {
            'state_fips': state_fips,
            'county_fips': county_fips,
            'coordinates': [[
                [float(line['xmin']), float(line['ymin'])],
                [float(line['xmax']), float(line['ymin'])],
                [float(line['xmax']), float(line['ymax'])],
                [float(line['xmin']), float(line['ymax'])],
                [float(line['xmin']), float(line['ymin'])]
            ]]
        }

for key in county_data.keys():
    print(f"{key}: {county_data[key]}")



Autauga: {'state_fips': '01', 'county_fips': '001', 'coordinates': [[[-86.921237, 32.307574], [-86.411172, 32.307574], [-86.411172, 32.708213], [-86.921237, 32.708213], [-86.921237, 32.307574]]]}
Baldwin: {'state_fips': '13', 'county_fips': '009', 'coordinates': [[[-83.42909, 32.926141], [-83.044327, 32.926141], [-83.044327, 33.1904], [-83.42909, 33.1904], [-83.42909, 32.926141]]]}
Barbour: {'state_fips': '54', 'county_fips': '001', 'coordinates': [[[-80.227173, 38.947236], [-79.808791, 38.947236], [-79.808791, 39.303337], [-80.227173, 39.303337], [-80.227173, 38.947236]]]}
Bibb: {'state_fips': '13', 'county_fips': '021', 'coordinates': [[[-83.89205, 32.660643], [-83.48943, 32.660643], [-83.48943, 32.952792], [-83.89205, 32.952792], [-83.89205, 32.660643]]]}
Blount: {'state_fips': '47', 'county_fips': '009', 'coordinates': [[[-84.188319, 35.460543], [-83.662765, 35.460543], [-83.662765, 35.887125], [-84.188319, 35.887125], [-84.188319, 35.460543]]]}
Bullock: {'state_fips': '01', 'count

In [None]:
import os
from dotenv import load_dotenv

load_dotenv()

import time
from functools import wraps

def rate_limit(max_per_second):
    """Decorator to limit API calls per second"""
    min_interval = 1.0 / max_per_second
    last_called = [0.0]
    
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            elapsed = time.time() - last_called[0]
            wait_time = min_interval - elapsed
            if wait_time > 0:
                time.sleep(wait_time)
            result = func(*args, **kwargs)
            last_called[0] = time.time()
            return result
        return wrapper
    return decorator

@rate_limit(5)  # Max 5 requests per second
def get_county_forest_loss_fips(coordinates, start_year=2001, end_year=2023):
    sql = f"""
    SELECT umd_tree_cover_loss__year, SUM(area__ha) as total_loss
    FROM results 
    WHERE umd_tree_cover_loss__year >= {start_year} 
    AND umd_tree_cover_loss__year <= {end_year}
    GROUP BY umd_tree_cover_loss__year
    ORDER BY umd_tree_cover_loss__year
    """
    
    response = requests.post(
        "https://data-api.globalforestwatch.org/dataset/umd_tree_cover_loss/v1.9.1/query/json",
        headers={
            "x-api-key": os.getenv('GFW_API_KEY'),
            "Content-Type": "application/json"
        },
        json={
            "sql": sql,
            "geometry": {
                "type": "Polygon",
                "coordinates": coordinates
            }
        }
    )
    
    return response

"""for index, (key, value) in enumerate(county_data.items()):
    print(f"\n{key}: ")
    # Get data from 2010 to 2020
    res = get_county_forest_loss_fips(value['coordinates'], start_year=2010, end_year=2020)
    
    print(f"Status: {res.status_code}")
    if res.status_code == 200:
        data = res.json()
        print(json.dumps(data, indent=2))
    else:
        print(f"Error: {res.content}")
    if index == 2:  # Test with first 3 counties
        break"""


Autauga: 
Status: 200
{
  "data": [
    {
      "umd_tree_cover_loss__year": 2010,
      "total_loss": 2639.49431
    },
    {
      "umd_tree_cover_loss__year": 2011,
      "total_loss": 1479.47219
    },
    {
      "umd_tree_cover_loss__year": 2012,
      "total_loss": 1987.8889100000001
    },
    {
      "umd_tree_cover_loss__year": 2013,
      "total_loss": 2503.8559800000003
    },
    {
      "umd_tree_cover_loss__year": 2014,
      "total_loss": 2698.52686
    },
    {
      "umd_tree_cover_loss__year": 2015,
      "total_loss": 2782.86239
    },
    {
      "umd_tree_cover_loss__year": 2016,
      "total_loss": 3562.38778
    },
    {
      "umd_tree_cover_loss__year": 2017,
      "total_loss": 3841.99231
    },
    {
      "umd_tree_cover_loss__year": 2018,
      "total_loss": 3225.70245
    },
    {
      "umd_tree_cover_loss__year": 2019,
      "total_loss": 2925.2458500000002
    },
    {
      "umd_tree_cover_loss__year": 2020,
      "total_loss": 2688.02161
    }
  ],


In [None]:
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed
import time

def fetch_county_data(county_name, county_info):
    """Fetch data for a single county"""
    try:
        res = get_county_forest_loss_fips(
            county_info['coordinates'], 
            start_year=2010, 
            end_year=2020
        )
        
        if res.status_code == 200:
            data = res.json()
            rows = []
            if 'data' in data:
                for row in data['data']:
                    rows.append({
                        'County_Name': county_name,
                        'State_FIPS': county_info['state_fips'],
                        'County_FIPS': county_info['county_fips'],
                        'Year': row.get('umd_tree_cover_loss__year'),
                        'Tree_Loss_Hectares': row.get('total_loss')
                    })
            return rows
        else:
            print(f"Error for {county_name}: {res.status_code}")
            return []
    except Exception as e:
        print(f"Exception for {county_name}: {e}")
        return []

# Use ThreadPoolExecutor for parallel requests
all_data = []
start_time = time.time()

# Adjust max_workers based on API rate limits (start with 5-10)
with ThreadPoolExecutor(max_workers=10) as executor:
    # Submit all tasks
    future_to_county = {
        executor.submit(fetch_county_data, county_name, county_info): county_name 
        for county_name, county_info in county_data.items()
    }
    
    # Collect results as they complete
    for i, future in enumerate(as_completed(future_to_county)):
        county_name = future_to_county[future]
        try:
            result = future.result()
            all_data.extend(result)
            if i % 100 == 0:
                print(f"Processed {i}/{len(county_data)} counties...")
        except Exception as e:
            print(f"Error processing {county_name}: {e}")

# Convert to DataFrame and save
df = pd.DataFrame(all_data)
df.to_csv('county_forest_loss_data.csv', index=False)

elapsed = time.time() - start_time
print(f"\nData written to CSV. Total rows: {len(df)}")
print(f"Time elapsed: {elapsed:.2f} seconds")
print(df.head())

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82


KeyboardInterrupt: 