# Global Annual Deforestation Trend and Top 10 Country Averages (1990–2020)

In [20]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px

# Read the deforestation table; the code below uses its Entity, Code, Year
# and Deforestation columns.
df = pd.read_csv('annual-deforestation.csv')

# World-level series, with an extra column expressed in million hectares per year
world_df = df.loc[df['Entity'].eq('World')].copy()
world_df['Deforestation (M Ha/yr)'] = world_df['Deforestation'].div(1000000)

# Line chart of the global series
global_axis_labels = {
    'Year': 'Period End Year (Annual Average of Preceding 5-10 Years)',
    'Deforestation (M Ha/yr)': 'Deforestation Rate (Million Hectares/Year)',
}
fig_global = px.line(
    world_df,
    x='Year',
    y='Deforestation (M Ha/yr)',
    markers=True,
    labels=global_axis_labels,
    title='Global Annual Deforestation Rate Trend',
)
fig_global.update_layout(title_x=0.5, xaxis_tickmode='linear')
fig_global.show()


# Rows without a Code are treated as regional/aggregate entities; they and the
# explicit 'World' entity are removed before ranking countries.
uncoded_rows = df['Code'].isna()
aggregate_entities = df.loc[uncoded_rows, 'Entity'].unique()
entities_to_exclude = [*aggregate_entities, 'World']

is_country_row = ~df['Entity'].isin(entities_to_exclude)
country_df = df.loc[is_country_row].copy()

# Mean deforestation per entity over all available years, largest first, top 10 kept
mean_by_entity = country_df.groupby('Entity')['Deforestation'].mean()
avg_deforestation = mean_by_entity.sort_values(ascending=False).iloc[:10].reset_index()
avg_deforestation['Average Deforestation (k Ha/yr)'] = avg_deforestation['Deforestation'] / 1000

# Bar chart of the ten largest averages (thousands of hectares per year)
top10_axis_labels = {
    'Entity': 'Country/Territory',
    'Average Deforestation (k Ha/yr)': 'Average Deforestation Rate (Thousands of Hectares/Year)',
}
fig_top10 = px.bar(
    avg_deforestation,
    x='Entity',
    y='Average Deforestation (k Ha/yr)',
    color='Average Deforestation (k Ha/yr)',
    color_continuous_scale=px.colors.sequential.Sunset,
    labels=top10_axis_labels,
    title='Top 10 Countries/Territories by Average Annual Deforestation Rate (1990-2020)',
)
fig_top10.update_layout(title_x=0.5)
fig_top10.show()

# Region vs World Trend Comparison

In [21]:
# Regions to compare against the world total. The strings are matched
# literally against the Entity column, so they must equal the dataset labels.
regions = ["Asia", "Europe", "Africa", "North America", "South America"]

# Keep the world series plus the selected regions
selected_entities = ["World", *regions]
df_region = df.loc[df["Entity"].isin(selected_entities)].copy()

# Hectares -> million hectares per year
df_region["Deforestation_Mha"] = df_region["Deforestation"].div(1_000_000)

# One line per entity
region_labels = {
    "Deforestation_Mha": "Deforestation (M Ha/yr)",
    "Year": "Year",
    "Entity": "Region",
}
fig_region = px.line(
    df_region,
    x="Year",
    y="Deforestation_Mha",
    color="Entity",
    labels=region_labels,
    title="Global vs Regional Annual Deforestation Trend<br>",
)

# Thicker lines, and the legend anchored inside the top-left corner of the plot
fig_region.update_traces(line_width=3)
legend_settings = dict(title="Region", x=0.01, xanchor="left", y=0.99, yanchor="top")
fig_region.update_layout(legend=legend_settings, height=500)

fig_region.show()


### Interpretation

The chart above shows that South America has the highest deforestation levels and drives much of the global trend, while Africa's rates gradually increase over time. Asia shows a steady decline in deforestation, and Europe remains close to zero. Overall, global deforestation largely mirrors changes in a few key regions. Note that this dataset measures gross forest loss only, so it cannot by itself show reforestation or net forest stability.

# Evolution of annual deforestation rates in various countries (animated map)

In [22]:
# Country-level rows only: keep entries that carry a code and drop the 'World'
# aggregate explicitly. Other cells in this notebook exclude 'World' in addition
# to the Code filter, which implies it carries a code; leaving it in would let
# the world total stretch the automatically computed colour range.
country_df = df[df['Code'].notna() & (df['Entity'] != 'World')].copy()

# Convert deforestation to thousands of hectares per year for easier reading
country_df['Deforestation (k Ha/yr)'] = country_df['Deforestation'] / 1000

# Sort chronologically so the animation frames (taken in order of first
# appearance) run from the earliest to the latest year regardless of how the
# CSV rows are ordered.
country_df = country_df.sort_values(['Year', 'Entity'])

# Prepare string year column for animation frames
country_df['Year_str'] = country_df['Year'].astype(str)

# Build choropleth animation: annual deforestation rate evolution by country
fig = px.choropleth(
    country_df,
    locations='Code',
    color='Deforestation (k Ha/yr)',
    hover_name='Entity',
    animation_frame='Year_str',
    color_continuous_scale=px.colors.sequential.YlOrRd,
    scope='world',
    title='Annual Deforestation Rate Evolution by Country (1990-2015/2020)',
    labels={'Deforestation (k Ha/yr)': 'Deforestation Rate (Thousands of Hectares/Year)'}
)

# Plotly Express already attaches a slider with one step per animation frame
# when `animation_frame` is set, so no manual slider definition is needed here.
fig.update_layout(title_x=0.5)

print("Generated animated world map visualization of deforestation.")
fig.show()

Generated animated world map visualization of deforestation.


# Global Deforestation Trends: Choropleth Visualization

In [23]:
# Data preparation: country-level rows only. 'World' is excluded explicitly,
# consistent with the other country-level filters in this notebook.
country_df = df[df['Code'].notna() & (df['Entity'] != 'World')].copy()
country_df['Deforestation (k Ha/yr)'] = country_df['Deforestation'] / 1000

# Sort chronologically so animation frames appear in year order even if the
# source rows are not ordered by year.
country_df = country_df.sort_values(['Year', 'Entity'])
country_df['Year_str'] = country_df['Year'].astype(str)

# Create choropleth map: animated deforestation rate by country.
# The colour range is fixed at 0-500 k Ha/yr so frames are comparable; values
# above 500 saturate at the darkest colour.
fig = px.choropleth(
    country_df,
    locations='Code',
    color='Deforestation (k Ha/yr)',
    hover_name='Entity',
    animation_frame='Year_str',
    color_continuous_scale=px.colors.sequential.Reds,
    range_color=[0, 500],
    projection='natural earth',
    title='<b>Global Deforestation Evolution</b><br><i>(Annual Rate in Thousands of Hectares)</i>',
    # Dark template: the colorbar tick labels below are white and would be
    # invisible on the default light background.
    template='plotly_dark',
)

# Customize layout: title, margins, and colorbar
fig.update_layout(
    title_font_size=24,
    title_x=0.5,
    font_family="Arial, sans-serif",
    margin=dict(l=20, r=20, t=80, b=20),
    coloraxis_colorbar=dict(
        title="Loss (k Ha/yr)",
        thickness=15,
        len=0.5,
        tickfont=dict(color='white')
    )
)

# Customize map appearance: hide the default base layers, then switch on
# country borders, land and ocean with explicit colours.
fig.update_geos(
    visible=False,
    showcountries=True, countrycolor="#8D8D8D",
    showland=True, landcolor="#2A2A2A",
    showocean=True, oceancolor="#0099FF"
)

# Display the map
fig.show()

# Global Deforestation: Linear Regression Forecast to 2030

In [24]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression

# Load dataset
df = pd.read_csv('annual-deforestation.csv')

# Prepare the world time series: use the explicit 'World' rows when present.
if 'World' in df['Entity'].values:
    df_world = df[df['Entity'] == 'World'].copy()
else:
    # Fallback: rebuild a world total by summing per year. Rows without a Code
    # are treated elsewhere in this notebook as regional aggregates, so they
    # are left out here to avoid counting the same area twice.
    # NOTE(review): coded non-country aggregates, if any exist, would still be
    # included - confirm against the dataset.
    df_world = (
        df[df['Code'].notna()]
        .groupby('Year')['Deforestation']
        .sum()
        .reset_index()
    )

# LinearRegression cannot fit on missing values, so drop incomplete rows first
df_world = df_world.dropna(subset=['Year', 'Deforestation']).sort_values('Year')

# Train linear regression on Year -> Deforestation (hectares per year)
X = df_world['Year'].values.reshape(-1, 1)
y = df_world['Deforestation'].values

model = LinearRegression()
model.fit(X, y)

# Forecast for selected future years
future_years = np.array([2020, 2025, 2030]).reshape(-1, 1)
future_predictions = model.predict(future_years)

# Build combined historical + forecast dataframe
df_history = df_world[['Year', 'Deforestation']].copy()
df_history['Type'] = 'Historical Data'

df_future = pd.DataFrame({
    'Year': future_years.flatten(),
    'Deforestation': future_predictions,
    'Type': 'AI Prediction'
})

df_forecast = pd.concat([df_history, df_future], ignore_index=True)

# Convert to million hectares for plotting
df_forecast['Deforestation (M Ha)'] = df_forecast['Deforestation'] / 1_000_000

# Plot lines for historical and predicted values (one trace per 'Type')
fig = px.line(
    df_forecast,
    x='Year',
    y='Deforestation (M Ha)',
    color='Type',
    markers=True,
    title='<b>AI Forecast: Global Deforestation Rate (Gross Loss)</b><br><i>(Projected Trend to 2030)</i>',
    labels={'Deforestation (M Ha)': 'Deforestation Rate (Million Hectares/Year)'},
    color_discrete_map={'Historical Data': 'red', 'AI Prediction': 'orange'}
)

# Annotate the trend direction from the fitted slope. The annotation sits at
# x=2025 and at the mean of the three forecast values; because the forecast
# years are evenly spaced on a straight line, that mean is the 2025 prediction.
mean_future_val = df_future['Deforestation'].mean() / 1_000_000
trend = "Increasing" if model.coef_[0] > 0 else "Decreasing"
fig.add_annotation(
    x=2025, y=mean_future_val,
    text=f"Trend: {trend} Loss",
    showarrow=True,
    arrowhead=1
)

fig.update_layout(template='plotly_dark', title_x=0.5)
fig.show()

### AI Linear Forecast Analysis

The linear model projects a steady decline in global deforestation, dropping from about 10 M ha/yr in 2015 to around 7 M ha/yr by 2030. This indicates a continuation of the long-term decreasing trend observed since the early 2000s.

# Global forecast with Prophet

In [25]:
# !pip install prophet

from prophet import Prophet
import plotly.graph_objects as go

# Prepare global time series (World)

df_world_ts = df[df["Entity"] == "World"][["Year", "Deforestation"]].dropna().copy()
df_world_ts = df_world_ts.sort_values("Year")

# Prophet expects columns: ds (datetime) and y (target value).
# Each observation is stamped on 1 January of its year.
df_world_ts["y"] = df_world_ts["Deforestation"] / 1_000_000  # Convert to M Ha/yr
df_world_ts["ds"] = pd.to_datetime(df_world_ts["Year"].astype(int).astype(str) + "-01-01")

ts_world = df_world_ts[["ds", "y"]]

last_year = int(df_world_ts["Year"].max())

print("Historical years:", df_world_ts["Year"].min(), "→", last_year)

# Define and fit Prophet model

prophet_model = Prophet(
    yearly_seasonality=False,  # Yearly seasonality not critical for this annual series
    weekly_seasonality=False,
    daily_seasonality=False
)
prophet_model.fit(ts_world)

# Create future dataframe up to 2030

# Number of yearly steps needed to reach the target year
target_year = 2030
n_future_years = max(0, target_year - last_year)

# Use year-START frequency ("YS") so future dates land on 1 January, matching
# the historical timestamps. Year-end frequency ("Y"/"YE") would produce
# 31 December dates, so `.dt.year` would label the first forecast step with the
# last historical year and the final step with target_year - 1.
future_df = prophet_model.make_future_dataframe(
    periods=n_future_years,
    freq="YS"
)

forecast = prophet_model.predict(future_df)

# Extract only future part (after last historical timestamp)
forecast_future = forecast[forecast["ds"] > df_world_ts["ds"].max()].copy()
forecast_future["Year"] = forecast_future["ds"].dt.year

# Plot historical data + Prophet forecast + uncertainty interval

fig_prophet = go.Figure()

# Historical line
fig_prophet.add_trace(
    go.Scatter(
        x=df_world_ts["Year"],
        y=df_world_ts["y"],
        mode="lines",
        name="Historical"
    )
)

# Forecast mean line
fig_prophet.add_trace(
    go.Scatter(
        x=forecast_future["Year"],
        y=forecast_future["yhat"],
        mode="lines",
        name="Prophet forecast"
    )
)

# Uncertainty band (yhat_lower, yhat_upper): trace the upper bound forwards and
# the lower bound backwards so the closed polygon can be filled.
fig_prophet.add_trace(
    go.Scatter(
        x=list(forecast_future["Year"]) + list(forecast_future["Year"][::-1]),
        y=list(forecast_future["yhat_upper"]) + list(forecast_future["yhat_lower"][::-1]),
        fill="toself",
        name="Forecast uncertainty interval",
        hoverinfo="skip",
        line=dict(width=0)
    )
)

fig_prophet.update_layout(
    title=(
        "Global Deforestation Forecast with Prophet<br>"
    ),
    xaxis_title="Year",
    yaxis_title="Deforestation (M Ha/yr)",
    legend_title="Data type",
    height=500
)

fig_prophet.show()


16:09:08 - cmdstanpy - INFO - Chain [1] start processing
16:09:08 - cmdstanpy - INFO - Chain [1] done processing


Historical years: 1990 → 2015



'Y' is deprecated and will be removed in a future version, please use 'YE' instead.



### Prophet Forecast Analysis:

Prophet predicts a continued decline in global deforestation after 2015, with values approaching 7–8 M Ha/yr by 2030. The uncertainty band indicates meaningful variation around the trend, suggesting that future outcomes may shift depending on policy actions and environmental pressures.

# Global Forest Cover Share, Per‑Capita Deforestation Emissions, and Forest Transition Matrix

In [26]:
import pandas as pd
import plotly.express as px

# Load input datasets (left unfiltered: a later cell merges on these frames)
df_share = pd.read_csv('forest-area-as-share-of-land-area.csv')
df_transition = pd.read_csv('forest-transition-phase.csv')
df_per_capita = pd.read_csv('per-capita-co2-food-deforestation.csv')
df_change = pd.read_csv('annual-change-forest-area.csv')


def _country_rows(frame):
    """Return the rows of `frame` that have a Code and are not the 'World' entity.

    Rows without a Code are treated in this notebook as regional aggregates.
    They must be removed before joining on Code, because pandas matches null
    keys to each other and would pair unrelated aggregates.
    """
    return frame[frame['Code'].notna() & (frame['Entity'] != 'World')]


# Keep the latest available share-of-forest row per country code
df_share_latest = (
    _country_rows(df_share)
    .sort_values('Year')
    .drop_duplicates('Code', keep='last')
)

# Choropleth: latest forest share (%) by country
fig_share = px.choropleth(
    df_share_latest,
    locations='Code',
    color='Share of land covered by forest',
    hover_name='Entity',
    title='<b>Global Forest Cover Share</b><br><i>(% of Land Area - Latest Available Data)</i>',
    color_continuous_scale=px.colors.sequential.Greens,
    template='plotly_dark'
)
fig_share.update_layout(title_x=0.5)
fig_share.show()


# Select a target year for per-capita CO2 (food-related deforestation) analysis
target_year_co2 = 2013
df_pc_2013 = _country_rows(df_per_capita)
df_pc_2013 = df_pc_2013[df_pc_2013['Year'] == target_year_co2].copy()

# Top 15 countries by per-capita embodied emissions in target year
# (aggregates were removed above so the ranking contains countries only)
top_consumers = df_pc_2013.sort_values('per_capita_embodied_emissions', ascending=False).head(15)

# Horizontal bar chart of top per-capita consumers
fig_consumer = px.bar(
    top_consumers,
    x='per_capita_embodied_emissions',
    y='Entity',
    orientation='h',
    title=f'<b>Top 15 Countries by Per Capita Deforestation Emissions ({target_year_co2})</b><br><i>(CO2 from Food-Related Deforestation)</i>',
    labels={'per_capita_embodied_emissions': 'CO2 Emissions (Tonnes per person)', 'Entity': 'Country'},
    color='per_capita_embodied_emissions',
    color_continuous_scale=px.colors.sequential.Magma,
    template='plotly_dark'
)
# 'total ascending' puts the largest bar at the top of a horizontal chart
fig_consumer.update_layout(title_x=0.5, yaxis={'categoryorder':'total ascending'})
fig_consumer.show()

# Build a transition matrix for a chosen year: forest share vs annual net change
target_year_matrix = 2010

# Extract required columns for the chosen year (country rows only)
share_countries = _country_rows(df_share)
change_countries = _country_rows(df_change)
df_share_yr = share_countries[share_countries['Year'] == target_year_matrix][['Code', 'Entity', 'Share of land covered by forest']]
df_change_yr = change_countries[change_countries['Year'] == target_year_matrix][['Code', 'Annual net change in forest area']]
# Use the latest transition phase per country (not necessarily the chosen year)
df_trans_yr = (
    df_transition.dropna(subset=['Code'])
    .sort_values('Year')
    .drop_duplicates('Code', keep='last')[['Code', 'Forest Transition Phase']]
)

# Merge into one table for the scatter matrix; inner joins keep only countries
# present in all three inputs
df_matrix = pd.merge(df_share_yr, df_change_yr, on='Code', how='inner')
df_matrix = pd.merge(df_matrix, df_trans_yr, on='Code', how='inner')

# Marker size and position come from these columns, so rows missing either
# value cannot be drawn and are dropped.
df_matrix = df_matrix.dropna(
    subset=['Share of land covered by forest', 'Annual net change in forest area']
)

# Scatter: forest share vs net change, colored by transition phase
fig_matrix = px.scatter(
    df_matrix,
    x='Share of land covered by forest',
    y='Annual net change in forest area',
    color='Forest Transition Phase',
    size='Share of land covered by forest',
    hover_name='Entity',
    title=f'<b>Forest Transition Matrix ({target_year_matrix})</b><br><i>Clustering Countries by Forest Share vs. Net Change</i>',
    labels={
        'Share of land covered by forest': 'Forest Share (% of Land)',
        'Annual net change in forest area': 'Net Forest Change (Hectares/Year)'
    },
    template='plotly_dark',
    color_discrete_sequence=px.colors.qualitative.Bold
)

# Add a horizontal zero line to denote net zero balance
fig_matrix.add_hline(y=0, line_dash="dash", line_color="white", annotation_text="Net Zero (Balance)")
fig_matrix.update_layout(title_x=0.5)
fig_matrix.show()

# Multi-feature regression for deforestation

In [27]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
import plotly.express as px

# Country-year base table: coded rows other than 'World', with the
# deforestation target also expressed in thousands of hectares per year.
MERGE_KEYS = ["Entity", "Code", "Year"]

is_country = df["Code"].notna() & df["Entity"].ne("World")
df_base = df.loc[is_country, MERGE_KEYS + ["Deforestation"]].copy()
df_base["Deforestation_kha"] = df_base["Deforestation"] / 1000.0

# Left-join one feature column at a time on (Entity, Code, Year):
#   1. forest share (% of land covered by forest)
#   2. annual net change in forest area
#   3. per-capita embodied emissions (optional feature)
df_ml = df_base
for feature_table, feature_name in (
    (df_share, "Share of land covered by forest"),
    (df_change, "Annual net change in forest area"),
    (df_per_capita, "per_capita_embodied_emissions"),
):
    df_ml = df_ml.merge(
        feature_table[MERGE_KEYS + [feature_name]],
        on=MERGE_KEYS,
        how="left",
    )

# Required features must be present; the optional one is zero-filled instead.
base_features = [
    "Share of land covered by forest",
    "Annual net change in forest area",
]

extra_features = []
if "per_capita_embodied_emissions" in df_ml.columns:
    # Missing emissions are replaced by 0.0 rather than dropping the row
    df_ml["per_capita_embodied_emissions"] = df_ml["per_capita_embodied_emissions"].fillna(0.0)
    extra_features.append("per_capita_embodied_emissions")

FEATURE_COLS = base_features + extra_features

# Rows lacking a required feature or the target are excluded from modelling
required_columns = base_features + ["Deforestation_kha"]
df_ml_clean = df_ml.dropna(subset=required_columns).copy()

print("Number of samples after merging:", len(df_ml))
print("Number of clean samples for modelling:", len(df_ml_clean))

# Only model when at least 20 clean samples are available
if len(df_ml_clean) >= 20:
    X = df_ml_clean[FEATURE_COLS]
    y = df_ml_clean["Deforestation_kha"]

    # Random 80/20 split with a fixed seed
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Random forest with 200 trees, fixed seed, all CPU cores
    rf_model = RandomForestRegressor(n_estimators=200, random_state=42, n_jobs=-1)
    rf_model.fit(X_train, y_train)

    # Held-out performance
    y_pred = rf_model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f"Test MAE (k Ha/yr): {mae:.2f}")
    print(f"Test R² score: {r2:.3f}")

    # Feature importances, largest first, shown as a horizontal bar chart
    fi_df = pd.DataFrame(
        {"Feature": FEATURE_COLS, "Importance": rf_model.feature_importances_}
    ).sort_values("Importance", ascending=False)

    fig_fi = px.bar(
        fi_df,
        x="Importance",
        y="Feature",
        orientation="h",
        labels={"Importance": "Relative Importance", "Feature": "Feature"},
        title="Feature Importance for Deforestation Model",
    )
    fig_fi.update_layout(title_x=0.5)
    fig_fi.show()
else:
    print("Not enough clean samples for a reliable model.")


Number of samples after merging: 467
Number of clean samples for modelling: 467
Test MAE (k Ha/yr): 19.25
Test R² score: 0.552


### Feature Importance Analysis

Annual net change in forest area is the strongest predictor, showing that recent forest gains or losses directly reflect deforestation intensity. Forest-cover share has limited influence, and per-capita CO₂ emissions contribute almost nothing.

# Forecasting Global Forest Area Change Using Linear Regression

In [28]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score

# Load and preprocess data: keep coded rows other than 'World', and drop rows
# with a missing year or target because LinearRegression cannot fit on NaN.
TARGET_COL = 'Annual net change in forest area'

df = pd.read_csv('annual-change-forest-area.csv')
df_clean = df[df['Code'].notna() & (df['Entity'] != 'World')].copy()
df_clean = df_clean.dropna(subset=['Year', TARGET_COL])

print(f"Data Loaded: {len(df_clean)} rows.")

# Split data into training (before 2015) and testing (2015) sets
train_df = df_clean[df_clean['Year'] < 2015]
test_df = df_clean[df_clean['Year'] == 2015]

print(f"Training Sets: {len(train_df)} rows. Test Sets: {len(test_df)} rows.")

# Initialize variables for model training and evaluation
countries = df_clean['Entity'].unique()
mae_scores = []
future_forecasts = []

# Forecast horizon is the same for every country, so build it once
future_years = np.array([2020, 2025, 2030]).reshape(-1, 1)

print("Training models per country...")

# Fit one Year -> net change linear model per country.
# sort=False keeps countries in order of first appearance in the data.
for country, c_full in df_clean.groupby('Entity', sort=False):
    c_train = c_full[c_full['Year'] < 2015]
    c_test = c_full[c_full['Year'] == 2015]

    # Validation: train on pre-2015 rows and score on 2015, when both exist
    if len(c_train) >= 2 and len(c_test) > 0:
        X_train = c_train['Year'].values.reshape(-1, 1)
        y_train = c_train[TARGET_COL].values

        model = LinearRegression()
        model.fit(X_train, y_train)

        X_test = c_test['Year'].values.reshape(-1, 1)
        y_true = c_test[TARGET_COL].values
        y_pred_test = model.predict(X_test)

        mae = mean_absolute_error(y_true, y_pred_test)
        mae_scores.append(mae)

    # Forecast: refit on the full history. This is gated on the full history
    # length (not the training subset) so a country with at least two
    # observations overall still receives a forecast.
    if len(c_full) >= 2:
        X_full = c_full['Year'].values.reshape(-1, 1)
        y_full = c_full[TARGET_COL].values

        final_model = LinearRegression()
        final_model.fit(X_full, y_full)

        future_preds = final_model.predict(future_years)

        code = c_full['Code'].iloc[0]
        for yr, pred in zip(future_years.flatten(), future_preds):
            future_forecasts.append({
                'Entity': country,
                'Code': code,
                'Year': yr,
                'Predicted Net Change': pred,
                'Type': 'AI Forecast'
            })

# Average MAE across validated countries (NaN when none could be validated,
# instead of triggering a mean-of-empty warning)
avg_error = float(np.mean(mae_scores)) if mae_scores else float('nan')
print(f"Model Evaluation Complete. Average Mean Absolute Error across all countries: {avg_error:.2f} Hectares")

# Combine historical and forecast data for visualization
df_forecast = pd.DataFrame(
    future_forecasts,
    columns=['Entity', 'Code', 'Year', 'Predicted Net Change', 'Type']
)
df_history = df_clean[['Entity', 'Code', 'Year', TARGET_COL]].rename(
    columns={TARGET_COL: 'Predicted Net Change'}
)
df_history['Type'] = 'Historical'

df_map = pd.concat([df_history, df_forecast], ignore_index=True)

# Convert values for better readability
df_map['Net Change (k Ha)'] = df_map['Predicted Net Change'] / 1000
df_map['Year_Str'] = df_map['Year'].astype(str)

# Filter data for 2030 and create a choropleth map
df_2030 = df_map[df_map['Year'] == 2030].copy()

# The colour range is clamped to +/-100 k Ha so small changes stay visible;
# larger magnitudes saturate at the ends of the diverging scale.
fig = px.choropleth(
    df_2030,
    locations='Code',
    color='Net Change (k Ha)',
    hover_name='Entity',
    title='<b>AI Forecast: The World in 2030</b><br><i>Predicted Annual Net Forest Change (Linear Model)</i>',
    color_continuous_scale=px.colors.diverging.RdYlGn,
    range_color=[-100, 100],
    template='plotly_dark'
)

fig.update_layout(title_x=0.5)
fig.show()

Data Loaded: 467 rows.
Training Sets: 348 rows. Test Sets: 119 rows.
Training models per country...
Model Evaluation Complete. Average Mean Absolute Error across all countries: 45147.09 Hectares


# Country-Level Deforestation Forecasting and Validation with Linear Regression

In [29]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

# LOAD & PROCESS DATA
df = pd.read_csv('annual-change-forest-area.csv')

# Country-level rows: a code is present and the entity is not 'World'
has_code = df['Code'].notna()
df_countries = df[has_code].copy()
df_countries = df_countries[df_countries['Entity'] != 'World']

# Rows before 2015 are used for fitting, the 2015 rows for validation
train_df = df_countries[df_countries['Year'] < 2015]
test_df = df_countries[df_countries['Year'] == 2015]

errors = []

future_years = [2020, 2025, 2030]
forecast_rows = []

target_col = 'Annual net change in forest area'

# TRAIN & VALIDATE MODELS FOR EACH COUNTRY
for country in df_countries['Entity'].unique():
    country_train = train_df[train_df['Entity'] == country]
    country_test = test_df[test_df['Entity'] == country]

    # Validation pass: needs at least two training rows and a 2015 row
    can_validate = len(country_train) >= 2 and not country_test.empty
    if can_validate:
        X_train = country_train[['Year']].to_numpy()
        y_train = country_train[target_col].to_numpy()
        X_test = country_test[['Year']].to_numpy()
        y_actual = country_test[target_col].to_numpy()

        val_model = LinearRegression()
        val_model.fit(X_train, y_train)

        pred_2015 = val_model.predict(X_test)

        mae = mean_absolute_error(y_actual, pred_2015)
        errors.append(mae)

    # Forecast pass: refit on every available row for this country
    country_full = df_countries[df_countries['Entity'] == country]

    if len(country_full) >= 2:
        X_full = country_full[['Year']].to_numpy()
        y_full = country_full[target_col].to_numpy()

        final_model = LinearRegression()
        final_model.fit(X_full, y_full)

        X_future = np.array(future_years).reshape(-1, 1)
        y_future = final_model.predict(X_future)

        code = country_full['Code'].iloc[0]
        forecast_rows.extend(
            [country, code, yr, val, 'AI Forecast']
            for yr, val in zip(future_years, y_future)
        )

# CALCULATE AVERAGE VALIDATION ERROR
avg_error = np.mean(errors)
print(f"Model Validation Complete. Average Mean Absolute Error across all countries: {avg_error:.2f} Hectares")

# COMBINE HISTORICAL AND FORECAST DATA
forecast_columns = ['Entity', 'Code', 'Year', target_col, 'Type']
df_forecast = pd.DataFrame(forecast_rows, columns=forecast_columns)
df_history = df_countries[['Entity', 'Code', 'Year', target_col]].copy()
df_history['Type'] = 'Historical Data'

df_combined = pd.concat([df_history, df_forecast], ignore_index=True)
df_combined['Net Change (k Ha/yr)'] = df_combined[target_col] / 1000

# Frame labels: the year as text, with " (Forecast)" appended on forecast rows
year_text = df_combined['Year'].astype(str)
is_historical = df_combined['Type'] != 'AI Forecast'
df_combined['Year Label'] = year_text.where(is_historical, year_text + ' (Forecast)')
df_combined = df_combined.sort_values(['Year', 'Entity'])

# PLOT CHOROPLETH MAP WITH ANIMATION
fig = px.choropleth(
    df_combined,
    locations='Code',
    color='Net Change (k Ha/yr)',
    hover_name='Entity',
    animation_frame='Year Label',
    range_color=[-100, 100],
    color_continuous_scale=px.colors.diverging.RdYlGn,
    template='plotly_dark',
    title=f'<b>AI Prediction: Net Forest Change (Validation Error: +/- {avg_error/1000:.1f}k Ha)</b>',
)

fig.update_layout(title_x=0.5)
fig.show()

Model Validation Complete. Average Mean Absolute Error across all countries: 45147.09 Hectares


# Ethical AI & Limitations

1. Data Limitations

The data contains gaps, inconsistent reporting, and limited coverage for some countries and years. Important socio-economic drivers of deforestation are also not fully captured, which may introduce bias.

2. Model Simplification

Linear models and Prophet rely on smooth, stable trend assumptions and cannot reflect sudden policy changes, climate events, economic shocks, or illegal logging activities.

3. Forecast Uncertainty

Prophet’s uncertainty intervals do not account for deeper structural uncertainties. Forecasts should be interpreted as scenario estimates rather than precise outcomes.

4. Ethical Considerations

Misinterpreted AI predictions may misguide policy decisions or create unfair pressure on specific regions. Clear communication of model limits is essential.

5. Responsible Use

Use multiple models, communicate uncertainty transparently, incorporate expert and local context, and update forecasts as new data becomes available.