In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px

# Load the deforestation table that the charts in this cell are built from.
df = pd.read_csv('annual-deforestation.csv')

# Isolate the rows of the 'World' entity and add a copy of the deforestation
# column divided by one million (the 'M Ha/yr' column plotted below).
world_df = df.loc[df['Entity'] == 'World'].assign(
    **{'Deforestation (M Ha/yr)': lambda frame: frame['Deforestation'] / 1000000}
)

# Human-readable axis titles, keyed by the column they replace.
global_axis_labels = {
    'Year': 'Period End Year (Annual Average of Preceding 5-10 Years)',
    'Deforestation (M Ha/yr)': 'Deforestation Rate (Million Hectares/Year)',
}

fig_global = px.line(
    world_df,
    x='Year',
    y='Deforestation (M Ha/yr)',
    title='Global Annual Deforestation Rate Trend',
    labels=global_axis_labels,
    markers=True,
)

# Linear tick mode on the x axis; title centred horizontally.
fig_global.update_layout(title_x=0.5, xaxis={'tickmode': 'linear'})

fig_global.show()


# Entities that have no code are treated as aggregates; 'World' is added to
# the exclusion list explicitly by name.
aggregate_entities = df.loc[df['Code'].isna(), 'Entity'].unique()
entities_to_exclude = [*aggregate_entities, 'World']

# Everything that is not on the exclusion list is kept as country-level data.
is_excluded = df['Entity'].isin(entities_to_exclude)
country_df = df.loc[~is_excluded].copy()

# Mean deforestation per entity over all of its rows, largest first, top ten.
mean_by_entity = country_df.groupby('Entity')['Deforestation'].mean()
avg_deforestation = (
    mean_by_entity
    .sort_values(ascending=False)
    .reset_index()
    .head(10)
)

# Same values divided by one thousand for the 'k Ha/yr' column.
avg_deforestation['Average Deforestation (k Ha/yr)'] = avg_deforestation['Deforestation'] / 1000

top10_axis_labels = {
    'Entity': 'Country/Territory',
    'Average Deforestation (k Ha/yr)': 'Average Deforestation Rate (Thousands of Hectares/Year)',
}

fig_top10 = px.bar(
    avg_deforestation,
    x='Entity',
    y='Average Deforestation (k Ha/yr)',
    title='Top 10 Countries/Territories by Average Annual Deforestation Rate (1990-2020)',
    labels=top10_axis_labels,
    # Bars are shaded by the same value that sets their height.
    color='Average Deforestation (k Ha/yr)',
    color_continuous_scale=px.colors.sequential.Sunset,
)
fig_top10.update_layout(title_x=0.5)

fig_top10.show()

In [5]:
# Animated world map: one choropleth frame per year of country-level data.
#
# Keep only rows that carry a code, and additionally drop the 'World'
# aggregate. 'World' is selected by name elsewhere in this notebook and
# presumably carries a code of its own (TODO confirm against the CSV). It
# matches no region on the map, but its value would still take part in the
# automatic colour range and flatten the colours of every real country.
is_country_row = df['Code'].notna() & (df['Entity'] != 'World')
country_df = df[is_country_row].copy()

# Put rows in chronological order so the animation frames are built in the
# same order as the year-sorted slider steps defined further down.
country_df = country_df.sort_values('Year', kind='stable')

country_df['Deforestation (k Ha/yr)'] = country_df['Deforestation'] / 1000

# The animation frame key is the year rendered as text.
country_df['Year_str'] = country_df['Year'].astype(str)

fig = px.choropleth(
    country_df,
    locations='Code',
    color='Deforestation (k Ha/yr)',
    hover_name='Entity',
    animation_frame='Year_str',
    color_continuous_scale=px.colors.sequential.YlOrRd,
    scope='world',
    title='Annual Deforestation Rate Evolution by Country (1990-2015/2020)',
    labels={'Deforestation (k Ha/yr)': 'Deforestation Rate (Thousands of Hectares/Year)'}
)

fig.update_layout(
    title_x=0.5,

    # Slider steps listed explicitly in ascending year order; each step
    # animates to the frame named after its year.
    sliders=[dict(
        steps=[dict(
            method='animate',
            args=[[str(y)]],
            label=str(y)
        ) for y in sorted(country_df['Year'].unique())]
    )]
)


print("Generated animated world map visualization of deforestation.")
fig.show()

Generated animated world map visualization of deforestation.


In [11]:
# Data preparation: rows with a code, minus the 'World' aggregate (selected
# by name elsewhere in this notebook; presumably it carries a code of its
# own - TODO confirm), so only country-level rows are mapped.
country_df = df[df['Code'].notna() & (df['Entity'] != 'World')].copy()
country_df['Deforestation (k Ha/yr)'] = country_df['Deforestation'] / 1000
country_df['Year_str'] = country_df['Year'].astype(str)

# Creating the map: one animation frame per year label.
fig = px.choropleth(
    country_df,
    locations='Code',
    color='Deforestation (k Ha/yr)',
    hover_name='Entity',
    animation_frame='Year_str',

    color_continuous_scale=px.colors.sequential.Reds,
    # Fixed colour range so every frame shares the same scale; values above
    # the upper bound are drawn with the darkest colour.
    range_color=[0, 500],
    projection='natural earth',
    title='<b>Global Deforestation Evolution</b><br><i>(Annual Rate in Thousands of Hectares)</i>',
    # Dark template: the colour-bar tick labels are forced to white below,
    # which would be unreadable on a light page background.
    template='plotly_dark',
)

fig.update_layout(
    title_font_size=24,
    title_x=0.5,
    font_family="Arial, sans-serif",
    margin=dict(l=20, r=20, t=80, b=20),
    coloraxis_colorbar=dict(
        title="Loss (k Ha/yr)",
        thickness=15,
        len=0.5,
        tickfont=dict(color='white')
    )
)

# Customisation of the map: hide the default base layers, then switch on
# country borders, dark land and a blue ocean explicitly.
fig.update_geos(
    visible=False,
    showcountries=True, countrycolor="#8D8D8D",
    showland=True, landcolor="#2A2A2A",
    showocean=True, oceancolor="#0099FF"
)

fig.show()

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression

# Linear-trend forecast of the global deforestation rate.
#
# A straight line is fitted to (Year, Deforestation) for the global series
# and evaluated at three future years; history and forecast are then drawn
# as two coloured lines on one chart.
df = pd.read_csv('annual-deforestation.csv')

if 'World' in df['Entity'].values:
    # Preferred source: the ready-made global aggregate.
    df_world = df[df['Entity'] == 'World'].copy()
else:
    # Fallback: rebuild a global total per year. Only rows that carry a code
    # are summed; rows without one are treated as regional/group aggregates
    # (the convention used by the country filters in this notebook), and
    # adding them on top of their member countries would double count.
    df_world = (
        df[df['Code'].notna()]
        .groupby('Year')['Deforestation']
        .sum()
        .reset_index()
    )

df_world = df_world.sort_values('Year')

# scikit-learn expects a 2-D feature matrix, hence the single-column reshape.
X = df_world['Year'].values.reshape(-1, 1)
y = df_world['Deforestation'].values

model = LinearRegression()
model.fit(X, y)

future_years = np.array([2020, 2025, 2030]).reshape(-1, 1)
future_predictions = model.predict(future_years)

df_history = df_world[['Year', 'Deforestation']].copy()
df_history['Type'] = 'Historical Data'

df_future = pd.DataFrame({
    'Year': future_years.flatten(),
    'Deforestation': future_predictions,
    'Type': 'AI Prediction'
})

df_forecast = pd.concat([df_history, df_future], ignore_index=True)

# Plotting column: raw values divided by one million.
df_forecast['Deforestation (M Ha)'] = df_forecast['Deforestation'] / 1_000_000

fig = px.line(
    df_forecast,
    x='Year',
    y='Deforestation (M Ha)',
    color='Type',
    markers=True,
    title='<b>AI Forecast: Global Deforestation Rate (Gross Loss)</b><br><i>(Projected Trend to 2030)</i>',
    labels={'Deforestation (M Ha)': 'Deforestation Rate (Million Hectares/Year)'},
    color_discrete_map={'Historical Data': 'red', 'AI Prediction': 'orange'}
)

# Height of the annotation. The three forecast years are evenly spaced and
# the model is a straight line, so their mean equals the value at 2025,
# which is where the arrow is anchored.
mean_future_val = df_future['Deforestation'].mean() / 1_000_000

# The sign of the fitted slope decides the wording (a zero slope is reported
# as "Decreasing").
trend = "Increasing" if model.coef_[0] > 0 else "Decreasing"
fig.add_annotation(
    x=2025, y=mean_future_val,
    text=f"Trend: {trend} Loss",
    showarrow=True,
    arrowhead=1
)

fig.update_layout(template='plotly_dark', title_x=0.5)
fig.show()

In [None]:
import pandas as pd
import plotly.express as px

# Source tables for the three figures of this cell.
df_share = pd.read_csv('forest-area-as-share-of-land-area.csv')
df_transition = pd.read_csv('forest-transition-phase.csv')
df_per_capita = pd.read_csv('per-capita-co2-food-deforestation.csv')
df_change = pd.read_csv('annual-change-forest-area.csv')

# One row per code: after ordering by year, the last row of each code is the
# one that survives de-duplication.
df_share_by_year = df_share.sort_values('Year')
df_share_latest = df_share_by_year.drop_duplicates('Code', keep='last')

# Static world map coloured by the forest-share column.
share_map_options = dict(
    locations='Code',
    color='Share of land covered by forest',
    hover_name='Entity',
    title='<b>Global Forest Cover Share</b><br><i>(% of Land Area - Latest Available Data)</i>',
    color_continuous_scale=px.colors.sequential.Greens,
    template='plotly_dark',
)
fig_share = px.choropleth(df_share_latest, **share_map_options)
fig_share.update_layout(title_x=0.5)
fig_share.show()


# Year whose per-capita figures are ranked.
target_year_co2 = 2013
is_target_year = df_per_capita['Year'] == target_year_co2
df_pc_2013 = df_per_capita.loc[is_target_year].copy()

# Fifteen largest values of the per-capita emissions column.
ranked_by_emissions = df_pc_2013.sort_values('per_capita_embodied_emissions', ascending=False)
top_consumers = ranked_by_emissions.head(15)

consumer_title = (
    f'<b>Top 15 Countries by Per Capita Deforestation Emissions ({target_year_co2})</b>'
    f'<br><i>(CO2 from Food-Related Deforestation)</i>'
)
consumer_labels = {
    'per_capita_embodied_emissions': 'CO2 Emissions (Tonnes per person)',
    'Entity': 'Country',
}

# Horizontal bars, shaded by the same value that sets their length.
fig_consumer = px.bar(
    top_consumers,
    x='per_capita_embodied_emissions',
    y='Entity',
    orientation='h',
    title=consumer_title,
    labels=consumer_labels,
    color='per_capita_embodied_emissions',
    color_continuous_scale=px.colors.sequential.Magma,
    template='plotly_dark',
)

# Order the category axis by bar total, ascending.
fig_consumer.update_layout(yaxis={'categoryorder': 'total ascending'}, title_x=0.5)
fig_consumer.show()

# Scatter of forest share against net forest change for one reference year,
# coloured by forest transition phase.
target_year_matrix = 2010

# Rows without a code are removed from each table before joining: pandas
# treats missing join keys as equal to one another, so code-less rows from
# one table would otherwise be paired with every code-less row of the other
# and show up as spurious points in a chart titled as per-country.
df_share_yr = (
    df_share[df_share['Year'] == target_year_matrix]
    [['Code', 'Entity', 'Share of land covered by forest']]
    .dropna(subset=['Code'])
)
df_change_yr = (
    df_change[df_change['Year'] == target_year_matrix]
    [['Code', 'Annual net change in forest area']]
    .dropna(subset=['Code'])
)

# Transition phase: the last row per code after ordering by year, i.e. not
# necessarily the phase recorded for the reference year itself.
df_trans_yr = (
    df_transition
    .dropna(subset=['Code'])
    .sort_values('Year')
    .drop_duplicates('Code', keep='last')
    [['Code', 'Forest Transition Phase']]
)

# Inner joins: only codes present in all three tables are plotted.
df_matrix = pd.merge(df_share_yr, df_change_yr, on='Code', how='inner')
df_matrix = pd.merge(df_matrix, df_trans_yr, on='Code', how='inner')

fig_matrix = px.scatter(
    df_matrix,
    x='Share of land covered by forest',
    y='Annual net change in forest area',
    color='Forest Transition Phase',
    # Marker size follows the same column as the x axis.
    size='Share of land covered by forest',
    hover_name='Entity',
    title=f'<b>Forest Transition Matrix ({target_year_matrix})</b><br><i>Clustering Countries by Forest Share vs. Net Change</i>',
    labels={
        'Share of land covered by forest': 'Forest Share (% of Land)',
        'Annual net change in forest area': 'Net Forest Change (Hectares/Year)'
    },
    template='plotly_dark',
    color_discrete_sequence=px.colors.qualitative.Bold
)

# Reference line separating net gain (above) from net loss (below).
fig_matrix.add_hline(y=0, line_dash="dash", line_color="white", annotation_text="Net Zero (Balance)")
fig_matrix.update_layout(title_x=0.5)
fig_matrix.show()

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score

# Per-country linear trend of annual net forest change.
#
# For every entity two things happen independently:
#   1. validation - fit on rows before 2015, score the 2015 row(s) by MAE;
#   2. forecast   - fit on all rows and predict 2020, 2025 and 2030.
# The 2030 predictions are then drawn as a world map.
df = pd.read_csv('annual-change-forest-area.csv')

# Country-level rows only: a code must be present and the entity must not be
# the 'World' aggregate.
df_clean = df[df['Code'].notna()].copy()
df_clean = df_clean[df_clean['Entity'] != 'World']

print(f"Data Loaded: {len(df_clean)} rows.")

# Temporal split: everything before 2015 trains, 2015 itself is held out.
train_df = df_clean[df_clean['Year'] < 2015]
test_df = df_clean[df_clean['Year'] == 2015]

print(f"Training Sets: {len(train_df)} rows. Test Sets: {len(test_df)} rows.")

countries = df_clean['Entity'].unique()
mae_scores = []
models = {}  # entity name -> model fitted on that entity's full history
future_forecasts = []

# Loop-invariant inputs, built once instead of once per country.
target_col = 'Annual net change in forest area'
future_years = np.array([2020, 2025, 2030]).reshape(-1, 1)

print("Training models per country...")

# sort=False keeps entities in order of first appearance, matching the order
# of `countries`.
for country, c_full in df_clean.groupby('Entity', sort=False):
    c_train = c_full[c_full['Year'] < 2015]
    c_test = c_full[c_full['Year'] == 2015]

    # Validation needs at least two training points and a held-out row.
    if len(c_train) >= 2 and len(c_test) > 0:
        model = LinearRegression()
        model.fit(c_train['Year'].values.reshape(-1, 1), c_train[target_col].values)

        y_true = c_test[target_col].values
        y_pred_test = model.predict(c_test['Year'].values.reshape(-1, 1))
        mae_scores.append(mean_absolute_error(y_true, y_pred_test))

    # Forecasting is decided on the full history alone, so an entity with
    # enough rows overall is not dropped from the map merely because fewer
    # than two of them fall before 2015.
    if len(c_full) >= 2:
        final_model = LinearRegression()
        final_model.fit(c_full['Year'].values.reshape(-1, 1), c_full[target_col].values)
        models[country] = final_model

        future_preds = final_model.predict(future_years)

        code = c_full['Code'].iloc[0]
        for yr, pred in zip(future_years.flatten(), future_preds):
            future_forecasts.append({
                'Entity': country,
                'Code': code,
                'Year': yr,
                'Predicted Net Change': pred,
                'Type': 'AI Forecast'
            })

# Mean of the per-country errors; NaN (without a runtime warning) when no
# country could be validated.
avg_error = np.mean(mae_scores) if mae_scores else np.nan
print(f"Model Evaluation Complete. Average Mean Absolute Error across all countries: {avg_error:.2f} Hectares")

df_forecast = pd.DataFrame(future_forecasts)

# History is renamed onto the forecast column so both can share one table.
df_history = (
    df_clean[['Entity', 'Code', 'Year', target_col]]
    .rename(columns={target_col: 'Predicted Net Change'})
)
df_history['Type'] = 'Historical'

df_map = pd.concat([df_history, df_forecast], ignore_index=True)

df_map['Net Change (k Ha)'] = df_map['Predicted Net Change'] / 1000
df_map['Year_Str'] = df_map['Year'].astype(str)

# Only the 2030 rows are mapped.
df_2030 = df_map[df_map['Year'] == 2030].copy()

fig = px.choropleth(
    df_2030,
    locations='Code',
    color='Net Change (k Ha)',
    hover_name='Entity',
    title='<b>AI Forecast: The World in 2030</b><br><i>Predicted Annual Net Forest Change (Linear Model)</i>',
    color_continuous_scale=px.colors.diverging.RdYlGn,
    # Symmetric fixed range so zero sits at the midpoint of the diverging
    # scale; values beyond the bounds take the end colours.
    range_color=[-100, 100],
    template='plotly_dark'
)

fig.update_layout(title_x=0.5)
fig.show()

Data Loaded: 467 rows.
Training Sets: 348 rows. Test Sets: 119 rows.
Training models per country...
Model Evaluation Complete. Average Mean Absolute Error across all countries: 45147.09 Hectares


In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

# Load the forest-change table and keep coded rows other than 'World'.
df = pd.read_csv('annual-change-forest-area.csv')

has_code = df['Code'].notna()
df_countries = df[has_code].copy()
df_countries = df_countries[df_countries['Entity'] != 'World']

# Rows before 2015 are used for fitting, the 2015 rows for checking.
train_df = df_countries[df_countries['Year'] < 2015]
test_df = df_countries[df_countries['Year'] == 2015]

errors = []

future_years = [2020, 2025, 2030]
forecast_rows = []

for country in df_countries['Entity'].unique():
    country_train = train_df[train_df['Entity'] == country]
    country_test = test_df[test_df['Entity'] == country]

    # Validation pass: requires two or more training rows and a 2015 row.
    can_validate = len(country_train) >= 2 and not country_test.empty
    if can_validate:
        X_train = country_train['Year'].values.reshape(-1, 1)
        y_train = country_train['Annual net change in forest area'].values
        X_test = country_test['Year'].values.reshape(-1, 1)
        y_actual = country_test['Annual net change in forest area'].values

        val_model = LinearRegression()
        val_model.fit(X_train, y_train)

        pred_2015 = val_model.predict(X_test)

        mae = mean_absolute_error(y_actual, pred_2015)
        errors.append(mae)

    # Forecast pass: refit on every row of the country, independently of
    # whether the validation pass ran.
    country_full = df_countries[df_countries['Entity'] == country]

    if len(country_full) >= 2:
        X_full = country_full['Year'].values.reshape(-1, 1)
        y_full = country_full['Annual net change in forest area'].values

        final_model = LinearRegression()
        final_model.fit(X_full, y_full)

        X_future = np.array(future_years).reshape(-1, 1)
        y_future = final_model.predict(X_future)

        code = country_full['Code'].iloc[0]
        forecast_rows.extend(
            [country, code, yr, val, 'AI Forecast']
            for yr, val in zip(future_years, y_future)
        )

avg_error = np.mean(errors)
print(f"Model Validation Complete. Average Mean Absolute Error across all countries: {avg_error:.2f} Hectares")

# Stack history and forecast into one long table with a 'Type' marker.
combined_columns = ['Entity', 'Code', 'Year', 'Annual net change in forest area', 'Type']
df_forecast = pd.DataFrame(forecast_rows, columns=combined_columns)
df_history = df_countries[['Entity', 'Code', 'Year', 'Annual net change in forest area']].copy()
df_history['Type'] = 'Historical Data'

df_combined = pd.concat([df_history, df_forecast], ignore_index=True)
df_combined['Net Change (k Ha/yr)'] = df_combined['Annual net change in forest area'] / 1000


def _frame_label(row):
    """Return the animation-frame label of one row; forecast rows get a suffix."""
    if row['Type'] == 'AI Forecast':
        return f"{row['Year']} (Forecast)"
    return str(row['Year'])


df_combined['Year Label'] = df_combined.apply(_frame_label, axis=1)

# Rows are ordered by year, then entity, before the frames are built.
df_combined = df_combined.sort_values(['Year', 'Entity'])

fig = px.choropleth(
    df_combined,
    locations='Code',
    color='Net Change (k Ha/yr)',
    hover_name='Entity',
    animation_frame='Year Label',
    color_continuous_scale=px.colors.diverging.RdYlGn,
    # Fixed symmetric range shared by every frame.
    range_color=[-100, 100],
    title=f'<b>AI Prediction: Net Forest Change (Validation Error: +/- {avg_error/1000:.1f}k Ha)</b>',
    template='plotly_dark'
)

fig.update_layout(title_x=0.5)
fig.show()

Model Validation Complete. Average Mean Absolute Error across all countries: 45147.09 Hectares
