In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score
import logging

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def load_and_preprocess_data(file_path):
    try:
        data = pd.read_csv(file_path)
        logging.info(f"Data loaded successfully from {file_path}")
    except FileNotFoundError:
        logging.error(f"File not found: {file_path}")
        return None
    except pd.errors.EmptyDataError:
        logging.error(f"The file {file_path} is empty")
        return None
    except pd.errors.ParserError:
        logging.error(f"Error parsing the CSV file: {file_path}")
        return None

    # Ensure required columns are present
    required_columns = ['country', 'tc_loss_ha_2003', 'tc_loss_ha_2023']
    if not all(col in data.columns for col in required_columns):
        logging.error("Missing required columns in the dataset")
        return None

    # Preprocess the data
    le = LabelEncoder()
    data['country_encoded'] = le.fit_transform(data['country'].astype(str))

    return data

def prepare_features(data):
    features = ['country_encoded', 'threshold']
    current_year = 2023
    loss_columns = [col for col in data.columns if col.startswith('tc_loss_ha_') and int(col.split('_')[-1]) < current_year]
    features.extend(sorted(loss_columns))
    
    X = data[features]
    y = data[f'tc_loss_ha_{current_year}']
    
    return X, y, features

def train_and_evaluate_model(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    rf_model.fit(X_train, y_train)
    
    y_pred = rf_model.predict(X_test)
    
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    logging.info(f"Model Performance - MSE: {mse:.2f}, R-squared: {r2:.2f}")
    
    return rf_model

def predict_future_loss(model, data, features, years_ahead=5):
    future_data = data.copy()
    current_year = 2023
    
    for i in range(1, years_ahead + 1):
        year = current_year + i
        temp_data = future_data[features].copy()
        
        predictions = model.predict(temp_data)
        
        future_data[f'tc_loss_ha_{year}'] = predictions
    
    return future_data

def suggest_afforestation(future_data):
    current_year = 2023
    future_loss_columns = [col for col in future_data.columns if col.startswith('tc_loss_ha_') and int(col.split('_')[-1]) > current_year]
    
    future_data['total_future_loss'] = future_data[future_loss_columns].sum(axis=1)
    
    high_loss_areas = future_data.nlargest(10, 'total_future_loss')
    
    print("\nTop 10 areas for afforestation efforts:")
    
    for index, row in high_loss_areas.iterrows():
        print(f"\nArea: {row['country']}")
        print(f"Tree cover threshold: {row['threshold']}%")
        print(f"Predicted loss over next {len(future_loss_columns)} years: {row['total_future_loss']:.2f} ha")
        print(f"Suggested afforestation: {row['total_future_loss'] * 1.2:.2f} ha")
        
        # Provide different suggestions based on the threshold value
        if row['threshold'] < 20:
            print("Recommendations:")
            print("1. Immediate intervention needed to restore tree cover.")
            print("2. Implement large-scale afforestation programs.")
            print("3. Engage local communities to promote awareness.")
            print("4. Focus on fast-growing native species.")
            print("5. Establish monitoring systems to track progress.")
        
        elif 20 <= row['threshold'] < 50:
            print("Recommendations:")
            print("1. Targeted afforestation programs focusing on degraded areas.")
            print("2. Promote sustainable land management practices.")
            print("3. Collaborate with NGOs for community engagement.")
            print("4. Use a mix of native and non-invasive species.")
            print("5. Monitor tree growth and health regularly.")

        else:  # row['threshold'] >= 50
            print("Recommendations:")
            print("1. Maintain existing forest cover through conservation efforts.")
            print("2. Implement selective logging practices.")
            print("3. Encourage agroforestry systems to enhance biodiversity.")
            print("4. Conduct educational programs on forest conservation.")
            print("5. Establish protected areas to safeguard against deforestation.")

def analyze_feature_importance(model, features):
    feature_importance = pd.DataFrame({'feature': features, 'importance': model.feature_importances_})
    feature_importance = feature_importance.sort_values('importance', ascending=False)
    
    print("\nTop 10 most important features for prediction:")
    print(feature_importance.head(10))

def run_model(file_path):
    data = load_and_preprocess_data(file_path)
    if data is None:
        return
    
    X, y, features = prepare_features(data)
    
    model = train_and_evaluate_model(X, y)
    
    future_data = predict_future_loss(model, data, features)
    
    suggest_afforestation(future_data)
    
    analyze_feature_importance(model, features)

# Example usage
file_path = 'C:/Users/jessd/Downloads/Bit N Built/subnationalforestcover.csv'
run_model(file_path)

2024-10-12 19:30:20,220 - INFO - Data loaded successfully from C:/Users/jessd/Downloads/Bit N Built/subnationalforestcover.csv
2024-10-12 19:30:26,143 - INFO - Model Performance - MSE: 19560.30, R-squared: 0.95



Top 10 areas for afforestation efforts:

Area: India
Tree cover threshold: 0%
Predicted loss over next 5 years: 39829.80 ha
Suggested afforestation: 47795.76 ha
Recommendations:
1. Immediate intervention needed to restore tree cover.
2. Implement large-scale afforestation programs.
3. Engage local communities to promote awareness.
4. Focus on fast-growing native species.
5. Establish monitoring systems to track progress.

Area: India
Tree cover threshold: 10%
Predicted loss over next 5 years: 36202.45 ha
Suggested afforestation: 43442.94 ha
Recommendations:
1. Immediate intervention needed to restore tree cover.
2. Implement large-scale afforestation programs.
3. Engage local communities to promote awareness.
4. Focus on fast-growing native species.
5. Establish monitoring systems to track progress.

Area: India
Tree cover threshold: 15%
Predicted loss over next 5 years: 36080.95 ha
Suggested afforestation: 43297.14 ha
Recommendations:
1. Immediate intervention needed to restore tree 