<a href="https://colab.research.google.com/github/Osondu-ifunanya/Ecological-niche-modeling-under-climate-change-using-ensemble-learning/blob/main/Ecological%20niche%20modelling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""
Ecological Niche Modeling under Climate Change
Using Ensemble Machine Learning (Synthetic Data)

Models:
- Random Forest
- Gradient Boosting
- Logistic Regression

Outputs:
- Current suitability distribution (histogram)
- Future suitability distribution (histogram)
- Ensemble prediction
- Excel export of results
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

np.random.seed(42)

# ----------------------------------
# 1. Generate Synthetic Environmental Data
# ----------------------------------

n_samples = 2000

# Environmental predictors (Current Climate).
# The order of the draws below determines the random stream consumed from the
# seeded global generator, so keep it stable to keep results reproducible.
temperature = np.random.normal(15, 5, n_samples)   # °C

# A normal distribution with mean 1000 and sd 300 has a small but non-zero
# tail below zero. Annual precipitation cannot be negative, so clamp the draw
# at 0 mm (this does not consume any extra random numbers).
precipitation = np.maximum(np.random.normal(1000, 300, n_samples), 0.0)  # mm/year

elevation = np.random.normal(500, 200, n_samples)  # meters
soil_moisture = np.random.uniform(0.1, 0.8, n_samples)

# Species suitability logic (synthetic ecological response curve)
def species_probability(temp, prec, elev, soil):
    """Return the unnormalised synthetic suitability for the given conditions.

    The response is the product of three Gaussian-shaped terms that peak at
    ``temp == 18``, ``prec == 1100`` and ``elev == 600``, multiplied by
    ``soil``. Arguments may be scalars or NumPy arrays; arrays are combined
    element-wise.
    """
    temp_term = np.exp(-(temp - 18)**2 / 20)
    prec_term = np.exp(-(prec - 1100)**2 / 200000)
    elev_term = np.exp(-(elev - 600)**2 / 80000)
    return temp_term * prec_term * elev_term * soil

# Raw response for every site, rescaled so the most suitable site has
# probability 1.
raw_suitability = species_probability(
    temperature, precipitation, elevation, soil_moisture
)
prob = raw_suitability / raw_suitability.max()

# One Bernoulli draw per site: 1 = presence, 0 = absence.
occurrence = np.random.binomial(1, prob)

# Predictors plus the simulated presence/absence label in a single table.
site_columns = {
    "temperature": temperature,
    "precipitation": precipitation,
    "elevation": elevation,
    "soil_moisture": soil_moisture,
    "presence": occurrence,
}
df = pd.DataFrame(site_columns)

# ----------------------------------
# 2. Train/Test Split
# ----------------------------------

# Separate the label column from the environmental predictors.
target_column = "presence"
y = df[target_column]
X = df.drop(columns=[target_column])

# Hold out 30% of the sites for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Standardisation statistics come from the training rows only and are then
# applied to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ----------------------------------
# 3. Train Ensemble Models
# ----------------------------------

rf = RandomForestClassifier(n_estimators=200, random_state=42)
gb = GradientBoostingClassifier(random_state=42)
lr = LogisticRegression(max_iter=1000)

# The tree ensembles are fitted on the raw predictors; logistic regression is
# fitted on the standardised ones.
for estimator, train_features in (
    (rf, X_train),
    (gb, X_train),
    (lr, X_train_scaled),
):
    estimator.fit(train_features, y_train)

# ----------------------------------
# 4. Evaluate Models
# ----------------------------------

# Predicted presence probability (positive-class column) on the held-out rows.
rf_pred, gb_pred = (
    estimator.predict_proba(X_test)[:, 1] for estimator in (rf, gb)
)
lr_pred = lr.predict_proba(X_test_scaled)[:, 1]

# ROC AUC of each individual model against the held-out labels.
rf_auc, gb_auc, lr_auc = (
    roc_auc_score(y_test, scores) for scores in (rf_pred, gb_pred, lr_pred)
)

print("Model AUC Scores:")
for label, auc_value in (
    ("Random Forest:", rf_auc),
    ("Gradient Boosting:", gb_auc),
    ("Logistic Regression:", lr_auc),
):
    print(label, auc_value)

# Ensemble: unweighted mean of the three predicted probabilities.
ensemble_pred = (rf_pred + gb_pred + lr_pred) / 3
ensemble_auc = roc_auc_score(y_test, ensemble_pred)
print("Ensemble AUC:", ensemble_auc)

# ----------------------------------
# 5. Future Climate Scenario
# ----------------------------------

# Climate-change scenario: every site warms by 2 °C and receives 10% less
# precipitation; elevation and soil moisture are left unchanged.
future_temp = temperature + 2
future_prec = precipitation * 0.9

future_columns = {
    "temperature": future_temp,
    "precipitation": future_prec,
    "elevation": elevation,
    "soil_moisture": soil_moisture,
}
future_df = pd.DataFrame(future_columns)

# Reuse the scaler fitted on the training rows for the logistic regression input.
future_scaled = scaler.transform(future_df)

rf_future, gb_future = (
    estimator.predict_proba(future_df)[:, 1] for estimator in (rf, gb)
)
lr_future = lr.predict_proba(future_scaled)[:, 1]

# Unweighted mean of the three models' projected probabilities.
ensemble_future = (rf_future + gb_future + lr_future) / 3

# ----------------------------------
# 6. Visualization
# ----------------------------------

# Project the ensemble onto every site under the current climate so that both
# panels describe the same set of sites. Plotting the held-out test
# predictions here would compare only the 30% test partition against the
# future projection, which covers all sites.
current_all_sites = (
    rf.predict_proba(X)[:, 1]
    + gb.predict_proba(X)[:, 1]
    + lr.predict_proba(scaler.transform(X))[:, 1]
) / 3

# Identical bin edges over the probability range [0, 1] keep the two
# histograms directly comparable.
suitability_bins = np.linspace(0, 1, 31)

plt.figure(figsize=(10, 5))

ax_current = plt.subplot(1, 2, 1)
plt.hist(current_all_sites, bins=suitability_bins)
plt.title("Current Suitability")
plt.xlabel("Ensemble suitability")
plt.ylabel("Number of sites")

# Share the y-axis so bar heights can be compared across the two panels.
plt.subplot(1, 2, 2, sharey=ax_current)
plt.hist(ensemble_future, bins=suitability_bins)
plt.title("Future Suitability (Climate Change)")
plt.xlabel("Ensemble suitability")

plt.tight_layout()
plt.show()

# ----------------------------------
# 7. Export Results
# ----------------------------------

# Current-climate probability from each model for every site; logistic
# regression receives the standardised predictors.
rf_current = rf.predict_proba(X)[:, 1]
gb_current = gb.predict_proba(X)[:, 1]
lr_current = lr.predict_proba(scaler.transform(X))[:, 1]

# One row per site: ensemble mean under current and future conditions.
suitability_columns = {
    "current_suitability": (rf_current + gb_current + lr_current) / 3,
    "future_suitability": ensemble_future,
}
output = pd.DataFrame(suitability_columns)

output.to_excel("ecological_niche_modeling_results.xlsx", index=False)

print("Results exported to ecological_niche_modeling_results.xlsx")
