<a href="https://colab.research.google.com/github/Osondu-ifunanya/Prediction-of-lake-eutrophication-using-satellite-derived-chlorophyll-data/blob/main/Predicion_of_Lake.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# ---------------------------
# 1. Generate Synthetic Data
# ---------------------------

# Every synthetic sample carries four independently drawn features:
# - Chlorophyll-a concentration (mg/m³), simulating a satellite-reflectance product
# - Water temperature (°C)
# - Secchi depth (m), i.e. water clarity
# - Total phosphorus (µg/L)

np.random.seed(42)  # fixed seed: every run reproduces the same dataset
n_samples = 300

# The global RNG is consumed in the order the ranges are listed, so the
# order of this table determines the generated values; do not reorder it.
chlorophyll_a, water_temp, secchi_depth, total_phosphorus = (
    np.random.uniform(low, high, n_samples)
    for low, high in (
        (1, 100),   # chlorophyll-a, mg/m³
        (10, 30),   # water temperature, °C
        (0.5, 5),   # Secchi depth, m
        (5, 100),   # total phosphorus, µg/L
    )
)

# ---------------------------
# 2. Define eutrophication levels (Target Variable)
# ---------------------------
# 0 = Oligotrophic (Low nutrients)
# 1 = Mesotrophic (Moderate nutrients)
# 2 = Eutrophic (High nutrients)

def classify_eutrophication(chl, tp, *, chl_limits=(10, 30), tp_limits=(20, 50)):
    """Assign a eutrophication level from chlorophyll-a and total phosphorus.

    A sample is placed in the lowest class for which BOTH measurements are
    strictly below that class's upper limits; a sample that satisfies
    neither pair of limits is classed as eutrophic.

    Args:
        chl: Chlorophyll-a concentration (mg/m³).
        tp: Total phosphorus concentration (µg/L).
        chl_limits: Pair ``(oligotrophic, mesotrophic)`` of exclusive upper
            limits for ``chl``. Defaults to ``(10, 30)``.
        tp_limits: Pair ``(oligotrophic, mesotrophic)`` of exclusive upper
            limits for ``tp``. Defaults to ``(20, 50)``.

    Returns:
        0 for oligotrophic, 1 for mesotrophic, 2 for eutrophic.

    Note:
        Comparisons against NaN are always false, so a NaN in either input
        falls through to class 2.
    """
    oligo_chl, meso_chl = chl_limits
    oligo_tp, meso_tp = tp_limits

    if chl < oligo_chl and tp < oligo_tp:
        return 0
    if chl < meso_chl and tp < meso_tp:
        return 1
    return 2

# Label each sample from its (chlorophyll-a, total phosphorus) pair
labels = list(map(classify_eutrophication, chlorophyll_a, total_phosphorus))

# Gather the four features and the target into a single table;
# keyword order here fixes the column order of the DataFrame.
df = pd.DataFrame(
    data=dict(
        Chlorophyll_a=chlorophyll_a,
        Water_Temperature=water_temp,
        Secchi_Depth=secchi_depth,
        Total_Phosphorus=total_phosphorus,
        Eutrophication_Level=labels,
    )
)

# ---------------------------
# 3. Train-Test Split
# ---------------------------
# Feature matrix: the four measured columns; target: the class label column.
feature_columns = [
    'Chlorophyll_a',
    'Water_Temperature',
    'Secchi_Depth',
    'Total_Phosphorus',
]
X = df.loc[:, feature_columns]
y = df.loc[:, 'Eutrophication_Level']

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# ---------------------------
# 4. Train Random Forest Model
# ---------------------------
# Build a 100-estimator forest with a fixed seed and fit it on the training
# rows in one expression (fit() returns the estimator itself).
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# ---------------------------
# 5. Predictions and Evaluation
# ---------------------------
# Predict classes for the held-out rows
y_pred = model.predict(X_test)

# Compute and print each metric in turn: overall accuracy, the confusion
# matrix, and the per-class classification report.
for heading, score_fn in (
    ("Accuracy:", accuracy_score),
    ("\nConfusion Matrix:\n", confusion_matrix),
    ("\nClassification Report:\n", classification_report),
):
    print(heading, score_fn(y_test, y_pred))

# ---------------------------
# 6. Feature Importance
# ---------------------------
# Read the fitted forest's per-feature importances and draw them as a
# horizontal bar chart, one bar per feature column.
importances = model.feature_importances_
plt.barh(y=X.columns, width=importances, color='green')
plt.title("Feature Importance for Eutrophication Prediction")
plt.ylabel("Feature")
plt.xlabel("Feature Importance")
plt.show()

# ---------------------------
# 7. Save Synthetic Data
# ---------------------------
# Write the full table (features + label) to an Excel workbook in the
# current working directory, omitting the row index, then confirm on stdout.
df.to_excel(
    excel_writer="synthetic_lake_eutrophication_data.xlsx",
    index=False,
)
print("Synthetic dataset saved as 'synthetic_lake_eutrophication_data.xlsx'")
