# Deforestation Detection using CNN (Synthetic Dataset)
This notebook trains a small CNN to classify images as **forest** or **deforested**. The dataset is synthetic: it is generated programmatically when the notebook runs, unless it is already present on disk.

**Sections**:
1. Generate / inspect dataset
2. Prepare data generators
3. Build & train CNN
4. Evaluate and save model

Feel free to replace `dataset/` with a real dataset for improved results.


In [None]:
# Imports and configuration
import os
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path

# Check TensorFlow / Keras availability.
# Only ImportError is intercepted: the "pip install" hint is accurate only when
# the package cannot be imported; any other failure propagates unchanged so its
# real cause is not masked by a misleading message.
try:
    import tensorflow as tf
    from tensorflow.keras import layers, models
except ImportError:
    print("TensorFlow not available. Install with: pip install tensorflow")
    raise  # bare raise re-raises the active exception with its traceback intact
else:
    print("TensorFlow version:", tf.__version__)

# Root folder of the image dataset; the cells below read and write
# DATA_DIR/<split>/<class>/*.png.
DATA_DIR = Path("dataset")
print("DATA_DIR exists:", DATA_DIR.exists())

In [None]:
# If the dataset is missing, generate a synthetic one (a safeguard in case the user removed it)
from PIL import Image, ImageDraw
import numpy as np
import random

def make_forest_image(size=(128,128), variance=30):
    """Return a synthetic "forest" tile as a PIL RGB image.

    The tile is a forest-green background with per-pixel, per-channel uniform
    noise, overlaid with 3 to 6 filled greenish ellipses at random positions.

    Args:
        size: ``(width, height)`` of the image in pixels. Any two-item
            sequence is accepted; width and height may differ.
        variance: Noise amplitude. Each channel value is shifted by an integer
            drawn from ``[-variance, variance)``; ``0`` disables the noise.

    Returns:
        A ``PIL.Image.Image`` in RGB mode with the requested size.

    Raises:
        ValueError: If ``variance`` is negative.
    """
    if variance < 0:
        raise ValueError("variance must be non-negative")
    size = tuple(size)
    img = Image.new("RGB", size, (34, 139, 34))
    # int16 leaves headroom so adding negative noise cannot wrap before clipping.
    arr = np.array(img).astype(np.int16)
    if variance > 0:
        # Shape the noise like the pixel array, which is (height, width, 3).
        # Deriving it from `size` (width, height) fails for non-square images.
        arr = arr + np.random.randint(-variance, variance, arr.shape)
    arr = np.clip(arr, 0, 255).astype(np.uint8)
    img = Image.fromarray(arr)
    draw = ImageDraw.Draw(img)  # one drawing context reused for every blob
    for _ in range(np.random.randint(3, 7)):
        x = np.random.randint(0, size[0])
        y = np.random.randint(0, size[1])
        r = np.random.randint(6, 20)
        color = (np.random.randint(20, 60), np.random.randint(80, 160), np.random.randint(20, 60))
        draw.ellipse((x - r, y - r, x + r, y + r), fill=color)
    return img

def make_deforested_image(size=(128,128), variance=30):
    """Return a synthetic "deforested" tile as a PIL RGB image.

    The tile is a brown background with per-pixel, per-channel uniform noise,
    overlaid with 3 to 7 filled rectangles and 2 to 4 small dark ellipses at
    random positions.

    Args:
        size: ``(width, height)`` of the image in pixels. Any two-item
            sequence is accepted; width and height may differ.
        variance: Noise amplitude. Each channel value is shifted by an integer
            drawn from ``[-variance, variance)``; ``0`` disables the noise.

    Returns:
        A ``PIL.Image.Image`` in RGB mode with the requested size.

    Raises:
        ValueError: If ``variance`` is negative.
    """
    if variance < 0:
        raise ValueError("variance must be non-negative")
    size = tuple(size)
    img = Image.new("RGB", size, (139, 69, 19))
    # int16 leaves headroom so adding negative noise cannot wrap before clipping.
    arr = np.array(img).astype(np.int16)
    if variance > 0:
        # Shape the noise like the pixel array, which is (height, width, 3).
        # Deriving it from `size` (width, height) fails for non-square images.
        arr = arr + np.random.randint(-variance, variance, arr.shape)
    arr = np.clip(arr, 0, 255).astype(np.uint8)
    img = Image.fromarray(arr)
    draw = ImageDraw.Draw(img)
    # Flat rectangular patches in a lighter brown.
    for _ in range(np.random.randint(3, 8)):
        x = np.random.randint(0, size[0])
        y = np.random.randint(0, size[1])
        w = np.random.randint(8, 30)
        h = np.random.randint(4, 12)
        draw.rectangle((x, y, x + w, y + h), fill=(120, 80, 60))
    # Small dark round spots.
    for _ in range(np.random.randint(2, 5)):
        x = np.random.randint(0, size[0])
        y = np.random.randint(0, size[1])
        r = np.random.randint(3, 8)
        draw.ellipse((x - r, y - r, x + r, y + r), fill=(70, 40, 20))
    return img

# Fill in every split/class folder that is absent or holds no PNG files.
# Folders that already contain at least one PNG are left untouched.
samples_per_split = {'train': 80, 'test': 20}
image_makers = {'forest': make_forest_image, 'deforested': make_deforested_image}

for split, sample_count in samples_per_split.items():
    for cls, make_image in image_makers.items():
        target_dir = DATA_DIR / split / cls
        if target_dir.exists() and any(target_dir.glob('*.png')):
            continue
        target_dir.mkdir(parents=True, exist_ok=True)
        for idx in range(sample_count):
            make_image().save(target_dir / f'{cls}_{idx:03d}.png')
print('Dataset ready at', DATA_DIR)

In [None]:
# Display a few sample images
import matplotlib.pyplot as plt
from PIL import Image
# Show the first four training images of each class, one class per row.
fig, axes = plt.subplots(2, 4, figsize=(10, 5))
for row, cls in enumerate(['forest', 'deforested']):
    p = DATA_DIR / 'train' / cls
    files = sorted(p.glob('*.png'))[:4]
    for ax, f in zip(axes[row], files):
        # The context manager closes the file handle; imshow is called inside
        # it so the pixel data is read while the file is still open.
        with Image.open(f) as im:
            ax.imshow(im)
        ax.set_title(f.name)

# Hide the frame on every axis, including any left empty because a class
# has fewer than four images.
for ax in axes.flat:
    ax.axis('off')
plt.tight_layout()


In [None]:
# Prepare ImageDataGenerators
from tensorflow.keras.preprocessing.image import ImageDataGenerator

img_size = (128,128)
batch_size = 16

# Pin the class order explicitly. Without `classes`, flow_from_directory orders
# the sub-folders alphanumerically (deforested=0, forest=1), which contradicts
# the prediction cell's reading of a sigmoid output >= 0.5 as "deforested".
# With this list: forest -> 0, deforested -> 1.
class_names = ['forest', 'deforested']

# Training images get light augmentation (random horizontal flips and
# rotations of up to 15 degrees); test images are only rescaled to [0, 1].
train_datagen = ImageDataGenerator(rescale=1./255, horizontal_flip=True, rotation_range=15)
test_datagen = ImageDataGenerator(rescale=1./255)

train_gen = train_datagen.flow_from_directory(
    DATA_DIR / 'train',
    target_size=img_size,
    batch_size=batch_size,
    classes=class_names,
    class_mode='binary',
    shuffle=True
)

# shuffle=False keeps the test batches in a fixed file order.
test_gen = test_datagen.flow_from_directory(
    DATA_DIR / 'test',
    target_size=img_size,
    batch_size=batch_size,
    classes=class_names,
    class_mode='binary',
    shuffle=False
)


In [None]:
# Build a small CNN model
from tensorflow.keras import layers, models

# Small CNN: three conv + max-pool stages with a growing number of filters,
# followed by a dense head ending in a single sigmoid unit (binary output).
# Layers are created in the same order as they appear in the network.
cnn_layers = [layers.Input(shape=img_size + (3,))]
for n_filters in (32, 64, 128):
    cnn_layers.append(layers.Conv2D(n_filters, (3,3), activation='relu', padding='same'))
    cnn_layers.append(layers.MaxPooling2D())
cnn_layers.append(layers.Flatten())
cnn_layers.append(layers.Dense(128, activation='relu'))
cnn_layers.append(layers.Dropout(0.4))
cnn_layers.append(layers.Dense(1, activation='sigmoid'))

model = models.Sequential(cnn_layers)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()


In [None]:
# Fit for only a handful of epochs so the notebook stays quick to run.
# The test generator is passed as validation data, so its loss and accuracy
# are reported after every epoch.
epochs = 6
history = model.fit(train_gen, epochs=epochs, validation_data=test_gen)


In [None]:
# Score the trained model on the test generator.
# evaluate() yields the loss followed by the compiled metric (accuracy).
test_metrics = model.evaluate(test_gen)
loss, acc = test_metrics
print(f"Test loss: {loss:.4f}, Test accuracy: {acc:.4f}")

# Write the trained model to an .h5 file in the working directory.
model.save('deforestation_cnn_model.h5')
print("Saved model to deforestation_cnn_model.h5")


In [None]:
# Predict on some test images and display results
import numpy as np
from tensorflow.keras.preprocessing import image
from pathlib import Path

# Pick up to three test images per class (sorted so the selection is reproducible).
test_files = [
    f
    for cls_name in ('forest', 'deforested')
    for f in sorted((DATA_DIR / 'test' / cls_name).glob('*.png'))[:3]
]

# The sigmoid output is the probability of class index 1. Which folder that is
# depends on how the training generator ordered the classes (alphanumeric by
# default, i.e. deforested=0, forest=1), so look the name up from the generator
# instead of hard-coding it.
index_to_class = {idx: name for name, idx in train_gen.class_indices.items()}

fig, axes = plt.subplots(2, 3, figsize=(9, 6))
for ax, f in zip(axes.flatten(), test_files):
    img = image.load_img(f, target_size=img_size)
    # Same 1/255 rescaling the generators applied during training.
    x = image.img_to_array(img) / 255.0
    pred = model.predict(np.expand_dims(x, axis=0))[0][0]
    label = index_to_class[int(pred >= 0.5)]
    ax.imshow(img)
    ax.set_title(f"{f.name}\npred={pred:.2f} -> {label}")
    ax.axis('off')
plt.tight_layout()


## Next steps and tips
- Replace the synthetic dataset with real satellite imagery (e.g., Planet, Sentinel-2, or labeled Kaggle datasets) for realistic results.
- Try transfer learning with EfficientNet or ResNet for better performance on small datasets.
- Crop/patch large satellite images into smaller tiles and label them.
