Let's start training the CycleGAN. The first thing we need to do is downgrade the installed version of NumPy, because the CycleGAN repository only works with NumPy versions below 2.0.

In [1]:
#Downgrade NumPy to a version compatible with the older CycleGAN github repo
!pip install "numpy<2"

Collecting numpy<2
  Downloading numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/61.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.0/18.0 MB[0m [31m124.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling numpy-2.0.2:
      Successfully uninstalled numpy-2.0.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
jaxlib 0.7.2 requires numpy>=2.0, but you have numpy 

Let's load the data for our project from a Kaggle dataset. To download the dataset correctly, you need to create secret variables in Colab for the Kaggle username and API key.

In [1]:
import os
from google.colab import userdata
from pathlib import Path

# Identifier handed to `kaggle datasets download -d`
DATASET_ID = 'abdelghaniaaba/wildfire-prediction-dataset'

# Absolute directory the dataset is downloaded and extracted into
TARGET_DIR = Path('/content/dataset')

def load_kaggle_data(dataset_id: str, target_path: Path):
    """
    Download a Kaggle dataset and extract it into ``target_path``.

    The Kaggle username and API key are read from Colab Secrets and exported
    as the ``KAGGLE_USERNAME`` / ``KAGGLE_KEY`` environment variables, the
    target directory is created if needed, and the ``kaggle`` command-line
    tool is invoked to download and unzip the dataset.

    Args:
        dataset_id: Dataset identifier passed to ``kaggle datasets download -d``.
        target_path: Directory that receives the extracted files.

    Raises:
        ImportError: If the credentials cannot be read from Colab Secrets or
            are empty (exception type kept for backward compatibility).
        RuntimeError: If the ``kaggle`` tool is missing or exits with a
            non-zero status.
    """
    import subprocess  # local import so the cell stays self-contained

    #Secure Authentication
    try:
        credentials = {
            name: userdata.get(name)
            for name in ('KAGGLE_USERNAME', 'KAGGLE_KEY')
        }
        empty = [name for name, value in credentials.items() if not value]
        if empty:
            raise ValueError(f"Empty Colab secret(s): {', '.join(empty)}")
        # Export only once both values are known to be usable
        os.environ.update(credentials)
    except Exception as e:
        raise ImportError(
            "Kaggle credentials not found in Colab Secrets. "
            "Please add 'KAGGLE_USERNAME' and 'KAGGLE_KEY' to the "
            "secrets manager (key icon on the left)."
        ) from e

    #Directory Management
    #create the directory if it does not exist
    target_path.mkdir(parents=True, exist_ok=True)
    print(f"Target Directory set to: {target_path}")

    #Download and Unzip
    # The kaggle CLI is invoked with an argument list (no shell), so dataset
    # ids or paths containing spaces or shell metacharacters are passed verbatim.
    # Flags:
    # -d: dataset identifier
    # -p: path to download destination
    # --unzip: immediately extract files (essential for direct usage)
    # --force: optional, use if you want to overwrite existing data
    command = [
        "kaggle", "datasets", "download",
        "-d", dataset_id,
        "-p", str(target_path),
        "--unzip",
    ]

    print(f"Downloading {dataset_id}...")
    try:
        completed = subprocess.run(command, capture_output=True, text=True)
    except FileNotFoundError as e:
        raise RuntimeError(
            "The 'kaggle' command-line tool was not found. "
            "Install it with 'pip install kaggle'."
        ) from e

    if completed.returncode != 0:
        # Surface the CLI's own message; it is otherwise invisible in a notebook
        details = (completed.stderr or completed.stdout or "").strip()
        message = "Failed to download dataset. Check dataset ID and credentials."
        if details:
            message += f"\nkaggle output:\n{details}"
        raise RuntimeError(message)

    print("Success: Data downloaded and extracted.")
    print(f"Files located in: {os.listdir(target_path)}")



# Run the download when this cell is executed directly
if __name__ == "__main__":
    load_kaggle_data(dataset_id=DATASET_ID, target_path=TARGET_DIR)


Target Directory set to: /content/dataset
Downloading abdelghaniaaba/wildfire-prediction-dataset...
Success: Data downloaded and extracted.
Files located in: ['valid', 'train', 'test']


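Let's clone the GitHub repository for CycleGAN. It is one of the most used and cited repositories in the field of generative deep learning. In the same cell we also mount Google Drive (to store checkpoints), install the missing libraries, patch the repository so that dropout is enabled, and save a zipped copy of the dataset to Drive.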

In [25]:
#Mount Google Drive to save model safely
from google.colab import drive
import os
drive.mount('/content/drive')

#Create a folder for checkpoints in your Drive
checkpoint_path = "/content/drive/MyDrive/Wildfire_Project/checkpoints"
os.makedirs(checkpoint_path, exist_ok=True)

#Clone CycleGAN repository
if not os.path.exists('/content/pytorch-CycleGAN-and-pix2pix'):
    !git clone https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix

#Install Missing Libraries
!pip install dominate wandb

#AUTOMATIC DROPOUT FIX
#The repo defaults to no_dropout=True. We use 'sed' to comment out that line
#This forces the model to use Dropout (Randomness).
!rm "/content/drive/MyDrive/Wildfire_Project/dataset.zip"
!sed -i 's/parser.set_defaults(no_dropout=True)/# parser.set_defaults(no_dropout=True)/g' /content/pytorch-CycleGAN-and-pix2pix/models/cycle_gan_model.py
!zip -r -q dataset.zip ./dataset
!cp dataset.zip "/content/drive/MyDrive/Wildfire_Project/"

print("SUCCESS: Environment ready and Code patched for Dropout.")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
SUCCESS: Environment ready and Code patched for Dropout.


Now we create the paths required to train the CycleGAN model. We use symbolic links to expose the training images to the model, so we don't have to move thousands of images.

In [3]:
import os


# Where the downloaded dataset lives, where the CycleGAN repo was cloned,
# and the dataset folder inside the repo that will hold the links.
source_root = "/content/dataset"
repo_path = "/content/pytorch-CycleGAN-and-pix2pix"
dest_root = repo_path + "/datasets/wildfire_project"

# Make sure the destination folder is present before any link is added
os.makedirs(dest_root, exist_ok=True)

# Helper function to create links safely
def link_folder(source, dest_name):
    """
    Create the symbolic link ``{dest_root}/{dest_name}`` pointing at ``source``.

    Args:
        source: Path of the existing folder the link should point to.
        dest_name: Name of the link to create inside ``dest_root``.

    Prints an error and returns without creating anything when ``source``
    does not exist. An existing entry at the destination is left untouched,
    except for a dangling symlink (one whose target no longer exists), which
    is replaced.
    """
    src_path = f"{source}"
    dst_path = f"{dest_root}/{dest_name}"

    if not os.path.exists(src_path):
        print(f"ERROR: Source not found: {src_path}")
        return

    # os.path.exists() follows links and reports a dangling symlink as
    # missing; os.symlink() would then fail with FileExistsError.
    if os.path.islink(dst_path) and not os.path.exists(dst_path):
        os.unlink(dst_path)

    if not os.path.lexists(dst_path):
        os.symlink(src_path, dst_path)
        print(f"Linked: {dest_name} -> {src_path}")
    else:
        print(f"Link exists: {dest_name}")

# (sub-folder of the dataset, link name expected by CycleGAN)
# The test* links point at 'valid' so the real Test set is never touched.
dataset_links = [
    ("train/nowildfire", "trainA"),  # Green Forest
    ("train/wildfire", "trainB"),    # Fire
    ("valid/nowildfire", "testA"),
    ("valid/wildfire", "testB"),
]

for subfolder, link_name in dataset_links:
    link_folder(f"{source_root}/{subfolder}", link_name)

print("\nSUCCESS: Dataset is ready for CycleGAN.")

Linked: trainA -> /content/dataset/train/nowildfire
Linked: trainB -> /content/dataset/train/wildfire
Linked: testA -> /content/dataset/valid/nowildfire
Linked: testB -> /content/dataset/valid/wildfire

SUCCESS: Dataset is ready for CycleGAN.


Let's start the training of the model.



In [None]:

%cd /content/pytorch-CycleGAN-and-pix2pix
!sed -i '1i from PIL import ImageFile; ImageFile.LOAD_TRUNCATED_IMAGES = True' /content/pytorch-CycleGAN-and-pix2pix/train.py

!python train.py \
  --dataroot ./datasets/wildfire_project \
  --name wildfire_cyclegan \
  --model cycle_gan \
  --batch_size 1 \
  --n_epochs 5 \
  --n_epochs_decay 5 \
  --checkpoints_dir "/content/drive/MyDrive/Wildfire_Project/checkpoints" \
  --save_epoch_freq 1 \
  --num_threads 2 \
  --continue_train \
  --epoch_count 2

/content/pytorch-CycleGAN-and-pix2pix
----------------- Options ---------------
               batch_size: 1                             
                    beta1: 0.5                           
          checkpoints_dir: /content/drive/MyDrive/Wildfire_Project/checkpoints	[default: ./checkpoints]
           continue_train: True                          	[default: False]
                crop_size: 256                           
                 dataroot: ./datasets/wildfire_project   	[default: None]
             dataset_mode: unaligned                     
                direction: AtoB                          
             display_freq: 400                           
          display_winsize: 256                           
                    epoch: latest                        
              epoch_count: 2                             	[default: 1]
                 gan_mode: lsgan                         
                init_gain: 0.02                          
                

Now that our CycleGAN model is trained, it's time to generate synthetic images. We will generate 2,000 synthetic images from the training set (used to augment the classifier's training data) and 50 synthetic images from the validation set, to see how the trained model behaves on unseen data.

In [27]:
import os
import shutil
from google.colab import drive

#Force Unmount the existing connection
unmount_ok = True
try:
    drive.flush_and_unmount()
except Exception as e:
    unmount_ok = False
    print(f"Drive was not mounted or unmount failed (safe to ignore if proceeding if error was {e}).")

mount_point = '/content/drive'

# SAFETY: the cleanup below deletes files recursively. If Drive is still
# attached (the unmount raised, or the path is still a mount point), those
# deletions would hit the real Google Drive contents, so cleanup is skipped.
drive_may_be_attached = (not unmount_ok) or os.path.ismount(mount_point)

# Ensure the mount point is an empty directory before mounting
if drive_may_be_attached:
    print(f"'{mount_point}' may still be mounted; skipping local cleanup to protect Drive contents.")
elif not os.path.lexists(mount_point):
    # Nothing there at all (not even a dangling symlink): create it
    os.makedirs(mount_point, exist_ok=True)
elif not os.path.isdir(mount_point):
    # Exists but is not a directory (e.g. a file or a dangling symlink)
    os.remove(mount_point)
    os.makedirs(mount_point, exist_ok=True) # Recreate as an empty directory
else:
    leftovers = os.listdir(mount_point)
    if leftovers:
        print(f"Clearing contents of '{mount_point}' before remounting...")
    for item in leftovers:
        item_path = os.path.join(mount_point, item)
        if os.path.ismount(item_path):
            # Never delete through a nested mount
            print(f"Skipping mounted path: {item_path}")
        elif os.path.islink(item_path):
            os.unlink(item_path) # Remove symbolic links
        elif os.path.isfile(item_path):
            os.remove(item_path) # Remove files
        elif os.path.isdir(item_path):
            shutil.rmtree(item_path) # Remove directories recursively

# Remount Drive
drive.mount(mount_point, force_remount=True)

#Define the File Path (Using absolute path)
LOAD_PATH = "/content/drive/MyDrive/Wildfire_Project/checkpoints/wildfire_cyclegan/latest_net_G_A.pth"

# Stop the cell when the generator checkpoint is missing: the steps that
# follow delete earlier results and run test.py, which cannot work without it.
if not os.path.exists(LOAD_PATH):
    print(f"CRITICAL ERROR: File not found at path: {LOAD_PATH}")
    print("Please verify the file name and location in your Google Drive.")
    raise FileNotFoundError(LOAD_PATH)

print("Sanity Check Passed: File is visible on the filesystem.")

!rm -rf /content/drive/MyDrive/Wildfire_Project/synthetic_wildfire_2k.zip
%cd /content/pytorch-CycleGAN-and-pix2pix

!sed -i 's/opt.serial_batches = True/# opt.serial_batches = True/g' /content/pytorch-CycleGAN-and-pix2pix/test.py
!rm ./synthetic_wildfire_2k.zip
!rm -rf ./results


print(" Starting Generation of 2,000 Synthetic Images...")

!python test.py \
  --dataroot ./datasets/wildfire_project/trainA \
  --name wildfire_cyclegan \
  --model test \
  --model_suffix _A \
  --num_test 2000 \
  --eval \
  --checkpoints_dir "/content/drive/MyDrive/Wildfire_Project/checkpoints" \
  --results_dir ./results/synthetic_dataset

print("Generation Complete.")


!find "./results/synthetic_dataset/wildfire_cyclegan/test_latest/images/" -type f  -name "*_real*.png" -delete
#We save the generated images on drive, in a zip so the operation it's faset
print("Zipping images...")

!zip -r -q synthetic_wildfire_2k.zip ./results/synthetic_dataset/wildfire_cyclegan/test_latest/images/

print("Saving Zip to Google Drive...")
!cp synthetic_wildfire_2k.zip "/content/drive/MyDrive/Wildfire_Project/"

#Cleanup
!rm -rf ./results
!rm -rf ./synthetic_wildfire_2k.zip


print("'synthetic_wildfire_2k.zip' is saved in the drive")

Mounted at /content/drive
Sanity Check Passed: File is visible on the filesystem.
/content/pytorch-CycleGAN-and-pix2pix
rm: cannot remove './synthetic_wildfire_2k.zip': No such file or directory
 Starting Generation of 2,000 Synthetic Images...
----------------- Options ---------------
             aspect_ratio: 1.0                           
               batch_size: 1                             
          checkpoints_dir: /content/drive/MyDrive/Wildfire_Project/checkpoints	[default: ./checkpoints]
                crop_size: 256                           
                 dataroot: ./datasets/wildfire_project/trainA	[default: None]
             dataset_mode: single                        
                direction: AtoB                          
          display_winsize: 256                           
                    epoch: latest                        
                     eval: True                          	[default: False]
                init_gain: 0.02                   

In [7]:
import os
import shutil

!sed -i 's/opt.serial_batches = True/# opt.serial_batches = True/g' /content/pytorch-CycleGAN-and-pix2pix/test.py


# Note: We use '--phase test' here to look at Validation data.
print("Generating 50 Validation Examples for Presentation...")
%cd /content/pytorch-CycleGAN-and-pix2pix

!python test.py \
  --dataroot ./datasets/wildfire_project/testA \
  --name wildfire_cyclegan \
  --model test \
  --model_suffix _A \
  --num_test 50 \
  --eval \
  --checkpoints_dir "/content/drive/MyDrive/Wildfire_Project/checkpoints" \
  --results_dir ./results/presentation_raw

source_dir = "./results/presentation_raw/wildfire_cyclegan/test_latest/images/"
dest_dir = "/content/drive/MyDrive/Wildfire_Project/Validation_Images"

if os.path.exists(dest_dir):
    shutil.rmtree(dest_dir) # Clear old run if exists
os.makedirs(dest_dir, exist_ok=True)

print(f"Copying images to {dest_dir}...")
!cp -r {source_dir}* "{dest_dir}"

# Cleanup
!rm -rf ./results/presentation_raw

print(f"Success: Images saved in '{dest_dir}'")

Generating 50 Validation Examples for Presentation...
/content/pytorch-CycleGAN-and-pix2pix
----------------- Options ---------------
             aspect_ratio: 1.0                           
               batch_size: 1                             
          checkpoints_dir: /content/drive/MyDrive/Wildfire_Project/checkpoints	[default: ./checkpoints]
                crop_size: 256                           
                 dataroot: ./datasets/wildfire_project/testA	[default: None]
             dataset_mode: single                        
                direction: AtoB                          
          display_winsize: 256                           
                    epoch: latest                        
                     eval: True                          	[default: False]
                init_gain: 0.02                          
                init_type: normal                        
                 input_nc: 3                             
                  isTrain: Fal