In [1]:
# Cell 1: Install & Import Libraries
# Make sure you're in the right environment!
!pip install tensorflow opencv-python-headless matplotlib scikit-learn

import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate, Conv2DTranspose, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import MeanIoU
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt


[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting tensorflow
  Using cached tensorflow-2.20.0-cp311-cp311-win_amd64.whl.metadata (4.6 kB)
Collecting opencv-python-headless
  Downloading opencv_python_headless-4.12.0.88-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Using cached absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Using cached astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.9.23-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Using cached gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google_pasta>=0.1.1 (from tensorflow)
  Using cached google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Using cached libclang-18.1.1-py2.py3-none-win_amd64.whl.metadata (5.3 kB)
Collecting opt_einsum>=2.3.2 (from tensorflow)
  Using cac



In [9]:
# Cell 2: Connect to Kaggle (VS Code Version)

!pip install kaggle
import os
import shutil

# Define paths
kaggle_json_filename = 'kaggle.json'
kaggle_dir_path = os.path.expanduser('~/.kaggle')

# Check if kaggle.json is in the same folder as the notebook
if not os.path.exists(kaggle_json_filename):
    print("--- ERROR ---")
    print(f"'{kaggle_json_filename}' not found. Please download it from Kaggle")
    print(f"and place it in the same folder as this notebook.")
    print("---------------")
else:
    # Create the .kaggle directory if it doesn't exist
    os.makedirs(kaggle_dir_path, exist_ok=True)
    
    # Copy the file to the .kaggle directory
    shutil.copy(kaggle_json_filename, os.path.join(kaggle_dir_path, 'kaggle.json'))
    
    # Set permissions (this is important!)
    os.chmod(os.path.join(kaggle_dir_path, 'kaggle.json'), 0o600)
    
    print(f"Successfully configured '{kaggle_json_filename}'!")

Successfully configured 'kaggle.json'!



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [12]:
# Cell 4: Define Parameters and File Paths (Updated)
#
# Defines the image dimensions and the dataset locations used by later cells,
# then checks that the expected folders are present and prints the result.
import os

# --- Parameters ---
IMG_WIDTH = 256
IMG_HEIGHT = 256
IMG_CHANNELS = 3

# --- File Paths ---
# Point this to your actual dataset folder
DATA_PATH = './Water bodies dataset/'

# The dataset folder is expected to contain 'Images' and 'Masks' subfolders.
# The capitalisation matters on case-sensitive file systems.
IMAGE_PATH = os.path.join(DATA_PATH, 'Images')
MASK_PATH = os.path.join(DATA_PATH, 'Masks')

# --- Check if paths are correct ---
# Folder names in the hints are derived from the paths above so the messages
# always name exactly what the code is looking for.
dataset_folder_name = os.path.basename(os.path.normpath(DATA_PATH))

# os.path.isdir (rather than exists) also rejects a plain file with that name.
if not os.path.isdir(DATA_PATH):
    print("--- ERROR ---")
    print(f"Directory not found: {DATA_PATH}")
    print(f"Please make sure your '{dataset_folder_name}' folder is in the same directory as this notebook.")
elif not os.path.isdir(IMAGE_PATH):
    print("--- ERROR ---")
    print(f"Sub-directory not found: {IMAGE_PATH}")
    print(f"Does your '{dataset_folder_name}' folder contain an '{os.path.basename(IMAGE_PATH)}' folder?")
elif not os.path.isdir(MASK_PATH):
    print("--- ERROR ---")
    print(f"Sub-directory not found: {MASK_PATH}")
    print(f"Does your '{dataset_folder_name}' folder contain a '{os.path.basename(MASK_PATH)}' folder?")
else:
    print("Data paths look correct! Ready to proceed.")

Data paths look correct! Ready to proceed.


In [15]:
from sklearn.model_selection import train_test_split
import os

# Lists the .jpg images and masks and splits them 80/20 into training and
# validation sets. IMAGE_PATH and MASK_PATH are set in Cell 4.
#
# NOTE(review): images and masks are paired purely by position after sorting,
# which assumes both folders use matching file names; confirm for this dataset.

image_files = sorted([os.path.join(IMAGE_PATH, f) for f in os.listdir(IMAGE_PATH) if f.endswith('.jpg')])

# Masks are filtered on the same .jpg extension as the images.
mask_files = sorted([os.path.join(MASK_PATH, f) for f in os.listdir(MASK_PATH) if f.endswith('.jpg')])

# --- Sanity check before splitting ---
print(f"Found {len(image_files)} images.")
print(f"Found {len(mask_files)} masks.")

# train_test_split raises on empty or unequal-length inputs, so the split is
# only attempted once both lists are non-empty and the same length.
if not image_files or not mask_files:
    print("--- ERROR: No .jpg files found! ---")
    print("Check IMAGE_PATH / MASK_PATH and the file extension (e.g., .png vs .jpg).")
elif len(image_files) != len(mask_files):
    print("--- WARNING: Mismatch in file counts! ---")
    print("Make sure each image has a corresponding mask.")
    print("Skipping the split until the counts match.")
else:
    print("File counts match. Proceeding to split.")

    # Split into training and validation sets (fixed seed for reproducibility)
    img_train, img_val, mask_train, mask_val = train_test_split(image_files, mask_files, test_size=0.2, random_state=42)

    print(f"Total Images: {len(image_files)}")
    print(f"Training Images: {len(img_train)}")
    print(f"Validation Images: {len(img_val)}")

Found 2841 images.
Found 2841 masks.
File counts match. Proceeding to split.
Total Images: 2841
Training Images: 2272
Validation Images: 569


In [17]:
from sklearn.model_selection import train_test_split
import os

# Builds sorted lists of image and mask paths, reports how many were found,
# and, when the counts agree and are non-zero, splits them 80/20 into
# training and validation sets. IMAGE_PATH and MASK_PATH are set in Cell 4.

# Full paths of every .jpg in the image folder, in sorted order
image_files = [
    os.path.join(IMAGE_PATH, name)
    for name in os.listdir(IMAGE_PATH)
    if name.endswith('.jpg')
]
image_files.sort()

# Masks use the .jpg extension as well
mask_files = [
    os.path.join(MASK_PATH, name)
    for name in os.listdir(MASK_PATH)
    if name.endswith('.jpg')
]
mask_files.sort()

# --- Report what was found ---
print(f"Found {len(image_files)} images.")
print(f"Found {len(mask_files)} masks.")

if len(image_files) == len(mask_files) and len(image_files) != 0:
    print("File counts match. Proceeding to split.")

    # 80/20 train/validation split with a fixed seed
    img_train, img_val, mask_train, mask_val = train_test_split(
        image_files,
        mask_files,
        test_size=0.2,
        random_state=42,
    )

    print(f"Total Images: {len(image_files)}")
    print(f"Training Images: {len(img_train)}")
    print(f"Validation Images: {len(img_val)}")
else:
    print("--- ERROR: Mismatch in file counts! ---")
    print("Please check your 'images' and 'masks' folders.")
    print("You might be using the wrong file extension (e.g., .png vs .jpg).")

Found 2841 images.
Found 2841 masks.
File counts match. Proceeding to split.
Total Images: 2841
Training Images: 2272
Validation Images: 569
