In [2]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import glob
# Configuration: dataset location and loader settings
dataset_url = r'C:\Users\abudh\Desktop\CropWatch\EuroSAT\2750'
batch_size = 32
img_height = 64
img_width = 64
validation_split = 0.2
rescale = 1.0 / 255

# A single generator backs both subsets; it is configured with the pixel
# rescale factor and the fraction reserved for the "validation" subset.
datagen = ImageDataGenerator(rescale=rescale, validation_split=validation_split)

# Arguments shared by both directory iterators
flow_options = dict(
    directory=dataset_url,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
)

# Build the two iterators (training first, then the held-out subset)
train_dataset = datagen.flow_from_directory(subset="training", **flow_options)
test_dataset = datagen.flow_from_directory(subset="validation", **flow_options)

# Report how many classes the training iterator discovered
num_classes = len(train_dataset.class_indices)
print(f"Number of classes: {num_classes}")

# Count the directory entries inside each class folder. This counts every
# entry matched by '*', independent of what the iterators actually loaded.
class_counts = {}
for class_name in train_dataset.class_indices:
    class_dir_pattern = os.path.join(dataset_url, class_name, '*')
    class_counts[class_name] = len(glob.glob(class_dir_pattern))

for class_name, count in class_counts.items():
    print(f"Class '{class_name}': {count} images")


Found 21600 images belonging to 10 classes.
Found 5400 images belonging to 10 classes.
Number of classes: 10
Class 'AnnualCrop': 3000 images
Class 'Forest': 3000 images
Class 'HerbaceousVegetation': 3000 images
Class 'Highway': 2500 images
Class 'Industrial': 2500 images
Class 'Pasture': 2000 images
Class 'PermanentCrop': 2500 images
Class 'Residential': 3000 images
Class 'River': 2500 images
Class 'SeaLake': 3000 images


In [7]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import glob
from collections import Counter

# Define variables
dataset_url = r'C:\Users\abudh\Desktop\CropWatch\EuroSAT\2750'
batch_size = 32
img_height = 64
img_width = 64
validation_split = 0.2
rescale = 1.0 / 255

# Initialize the ImageDataGenerator
datagen = ImageDataGenerator(validation_split=validation_split, rescale=rescale)


def count_per_class(dataset):
    """Return ``{class name: number of samples}`` for a directory iterator.

    Every class listed in ``dataset.class_indices`` is reported, in index
    order, with a count of 0 when the subset contains no sample of it (the
    previous Counter-only approach silently dropped such classes).
    """
    per_index = Counter(np.asarray(dataset.classes).tolist())
    return {name: per_index.get(idx, 0) for name, idx in dataset.class_indices.items()}


def print_distribution(title, counts):
    """Print ``title`` followed by each class count and its share of the total.

    An empty mapping or an all-zero total prints 0.00% instead of raising
    ZeroDivisionError.
    """
    total = sum(counts.values())
    print(title)
    for class_name, count in counts.items():
        proportion = count / total * 100 if total else 0.0
        print(f"Class '{class_name}': {count} images ({proportion:.2f}%)")


# Create datasets.
# shuffle=False keeps the iteration order fixed, which is all this cell needs
# because it only inspects `.classes`. `seed` is kept for parity with the
# original call but has no effect while shuffling is off and no random
# transforms are configured.
train_dataset = datagen.flow_from_directory(
    directory=dataset_url,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    subset="training",
    shuffle=False,
    seed=0,
    class_mode='categorical'
)

val_dataset = datagen.flow_from_directory(
    directory=dataset_url,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    subset="validation",
    shuffle=False,
    seed=0,
    class_mode='categorical'
)

# Overall distribution: number of directory entries in each class folder
overall_counts = {class_name: len(glob.glob(os.path.join(dataset_url, class_name, '*')))
                  for class_name in train_dataset.class_indices}

# Reverse the class_indices dictionary to map indices back to class names
idx_to_class = {v: k for k, v in train_dataset.class_indices.items()}

# Per-class counts of each subset, keyed by class name
train_counts = count_per_class(train_dataset)
val_counts = count_per_class(val_dataset)

# Totals (kept as named variables for later cells)
total_images = sum(overall_counts.values())
total_train_images = sum(train_counts.values())
total_val_images = sum(val_counts.values())

# Print distributions and proportions
print_distribution("Overall distribution:", overall_counts)
print_distribution("\nTraining set distribution:", train_counts)
print_distribution("\nValidation set distribution:", val_counts)


Found 21600 images belonging to 10 classes.
Found 5400 images belonging to 10 classes.
Overall distribution:
Class 'AnnualCrop': 3000 images (11.11%)
Class 'Forest': 3000 images (11.11%)
Class 'HerbaceousVegetation': 3000 images (11.11%)
Class 'Highway': 2500 images (9.26%)
Class 'Industrial': 2500 images (9.26%)
Class 'Pasture': 2000 images (7.41%)
Class 'PermanentCrop': 2500 images (9.26%)
Class 'Residential': 3000 images (11.11%)
Class 'River': 2500 images (9.26%)
Class 'SeaLake': 3000 images (11.11%)

Training set distribution:
Class 'AnnualCrop': 2400 images (11.11%)
Class 'Forest': 2400 images (11.11%)
Class 'HerbaceousVegetation': 2400 images (11.11%)
Class 'Highway': 2000 images (9.26%)
Class 'Industrial': 2000 images (9.26%)
Class 'Pasture': 1600 images (7.41%)
Class 'PermanentCrop': 2000 images (9.26%)
Class 'Residential': 2400 images (11.11%)
Class 'River': 2000 images (9.26%)
Class 'SeaLake': 2400 images (11.11%)

Validation set distribution:
Class 'AnnualCrop': 600 images 

In [8]:
# Augmenting generator, used for the TRAINING subset only.
# validation_split must be set here: without it, subset="training" returns
# every image and subset="validation" returns none.
datagen = ImageDataGenerator(
    rescale=1.0/255,
    validation_split=validation_split,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=[0.8, 1.2],
    # NOTE(review): Keras appears to apply the channel shift before `rescale`,
    # i.e. on the raw pixel scale, so 0.2 may be negligible - confirm intent.
    channel_shift_range=0.2,
    fill_mode='nearest'
)

# Plain generator for the validation subset: same rescale and same split
# fraction (so the two subsets stay disjoint), but no random augmentation,
# which would make validation metrics noisy and non-repeatable.
val_datagen = ImageDataGenerator(
    rescale=1.0/255,
    validation_split=validation_split
)

# Create datasets
train_dataset = datagen.flow_from_directory(
    directory=dataset_url,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    subset="training",
    # Files are listed class by class, so without shuffling each batch would
    # contain a single class. The seed makes the shuffle order repeatable.
    shuffle=True,
    seed=0,
    class_mode='categorical'
)

val_dataset = val_datagen.flow_from_directory(
    directory=dataset_url,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    subset="validation",
    # Keep a fixed order so predictions line up with `val_dataset.classes`.
    shuffle=False,
    class_mode='categorical'
)


In [9]:
from sklearn.utils.class_weight import compute_class_weight

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Compute class weights
# Keep the name -> index mapping under its own name instead of rebinding
# `class_indices` from a dict to a list.
class_index_map = train_dataset.class_indices
class_labels = list(class_index_map.keys())
class_indices = list(class_index_map.values())

# Integer class index of every training sample
train_labels = np.asarray(train_dataset.classes)

# Actual number of training images per class. The previous code stored the
# average (total // number of classes) for every class, hiding the imbalance.
samples_per_index = np.bincount(train_labels, minlength=len(class_labels))
class_counts = samples_per_index[class_indices]

# Number of training samples per class, keyed by class name
class_counts_dict = dict(zip(class_labels, class_counts))

# 'balanced' weights: n_samples / (n_classes * samples_in_class)
weights = compute_class_weight(
    class_weight='balanced',
    classes=np.array(class_indices),
    y=train_labels
)

# Name-keyed weights, for display and for any code that looks up by label
class_weight_dict = dict(zip(class_labels, weights))

# Index-keyed weights: Keras `Model.fit(class_weight=...)` expects integer
# class indices as keys, not class names.
class_weight_by_index = {int(idx): float(w) for idx, w in zip(class_indices, weights)}

# Print the class weights
print("Class weights:")
for label, weight in class_weight_dict.items():
    print(f"Class '{label}': {weight:.4f}")


Class weights:
Class 'AnnualCrop': 0.9000
Class 'Forest': 0.9000
Class 'HerbaceousVegetation': 0.9000
Class 'Highway': 1.0800
Class 'Industrial': 1.0800
Class 'Pasture': 1.3500
Class 'PermanentCrop': 1.0800
Class 'Residential': 0.9000
Class 'River': 1.0800
Class 'SeaLake': 0.9000
