In [None]:
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from PIL import Image

In [None]:
def rle_encode(mask):
    """
    Run-length encode a binary mask as a space-separated string ("3 5 2 1").

    The mask is flattened in row-major (C) order. The result alternates the
    1-indexed start position and the length of every foreground run.
    Any non-zero pixel counts as foreground, so 0/255 or boolean masks encode
    the same way as 0/1 masks.

    Returns an empty string when the mask has no foreground pixels.
    """
    # Binarise first: with raw values, a step between two different non-zero
    # values would register as a run boundary and unbalance the start/end pairs.
    foreground = (np.asarray(mask).ravel() != 0).astype(np.int8)
    # Zero sentinels on both ends guarantee every run has a start and an end.
    padded = np.concatenate([[0], foreground, [0]])
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Even slots hold run starts, odd slots hold run ends; turn ends into lengths.
    boundaries[1::2] -= boundaries[::2]
    return ' '.join(str(x) for x in boundaries)


In [None]:
def visualize_mask(mask, title):
    """Show a 2-D mask as a grayscale image with a cell grid and per-pixel values.

    Args:
        mask: 2-D array indexed as mask[row, col]; drawn with the gray
            colormap clamped to the range [0, 1].
        title: Text placed above the figure.
    """
    n_rows, n_cols = mask.shape

    plt.figure(figsize=(6, 6))
    plt.imshow(mask, cmap='gray', vmin=0, vmax=1)
    plt.title(title)
    plt.axis('off')

    # Grid lines sit on pixel borders (pixel centres are at integer coordinates).
    # Rows and columns are counted separately so non-square masks get the right
    # number of vertical lines.
    for row_edge in range(n_rows + 1):
        plt.axhline(row_edge - 0.5, color='red', alpha=0.3, linewidth=0.5)
    for col_edge in range(n_cols + 1):
        plt.axvline(col_edge - 0.5, color='red', alpha=0.3, linewidth=0.5)

    # Write each pixel's value at its centre. Zero cells render black, so they
    # get white text; brighter cells get blue text (white would be invisible).
    for row in range(n_rows):
        for col in range(n_cols):
            value = mask[row, col]
            plt.text(col, row, str(value), ha='center', va='center',
                     color='white' if value == 0 else 'blue', fontweight='bold')

    plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# [Code for visualize_mask function here]
def visualize_mask(mask, title):
    """Show a 2-D mask as a grayscale image with a cell grid and per-pixel values.

    Args:
        mask: 2-D array indexed as mask[row, col]; drawn with the gray
            colormap clamped to the range [0, 1].
        title: Text placed above the figure.
    """
    n_rows, n_cols = mask.shape

    plt.figure(figsize=(6, 6))
    plt.imshow(mask, cmap='gray', vmin=0, vmax=1)
    plt.title(title)
    plt.axis('off')

    # Grid lines sit on pixel borders (pixel centres are at integer coordinates).
    # Rows and columns are counted separately so non-square masks get the right
    # number of vertical lines.
    for row_edge in range(n_rows + 1):
        plt.axhline(row_edge - 0.5, color='red', alpha=0.3, linewidth=0.5)
    for col_edge in range(n_cols + 1):
        plt.axvline(col_edge - 0.5, color='red', alpha=0.3, linewidth=0.5)

    # Write each pixel's value at its centre. Zero cells render black, so they
    # get white text; brighter cells get blue text (white would be invisible).
    for row in range(n_rows):
        for col in range(n_cols):
            value = mask[row, col]
            plt.text(col, row, str(value), ha='center', va='center',
                     color='white' if value == 0 else 'blue', fontweight='bold')

    plt.show()

# Non-square (4 rows x 3 columns) binary mask used to exercise visualize_mask.
test_mask = np.array([
    [1, 0, 0],
    [1, 1, 0],
    [0, 1, 1],
    [0, 0, 1]
], dtype=np.uint8)

# Visualize it
visualize_mask(test_mask, "Example Binary Mask Visualization")


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# 1. Prepare data: a sine wave with Gaussian noise.
# A dedicated, seeded generator keeps this figure reproducible across kernel
# restarts without touching NumPy's global random state used by later cells.
n_points = 50
noise_rng = np.random.default_rng(0)
x_values = np.linspace(0, 10, n_points)  # evenly spaced points between 0 and 10
y_values = np.sin(x_values) + noise_rng.normal(0, 0.1, n_points)  # sine wave with some noise

# 2. Create the plot
plt.figure(figsize=(8, 4))
plt.plot(x_values, y_values, label='Sine Wave with Noise', color='blue', linestyle='-')

# 3. Add labels, title, and a legend
plt.xlabel('X Axis Value')
plt.ylabel('Y Axis Value')
plt.title('Simple Line Plot Example')
plt.legend()
plt.grid(True)

# 4. Display the plot
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# 1. Synthetic data: y rises linearly with x plus uniform jitter (positive correlation).
# The global seed set here also fixes the random draws of the cells that follow.
np.random.seed(42)
x_data = 10 * np.random.rand(100)
y_data = 2 * x_data + 5 * np.random.rand(100)

# 2. Draw the scatter on an explicit Axes
fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(x_data, y_data, color='red', marker='o', alpha=0.6)

# 3. Labels and title
ax.set_xlabel('Variable X')
ax.set_ylabel('Variable Y')
ax.set_title('Scatter Plot: X vs Y')

# 4. Display the plot
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# 1. Draw 1000 samples from a standard normal distribution (mean 0, std dev 1)
data = np.random.randn(1000)

# 2. Histogram on an explicit Axes
fig, ax = plt.subplots(figsize=(7, 4))
ax.hist(data, bins=30, color='green', edgecolor='black', alpha=0.7)

# 3. Labels and title in one call
ax.set(xlabel='Value', ylabel='Frequency', title='Histogram of Random Data')

# 4. Display the plot
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# 1. Dummy per-epoch training metrics (10 epochs)
epochs = range(1, 11)
train_loss = [0.8, 0.6, 0.4, 0.3, 0.25, 0.21, 0.18, 0.16, 0.15, 0.14]
val_loss = [0.75, 0.65, 0.5, 0.4, 0.35, 0.3, 0.28, 0.27, 0.26, 0.25]
train_acc = [0.7, 0.75, 0.8, 0.85, 0.88, 0.9, 0.91, 0.92, 0.93, 0.935]
val_acc = [0.72, 0.74, 0.78, 0.82, 0.84, 0.86, 0.87, 0.88, 0.89, 0.895]

# 2. One figure with two side-by-side panels; axs is an array of the two Axes
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))
loss_ax, acc_ax = axs

# --- Left panel: loss ---
loss_ax.plot(epochs, train_loss, color='blue', label='Training Loss')
loss_ax.plot(epochs, val_loss, color='orange', label='Validation Loss')
loss_ax.set(title='Model Loss Over Epochs', xlabel='Epoch', ylabel='Loss Value')
loss_ax.legend()
loss_ax.grid(True)

# --- Right panel: accuracy ---
acc_ax.plot(epochs, train_acc, color='green', label='Training Accuracy')
acc_ax.plot(epochs, val_acc, color='red', label='Validation Accuracy')
acc_ax.set(title='Model Accuracy Over Epochs', xlabel='Epoch', ylabel='Accuracy Value')
acc_ax.legend()
acc_ax.grid(True)

# 3. Figure-level title; the rect leaves headroom so it does not overlap the panels
fig.suptitle('Training Metrics Summary')
fig.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()


In [None]:
# Smallest non-trivial example: a 2x2 mask with three foreground pixels.
simple_mask = np.array([
    [1, 0],
    [1, 1]
])
print(f"Mask:\n{simple_mask}")
# flatten() walks the mask row by row, the same order rle_encode flattens in.
print(f"Flattened: {simple_mask.flatten()}")
print(f"RLE: '{rle_encode(simple_mask)}'")
visualize_mask(simple_mask, "Simple 2x2 Mask")

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import cv2 # Using OpenCV to load images if needed, or you can use PIL

# 1. Synthetic stand-in for a dataset sample:
# a blueish RGB background with a red square in the middle
image = np.full((100, 100, 3), (0, 100, 200), dtype=np.uint8)
image[25:75, 25:75] = (255, 0, 0)

# Matching binary mask: 1 inside the square, 0 elsewhere
mask = np.zeros(image.shape[:2], dtype=np.uint8)
mask[25:75, 25:75] = 1

# 2. One row, two panels
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))
image_ax, mask_ax = axs

# --- Left panel: the original image ---
image_ax.imshow(image)
image_ax.set_title('Original Image')
image_ax.axis('off')  # hide axis ticks

# --- Right panel: the binary mask (gray colormap, fixed 0..1 range) ---
mask_ax.imshow(mask, cmap='gray', vmin=0, vmax=1)
mask_ax.set_title('Binary Mask')
mask_ax.axis('off')

# 3. Display the plots
plt.show()


In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np

# 1. Dummy image: uniform light-gray canvas, 200 rows x 300 columns
image = np.full((200, 300, 3), 200, dtype=np.uint8)

# 2. Dummy bounding box as [x, y, width, height]; (x, y) is the rectangle's anchor corner.
# NOTE(review): imshow's default origin puts row 0 at the top, so this anchor
# presumably shows up as the box's top-left corner on screen rather than the
# lower-left -- confirm against the annotation convention in use.
bbox = [50, 70, 100, 80]
bbox_x, bbox_y, bbox_w, bbox_h = bbox

# 3. Figure with a single Axes showing the image
fig, ax = plt.subplots(1, figsize=(8, 6))
ax.imshow(image)

# Unfilled red rectangle for the bounding box, attached to the Axes
rect = patches.Rectangle(
    xy=(bbox_x, bbox_y),
    width=bbox_w,
    height=bbox_h,
    linewidth=2,
    edgecolor='r',
    facecolor='none',
)
ax.add_patch(rect)

# Title, no axis decorations
ax.set_title('Image with Bounding Box')
ax.axis('off')

# Display the plot
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import seaborn as sns # Optional, makes the plot look nicer

# 1. Prepare dummy data (replace with your actual validation masks and predictions)
# Example of flattened data:
y_true = np.array([0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0])
y_pred = np.array([0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0])

# 2. Calculate the confusion matrix
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
# With labels=[0, 1] the layout is cm = [[TN, FP], [FN, TP]]

# 3. Plot the confusion matrix using ConfusionMatrixDisplay
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["Background (0)", "Forgery (1)"])

# disp.plot() draws on a figure of its own unless it is handed an Axes, so the
# Axes is created here and passed in. A bare plt.figure() before the call would
# be left behind as an empty figure and its figsize would never be applied.
fig, ax = plt.subplots(figsize=(6, 6))
disp.plot(ax=ax, cmap=plt.cm.Blues, values_format='d')  # 'd' formats integer counts

ax.set_title('Confusion Matrix for Validation Set')
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import roc_curve, auc

# 1. Dummy data (replace with actual ground truth and predicted probabilities)
# y_true holds the binary labels (0 or 1), y_probs the predicted scores (0.0 to 1.0)
y_true = np.array([0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0])
y_probs = np.array([0.1, 0.6, 0.9, 0.2, 0.3, 0.8, 0.7, 0.95, 0.85, 0.15, 0.25, 0.05])

# 2. ROC points (false/true positive rate per threshold) and the area under the curve
fpr, tpr, thresholds = roc_curve(y_true, y_probs)
roc_auc = auc(fpr, tpr)

# 3. Draw the curve and the chance diagonal on an explicit Axes
fig, ax = plt.subplots(figsize=(7, 6))
ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Random Guessing (AUC = 0.50)')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate (FPR)')
ax.set_ylabel('True Positive Rate (TPR) / Recall')
ax.set_title('Receiver Operating Characteristic (ROC) Curve')
ax.legend(loc="lower right")
ax.grid(True)

# 4. Display the plot
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Same dummy loss curve as in the training-metrics example
epochs = range(1, 11)
train_loss = [0.8, 0.6, 0.4, 0.3, 0.25, 0.21, 0.18, 0.16, 0.15, 0.14]

# Line plot whose data points are marked with the horizontal-line marker '_'
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(epochs, train_loss, label='Training Loss', color='blue', marker='_')

ax.set(xlabel='Epoch', ylabel='Loss Value', title='Training Loss with Minus Markers')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Outer shape: a 50x50 mask filled with 1s
large_square = np.ones((50, 50), dtype=np.uint8)

# Hole selector: same size, 1s only in the central 10x10 block that will be cut out
small_square = np.zeros_like(large_square)
small_square[20:30, 20:30] = 1

# Subtracting the selector clears the centre: 1s everywhere except the hole
minus_shape_mask = np.subtract(large_square, small_square)

# Visualize the resulting mask
fig, ax = plt.subplots(figsize=(5, 5))
ax.imshow(minus_shape_mask, cmap='gray', vmin=0, vmax=1)
ax.set_title('Mask Created by Subtraction (Donut Shape)')
ax.axis('off')
plt.show()


In [None]:
class EarlyStopping:
    """Stop training when the validation loss stops improving, checkpointing the best model.

    Call the instance once per epoch with the validation loss and the model.
    An epoch counts as an improvement when its loss is lower than the best
    loss seen so far by at least ``delta``. Each improvement saves
    ``model.state_dict()`` to ``path`` and resets the patience counter. After
    ``patience`` consecutive non-improving epochs ``early_stop`` becomes True.

    Args:
        patience: Number of consecutive non-improving epochs tolerated.
        verbose: When True, print counter and checkpoint messages.
        delta: Minimum decrease in validation loss that counts as an improvement.
        path: File the best model's state dict is written to.
    """

    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt'):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0          # non-improving epochs since the last improvement
        self.best_score = None    # negated best validation loss (higher is better)
        self.early_stop = False
        # np.inf rather than the np.Inf alias, which NumPy 2.0 removed.
        self.val_loss_min = np.inf
        self.delta = delta
        self.path = path

    def __call__(self, val_loss, model):
        """Record one epoch's validation loss and update the stopping state."""
        # Negate so that "larger score" means "better model".
        score = -val_loss

        if self.best_score is None:
            # First epoch: always becomes the baseline checkpoint.
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        """Write the model's state dict to ``self.path`` and remember its loss."""
        # torch is not imported anywhere in the visible notebook, so import it
        # here; this is a no-op when it has already been imported.
        import torch

        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

# Usage in your training loop:
# early_stopping = EarlyStopping(patience=10, verbose=True)
# 
# for epoch in range(NUM_EPOCHS):
#     # ... (run one training epoch) ...
#     # ... (calculate validation loss for this epoch) ...
#     
#     early_stopping(val_loss_epoch, model)
#     if early_stopping.early_stop:
#         print("Early stopping triggered")
#         break


In [None]:
# 9x9 mask with a plus sign: two 5-pixel bars crossing at the centre (4, 4)
plus_mask = np.zeros((9, 9), dtype=np.uint8)
plus_mask[4, 2:7] = 1  # horizontal bar
plus_mask[2:7, 4] = 1  # vertical bar

print("Mask visualization:")
for mask_row in plus_mask:
    print(' '.join(map(str, mask_row)))

flat_preview = ' '.join(map(str, plus_mask.flatten()[:20]))
print(f"Flattened (first 20): {flat_preview}...")
print(f"RLE: '{rle_encode(plus_mask)}'")
visualize_mask(plus_mask, "Plus Shape Mask")

In [None]:
# 9x9 mask with a single 5-pixel horizontal bar in the middle row
minus_mask = np.zeros((9, 9), dtype=np.uint8)
minus_mask[4, 2:7] = 1

print("Mask visualization:")
for mask_row in minus_mask:
    print(' '.join(map(str, mask_row)))

print(f"RLE: '{rle_encode(minus_mask)}'")
visualize_mask(minus_mask, "Minus Shape Mask")

In [None]:
def get_edge_map(image_rgb, low_threshold=100, high_threshold=200):
    """Return a Canny edge map of an RGB image, scaled to float32 0/1 values.

    Args:
        image_rgb: Image array in RGB channel order, as accepted by
            cv2.cvtColor with COLOR_RGB2GRAY.
        low_threshold: Lower hysteresis threshold passed to cv2.Canny.
        high_threshold: Upper hysteresis threshold passed to cv2.Canny.

    Returns:
        float32 array holding the Canny output divided by 255.
    """
    # Canny works on a single channel, so reduce the RGB image to grayscale first.
    gray = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(gray, low_threshold, high_threshold)
    # Rescale the 8-bit edge image so edge pixels become 1.0.
    return edges.astype(np.float32) / 255.0


In [None]:
import matplotlib.pyplot as plt
import numpy as np

def plot_simple_loss_graph(history):
    """Draw the loss values in `history` as a line chart, one point per epoch.

    Args:
        history: Sequence of loss values in training order; the x-axis numbers
            the epochs starting from 1.
    """
    epoch_numbers = range(1, len(history) + 1)

    fig, ax = plt.subplots(figsize=(8, 5))

    # Single series: the loss at each epoch
    ax.plot(epoch_numbers, history, label='Training Loss', marker='o', linestyle='-', color='blue')

    ax.set(
        title='Model Training Progress (Loss vs. Epochs)',
        xlabel='Epoch Number',
        ylabel='Loss Value',
    )
    ax.legend()
    ax.grid(True)
    plt.show()

# --- Example usage ---

# Dummy per-epoch loss values standing in for what a training run would record:
sample_loss_history = [0.8, 0.6, 0.4, 0.3, 0.25, 0.21, 0.18, 0.16, 0.15, 0.14]

# Call the function to display the plot
plot_simple_loss_graph(sample_loss_history)


In [None]:
# Walk through the RLE computation one step at a time on a single-row mask.
test_mask = np.array([[1, 1, 0, 0, 1, 0]])
print(f"Test mask: {test_mask.flatten()}")

# Step by step explanation
pixels = test_mask.flatten()
print(f"1. Flatten: {pixels}")

# Zero sentinels at both ends so runs touching a border still produce a change.
padded = np.concatenate([[0], pixels, [0]])
print(f"2. Add borders: {padded}")

# 1-indexed positions where the value flips; they alternate between a run's
# start and the position just past its end.
changes = np.where(padded[1:] != padded[:-1])[0] + 1
print(f"3. Find changes: {changes}")

# Replace every end position with a length (end - start): start/length pairs remain.
runs = changes.copy()
runs[1::2] -= runs[::2]
print(f"4. Calculate lengths: {runs}")

result = ' '.join(str(x) for x in runs)
print(f"5. Final RLE: '{result}'")

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# 1. Dummy image metadata (replace with your actual DataFrame): 50 random rows
data = {
    'Image_Width': np.random.randint(100, 500, 50),
    'Image_Height': np.random.randint(100, 500, 50),
    'Pixel_Mean': np.random.rand(50) * 0.5 + 0.25,
    'Forgery_Area_Ratio': np.random.rand(50) * 0.2
}
# Derived column: width divided by height
df = pd.DataFrame(data).assign(
    Aspect_Ratio=lambda frame: frame['Image_Width'] / frame['Image_Height']
)

# 2. Pairwise correlation between all columns
corr_matrix = df.corr()

# 3. Annotated heatmap drawn on an explicit Axes
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f", linewidths=.5, ax=ax)

ax.set_title('Feature Correlation Heatmap')
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# 1. Prepare dummy data with categories
# Data for 'authentic' and 'forged' categories
authentic_areas = np.random.rand(50) * 10
forged_areas = np.random.rand(50) * 50  # forged images get a wider range of altered areas

data_to_plot = [authentic_areas, forged_areas]
labels = ['Authentic', 'Forged']

# 2. Create the box plot
plt.figure(figsize=(7, 5))
# Boxes are placed at x = 1..N by default. They are labelled through xticks
# rather than boxplot's `labels=` keyword, which Matplotlib 3.9 deprecated in
# favour of `tick_labels=`; xticks behaves the same on old and new releases.
# The default orientation is vertical, so no orientation argument is needed.
plt.boxplot(data_to_plot, patch_artist=True)
plt.xticks(range(1, len(labels) + 1), labels)

# 3. Add labels and title
plt.title('Distribution of Forgery Areas by Category')
plt.ylabel('Area Size (pixels/ratio)')
plt.xlabel('Image Type')
plt.grid(axis='y')

plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# 1. Dummy data for 100 images: the first 50 are authentic, the last 50 forged
np.random.seed(42)
image_widths = np.random.randint(100, 500, 100)
forgery_ratios = np.random.rand(100) * 0.2
# Numeric category per image: 0 = Authentic, 1 = Forged
categories = np.array([0] * 50 + [1] * 50)

fig, ax = plt.subplots(figsize=(8, 6))

# `c` maps each point's category onto the 'viridis' colormap
scatter = ax.scatter(image_widths, forgery_ratios, c=categories, cmap='viridis', alpha=0.7)

# Legend built by hand: one handle per colour, captioned with readable names
handles, labels = scatter.legend_elements(prop="colors", alpha=0.6)
legend_labels = ['Authentic', 'Forged']
ax.legend(handles, legend_labels, loc="lower right", title="Type")

ax.set_xlabel('Image Width')
ax.set_ylabel('Forgery Area Ratio')
ax.set_title('Scatter Plot colored by Category (Matplotlib)')
ax.grid(True)
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# 1. Dummy counts of forged vs. authentic images per source; each source totals 500
categories = ['Source A', 'Source B', 'Source C']
has_forgery = np.array([200, 150, 300])
no_forgery = np.array([300, 350, 200])

# 2. Stacked bars: authentic counts form the base layer, forged counts sit on top
fig, ax = plt.subplots(figsize=(8, 5))

ax.bar(categories, no_forgery, color='skyblue', label='No Forgery (Authentic)')
# bottom=no_forgery lifts this layer so it starts where the base layer ends
ax.bar(categories, has_forgery, color='coral', label='Has Forgery (Forged)', bottom=no_forgery)

ax.set(ylabel='Number of Images', title='Image Counts by Source and Forgery Status')
ax.legend()

plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# 1. Prepare dummy data (100 images)
widths = np.random.randint(100, 500, 100)
heights = np.random.randint(100, 500, 100)
# 'File size' in MB for each image (used to determine marker size)
file_sizes_mb = np.random.rand(100) * 5 + 1

# Scale file size so points aren't too small/large on the plot
marker_sizes = file_sizes_mb * 50

plt.figure(figsize=(8, 6))

# File size is encoded through the 's' (marker size) parameter; the colour is constant
scatter = plt.scatter(widths, heights, s=marker_sizes, alpha=0.5, color='purple', edgecolors='black')

plt.xlabel('Image Width')
plt.ylabel('Image Height')
# The title names the encoding actually used: marker size, not colour.
plt.title('Image Dimensions Sized by File Size')
plt.grid(True)
plt.show()


<div style="
    background: linear-gradient(135deg, #1a1f2c 0%, #2d3748 50%, #4a5568 100%);
    border: 2px solid #63b3ed;
    border-radius: 15px;
    padding: 25px;
    margin: 20px 0;
    box-shadow: 0 0 30px rgba(99, 179, 237, 0.4),
                inset 0 0 20px rgba(255, 255, 255, 0.1);
    color: #f1f5f9;
    font-family: 'Segoe UI', system-ui, sans-serif;
    position: relative;
    overflow: hidden;
">

<div style="
    position: absolute;
    top: -20px;
    right: -20px;
    width: 100px;
    height: 100px;
    background: radial-gradient(circle, rgba(99, 179, 237, 0.25) 0%, transparent 70%);
    border-radius: 50%;
"></div>

<div style="
    position: absolute;
    bottom: -40px;
    left: -40px;
    width: 120px;
    height: 120px;
    background: radial-gradient(circle, rgba(99, 179, 237, 0.2) 0%, transparent 70%);
    border-radius: 50%;
"></div>

<h1 style="
    color: #63b3ed;
    margin: 0 0 20px 0;
    text-align: center;
    font-weight: 700;
    font-size: 1.8em;
    text-shadow: 0 0 15px rgba(99, 179, 237, 0.6);
    position: relative;
    z-index: 1;
">
    Create a submission with masks distributed by the most frequent positions in the training masks
</h1>

</div>

In [None]:
import os
import cv2
import json
import numpy as np
import pandas as pd

from PIL import Image
from scipy.stats import gaussian_kde

# Seed NumPy's global RNG so the random choices made below are repeatable.
np.random.seed(81)

# Kaggle competition inputs: submission template, test images and training masks.
sample_submission_path = '/kaggle/input/recodai-luc-scientific-image-forgery-detection/sample_submission.csv'
test_images_dir = '/kaggle/input/recodai-luc-scientific-image-forgery-detection/test_images'
train_masks_dir = '/kaggle/input/recodai-luc-scientific-image-forgery-detection/train_masks'

# Its 'case_id' column drives the loop that builds the submission.
sample_submission = pd.read_csv(sample_submission_path)

def _collapse_mask_to_2d(mask):
    """Reduce a loaded mask array to 2-D, or return None when that is not possible.

    A 3-D array with a singleton first or last axis is squeezed along that axis.
    Any other 3-D array is treated as a stack of layers and merged with a
    logical OR along the stacking axis, taken to be whichever of the first or
    last axis is shorter (assumption: a mask file has fewer layers than rows
    or columns -- TODO confirm against the dataset's actual layout).
    """
    if mask.ndim == 3:
        if mask.shape[0] == 1:
            mask = mask[0]
        elif mask.shape[2] == 1:
            mask = mask[:, :, 0]
        else:
            stack_axis = 0 if mask.shape[0] <= mask.shape[2] else 2
            mask = (mask > 0).any(axis=stack_axis).astype(np.uint8)
    return mask if mask.ndim == 2 else None


def analyze_mask_distribution():
    """Collect position, size and aspect statistics of regions in the training masks.

    Every ``.npy`` file in ``train_masks_dir`` is loaded and reduced to 2-D.
    Its external contours are extracted, and an ellipse is fitted to each
    contour with at least five points. Centres are normalised by image width
    and height; axis lengths are normalised by the longer image side.

    Returns:
        ``(kde, (size_mean, size_std), (aspect_mean, aspect_std))``, or
        ``(None, None, None)`` when the directory does not exist or no region
        could be measured. ``kde`` is a ``gaussian_kde`` over the normalised
        centres, or None when fewer than two centres were found or the
        estimate could not be built. ``size_mean``/``size_std`` are length-2
        arrays ordered (major, minor).
    """
    import warnings

    if not os.path.exists(train_masks_dir):
        return None, None, None

    all_positions = []
    all_sizes = []
    all_aspect_ratios = []
    skipped_files = 0

    # Sorted so the order of the collected samples (and therefore any seeded
    # random pick from them) does not depend on the file system's listing order.
    for mask_file in sorted(os.listdir(train_masks_dir)):
        if not mask_file.endswith('.npy'):
            continue
        try:
            mask = _collapse_mask_to_2d(np.load(os.path.join(train_masks_dir, mask_file)))
            if mask is None or np.sum(mask) == 0:
                continue

            height, width = mask.shape
            longest_side = max(width, height)
            contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            for contour in contours:
                # Ellipse fitting is only attempted on contours with five or more points.
                if len(contour) < 5:
                    continue
                center, axes, _angle = cv2.fitEllipse(contour)
                major, minor = max(axes), min(axes)

                all_positions.append((center[0] / width, center[1] / height))
                all_sizes.append((major / longest_side, minor / longest_side))
                all_aspect_ratios.append(minor / major if major > 0 else 1.0)
        except Exception:
            # Best effort: a single unreadable or malformed file must not abort
            # the scan, but it is counted so the loss is reported below.
            skipped_files += 1
            continue

    if skipped_files:
        warnings.warn(
            f"analyze_mask_distribution: skipped {skipped_files} mask file(s) "
            "that could not be loaded or processed"
        )

    if not all_positions:
        return None, None, None

    positions_array = np.array(all_positions)
    kde = None
    if len(positions_array) > 1:
        try:
            kde = gaussian_kde(positions_array.T)
        except (np.linalg.LinAlgError, ValueError):
            # Degenerate centres (e.g. too few or collinear points) give no
            # usable density estimate; callers treat None as "no estimate".
            kde = None

    # The three lists grow together, so they are all non-empty at this point.
    sizes_array = np.array(all_sizes)
    aspect_ratios_array = np.array(all_aspect_ratios)

    size_mean = np.mean(sizes_array, axis=0)
    size_std = np.std(sizes_array, axis=0)
    aspect_mean = np.mean(aspect_ratios_array)
    aspect_std = np.std(aspect_ratios_array)

    return kde, (size_mean, size_std), (aspect_mean, aspect_std)

def generate_ellipse_mask(height, width, kde, size_stats, aspect_stats):
    """Draw one random filled ellipse on an empty (height, width) uint8 mask.

    Args:
        height, width: Size of the mask to create, in pixels.
        kde: Object exposing a ``dataset`` array of normalised (x, y) centres,
            or None. When it holds more than one centre, one of them is picked
            at random; otherwise the centre is drawn from a normal
            distribution around the image middle and clipped to [0.1, 0.9].
        size_stats: ``((major_mean, minor_mean), (major_std, minor_std))`` of
            normalised axis lengths, or None to use built-in defaults.
        aspect_stats: ``(mean, std)`` of the minor/major ratio, or None to use
            built-in defaults.

    Returns:
        uint8 array of shape (height, width) with 1 inside the ellipse.
    """
    # The statistics can be None when no training masks were available; fall
    # back to fixed defaults instead of failing while unpacking them.
    if size_stats is None:
        size_stats = ((0.02, 0.01), (0.01, 0.005))
    if aspect_stats is None:
        aspect_stats = (0.7, 0.2)
    size_mean, size_std = size_stats
    aspect_mean, aspect_std = aspect_stats

    if kde is not None and len(kde.dataset.T) > 1:
        random_idx = np.random.randint(0, len(kde.dataset.T))
        center_x, center_y = kde.dataset.T[random_idx]
    else:
        center_x = np.random.normal(0.5, 0.2)
        center_y = np.random.normal(0.5, 0.2)
        center_x = np.clip(center_x, 0.1, 0.9)
        center_y = np.clip(center_y, 0.1, 0.9)

    major_axis = np.random.normal(size_mean[0], size_std[0])
    # The minor axis is derived from the aspect ratio below, so this sample is
    # not used; the draw is kept only so the seeded random stream is unchanged.
    np.random.normal(size_mean[1], size_std[1])
    major_axis = np.clip(major_axis, 0.005, 0.1)

    aspect_ratio = np.random.normal(aspect_mean, aspect_std)
    aspect_ratio = np.clip(aspect_ratio, 0.3, 0.95)
    minor_axis = major_axis * aspect_ratio

    mask = np.zeros((height, width), dtype=np.uint8)

    # Convert normalised values to pixels; axes scale with the longer image side.
    abs_center_x = int(center_x * width)
    abs_center_y = int(center_y * height)
    abs_major = max(int(major_axis * max(height, width)), 2)
    abs_minor = max(int(minor_axis * max(height, width)), 2)

    # NOTE(review): cv2.ellipse presumably interprets `axes` as half-lengths,
    # whereas size statistics derived from cv2.fitEllipse are full axis
    # lengths -- confirm whether drawing at roughly twice the measured size is
    # intended before changing the scale here.
    cv2.ellipse(mask,
                (abs_center_x, abs_center_y),
                (abs_minor, abs_major),
                angle=np.random.uniform(0, 180),
                startAngle=0,
                endAngle=360,
                color=1,
                thickness=-1)

    # In about 30% of cases pick a kernel size; only size 3 actually blurs and
    # re-thresholds the mask, which softens the outline slightly.
    if np.random.random() < 0.3:
        kernel_size = np.random.choice([1, 3])
        if kernel_size > 1:
            mask = cv2.GaussianBlur(mask.astype(np.float32), (kernel_size, kernel_size), 0)
            mask = (mask > 0.3).astype(np.uint8)

    return mask

def generate_irregular_mask(height, width, kde, size_stats):
    """Create an ellipse mask and, half of the time, erode or dilate it once.

    The ellipse is generated with a fixed aspect-ratio setting of (0.7, 0.2).
    When the morphological step is applied, erosion and dilation are equally
    likely and both use a 2x2 kernel of ones.
    """
    base_mask = generate_ellipse_mask(height, width, kde, size_stats, (0.7, 0.2))

    # First coin flip: leave the ellipse untouched in half of the cases.
    if np.random.random() >= 0.5:
        return base_mask

    structuring_element = np.ones((2, 2), np.uint8)
    # Second coin flip: shrink or grow the shape by one morphological step.
    morph_op = cv2.erode if np.random.random() < 0.5 else cv2.dilate
    return morph_op(base_mask, structuring_element, iterations=1)

def generate_multiple_masks(height, width, kde, size_stats, aspect_stats):
    """Return the union of one to three independently generated ellipse masks.

    The number of ellipses is 1, 2 or 3 with probabilities 0.7, 0.2 and 0.1.
    The result is a uint8 array of shape (height, width).
    """
    n_ellipses = np.random.choice([1, 2, 3], p=[0.7, 0.2, 0.1])

    combined = np.zeros((height, width), dtype=bool)
    for _ in range(n_ellipses):
        ellipse = generate_ellipse_mask(height, width, kde, size_stats, aspect_stats)
        combined |= ellipse.astype(bool)

    return combined.astype(np.uint8)

def rle_encode(mask):
    """Run-length encode a binary mask as a space-separated string.

    The mask is flattened in row-major (C) order. The result alternates the
    1-indexed start position and the length of every foreground run. Any
    non-zero pixel counts as foreground. An all-zero mask yields ''.
    """
    # Binarise first: with raw values, a step between two different non-zero
    # values would register as a run boundary and unbalance the start/end pairs.
    foreground = (np.asarray(mask).ravel() != 0).astype(np.int8)
    # Zero sentinels on both ends guarantee every run has a start and an end.
    padded = np.concatenate([[0], foreground, [0]])
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Even slots hold run starts, odd slots hold run ends; turn ends into lengths.
    boundaries[1::2] -= boundaries[::2]
    return ' '.join(str(x) for x in boundaries)

# Gather statistics from the training masks; all three values are None when
# the mask directory is missing or nothing could be measured.
kde, size_stats, aspect_stats = analyze_mask_distribution()

# One {'case_id', 'annotation'} record per row of the sample submission.
submission_data = []

for case_id in sample_submission['case_id']:
    img_path = os.path.join(test_images_dir, f"{case_id}.png")
    
    # Cases without an image file on disk are labelled authentic.
    if not os.path.exists(img_path):
        annotation = 'authentic'
    else:
        # Only the image dimensions are needed; PIL reports size as (width, height).
        with Image.open(img_path) as img:
            width, height = img.size
        
        # Roughly 0.5% of the existing images receive a synthetic forgery mask.
        # The order of the random draws below determines the seeded output.
        if np.random.random() < 0.005:
            mask_type = np.random.choice(['ellipse', 'irregular', 'multiple'], p=[0.6, 0.3, 0.1])
            
            if mask_type == 'ellipse':
                mask = generate_ellipse_mask(height, width, kde, size_stats, aspect_stats)
            elif mask_type == 'irregular':
                mask = generate_irregular_mask(height, width, kde, size_stats)
            else:
                mask = generate_multiple_masks(height, width, kde, size_stats, aspect_stats)
            
            if np.sum(mask) > 0:
                # The RLE string is parsed back into ints and stored as a JSON list.
                RLE_res = rle_encode(mask)
                res = [int(x) for x in RLE_res.split()]
                annotation = json.dumps(res)
            else:
                # An empty mask carries no forgery, so the case stays authentic.
                annotation = 'authentic'
        else:
            annotation = 'authentic'
    
    submission_data.append({
        'case_id': case_id,
        'annotation': annotation
    })

# Build the frame once from the collected records and write the submission file.
submission = pd.DataFrame(submission_data)
submission.to_csv('submission.csv', index=False)
submission.head()