# EDA on the Parking Space Detection

## Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import json
import os

from PIL import Image
import io
import base64

import random
from shapely.geometry import Polygon


In [None]:

# Minimum-area rotated rectangle around a set of keypoints
def get_minimum_bounding_box(points):
    """Return the corners of the minimum rotated rectangle enclosing *points*.

    The ring's closing coordinate (a repeat of the first corner) is dropped.
    """
    rectangle = Polygon(points).minimum_rotated_rectangle
    corners = list(rectangle.exterior.coords[:-1])
    return corners

# Convex hull around a set of keypoints
def get_convex_hull(points):
    """Return the vertices of the convex hull of *points*.

    The ring's closing coordinate (a repeat of the first vertex) is dropped.
    """
    hull_ring = Polygon(points).convex_hull.exterior
    return list(hull_ring.coords[:-1])

# Reduce five-point annotations to four keypoints
def handle_keypoints(points):
    """Return four keypoints for a five-point annotation.

    * If the fifth point repeats the first, it is dropped.
    * Otherwise the five points are replaced by their minimum rotated
      bounding rectangle.

    Inputs of any other length are returned unchanged.
    """
    if len(points) != 5:
        return points

    closes_on_first = points[4] == points[0]
    if closes_on_first:
        # The annotation merely closed the ring explicitly.
        return points[:4]

    # A genuine fifth vertex: fall back to the enclosing rectangle.
    return get_minimum_bounding_box(points)

## What the Dataset Looks Like

In [None]:
# Label file used as the default example by show_example_from_file
TEST_FILE = "/kaggle/input/surround-view-mmu-parking-slots-dataset/main_data/labels/0809_1058.json"

In [None]:
def show_example_from_file(filename: str=TEST_FILE):
    """Draw the annotated polygons of one label file on top of its image.

    The image path is derived from *filename* by replacing "json" with "jpg"
    and "labels" with "images". Each shape is passed through
    ``handle_keypoints`` before being outlined, filled and labelled at the
    mean of its vertices.
    """
    with open(filename, 'r') as label_file:
        annotation = json.load(label_file)
    shapes = annotation["shapes"]
    image_path = filename.replace("json", "jpg").replace("labels", "images")
    image = Image.open(image_path)

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.imshow(image)

    for shape in shapes:
        label = shape["label"]
        xs, ys = zip(*handle_keypoints(shape["points"]))

        # Repeat the first vertex so the outline is drawn as a closed ring.
        closed_xs = xs + (xs[0],)
        closed_ys = ys + (ys[0],)
        ax.plot(closed_xs, closed_ys, marker='o', linestyle='-', label=label)
        ax.fill(xs, ys, alpha=0.3)  # Optional: fill the polygon

        mean_x = sum(xs) / len(xs)
        mean_y = sum(ys) / len(ys)
        ax.text(mean_x, mean_y, label, fontsize=12, color='white', ha='center', va='center')
        ax.legend()
    plt.show()

In [None]:
def show_n_files(directory, n, grid_size=(2, 2)):
    """Plot a random sample of annotated images on a grid of subplots.

    Args:
        directory: Folder containing the label ``.json`` files. Each image
            path is derived from the label path by replacing "json" with
            "jpg" and "labels" with "images".
        n: Maximum number of files to show. It is capped by the number of
            label files and by the number of grid cells; negative values
            are treated as zero.
        grid_size: ``(rows, columns)`` of the subplot grid.

    Files without a "shapes" key or without a matching image are reported
    and skipped; their grid cell stays blank.
    """
    json_files = [f for f in os.listdir(directory) if f.endswith(".json")]
    n_rows, n_cols = grid_size[0], grid_size[1]
    n = max(0, min(n, len(json_files), n_rows * n_cols))

    # squeeze=False always yields a 2-D array of axes, so flatten() also
    # works for a 1x1 grid (where subplots would otherwise return a bare Axes).
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 10), squeeze=False)
    axes = axes.flatten()

    # Hide every cell up front so unused or skipped cells show no empty frame.
    for ax in axes:
        ax.axis("off")

    sampled_files = random.sample(json_files, n)

    for ax, filename in zip(axes, sampled_files):
        json_path = os.path.join(directory, filename)

        with open(json_path, 'r') as f:
            keypoints = json.load(f)

        if "shapes" not in keypoints:
            print(f"Skipping file {filename}: 'shapes' key not found.")
            continue

        detections = keypoints["shapes"]

        image_path = json_path.replace("json", "jpg").replace("labels", "images")
        if not os.path.exists(image_path):
            print(f"Skipping file {filename}: Image file {image_path} not found.")
            continue

        image = Image.open(image_path)
        ax.imshow(image)

        for points in detections:
            handled_points = handle_keypoints(points["points"])
            x_coords, y_coords = zip(*handled_points)

            # Repeat the first vertex so the outline is drawn as a closed ring.
            ax.plot(x_coords + (x_coords[0],), y_coords + (y_coords[0],),
                    marker='o', linestyle='-', label=points["label"])
            ax.fill(x_coords, y_coords, alpha=0.3)

            centroid_x = sum(x_coords) / len(x_coords)
            centroid_y = sum(y_coords) / len(y_coords)
            ax.text(centroid_x, centroid_y, points["label"], fontsize=8, color='white',
                    ha='center', va='center')

        # legend() warns when the axes holds no labelled artists.
        if detections:
            ax.legend(fontsize=8, loc="upper right")

    plt.tight_layout()
    plt.show()

In [None]:
# Preview up to 16 randomly sampled label files on a 2 x 8 grid
show_n_files('/kaggle/input/surround-view-mmu-parking-slots-dataset/main_data/labels', 16, (2,8))

In [None]:
# label_data collects the parsed label files; label_files lists every entry
# found in the labels folder.
label_dir = '/kaggle/input/surround-view-mmu-parking-slots-dataset/main_data/labels'
label_files = os.listdir(label_dir)
label_data = []

## Taking a Sample to Explore

In [None]:
# Explore at most 2000 label files. random.sample raises ValueError when asked
# for more items than the population holds, so cap the request at its size.
sample = random.sample(label_files, min(2000, len(label_files)))

In [None]:
# Parse every sampled JSON label file into label_data; other entries are ignored
for file_name in sample:
    if not file_name.endswith('.json'):
        continue
    with open(os.path.join(label_dir, file_name), 'r') as f:
        label_data.append(json.load(f))

## Converting to Dataframe for better Visualizations

In [None]:
# One row per (shape, true flag) pair, carrying the image metadata, the shape
# label and the coordinates of the shape's first four points.
rows = []
for labels in label_data:
    for shape in labels['shapes']:
        true_flags = [key for key, value in labels['flags'].items() if value]
        if len(true_flags) > 1:
            print("more than one")
        for flag in true_flags:
            row = dict(
                flag=flag,
                imagePath=labels['imagePath'],
                imageHeight=labels['imageHeight'],
                imageWidth=labels['imageWidth'],
                label=shape['label'],
            )
            # Keep only the first four points; some shapes have more, but they are not needed here
            for i, (x, y) in enumerate(shape['points'][:4]):
                row.update({f'x{i+1}': x, f'y{i+1}': y})
            rows.append(row)

data = pd.DataFrame(rows)
data.head()


In [None]:
# Column dtypes and non-null counts
data.info()

In [None]:
# Summary statistics of the numeric columns
data.describe()

## Questions asked from the dataset

In [None]:
# Questions we can ask of the dataset:

# What is the label distribution?
# What is the average area of the bounding boxes?
# Are there any outliers among the bounding boxes?
# What are the different image heights?
# How many types of parking spaces are present in the sample?
# What is the ratio of flags?

In [None]:
# How many shapes carry each label
sns.countplot(data=data, x='label')
plt.title('Label Distribution')
plt.show()

In [None]:
# Axis-aligned bounding box of each slot.
#
# The extent is measured across all four corners (max - min) rather than from
# two fixed corners, so the result does not depend on the order in which the
# corners were annotated or on the slot being axis-aligned.
data['bbox_width'] = data[['x1', 'x2', 'x3', 'x4']].max(axis=1) - data[['x1', 'x2', 'x3', 'x4']].min(axis=1)
data['bbox_height'] = data[['y1', 'y2', 'y3', 'y4']].max(axis=1) - data[['y1', 'y2', 'y3', 'y4']].min(axis=1)
data['bbox_area'] = data['bbox_width'] * data['bbox_height']

In [None]:
fig, axs = plt.subplots(1, 3, figsize=(18, 6))

# One histogram per bounding-box measure: (column, axis label, bar colour)
for _ax, (_column, _name, _color) in zip(axs, [
    ('bbox_width', 'Width', 'lightblue'),
    ('bbox_height', 'Height', 'lightgreen'),
    ('bbox_area', 'Area', 'lightcoral'),
]):
    _ax.hist(data[_column], bins=10, color=_color, edgecolor='black')
    _ax.set_title(f'Distribution of Bounding Box {_name}')
    _ax.set_xlabel(_name)
    _ax.set_ylabel('Frequency')

plt.tight_layout()
plt.show()


In [None]:
# Outlier detection: a row is an outlier when its bounding-box area falls
# below the 5th percentile or above the 95th percentile.
q_low, q_high = data['bbox_area'].quantile([0.05, 0.95])

outliers = data[data['bbox_area'].lt(q_low) | data['bbox_area'].gt(q_high)]

In [None]:
plt.figure(figsize=(8, 6))
# Draw the blue series from the rows that are NOT outliers, so that the
# 'Non-Outliers' legend entry matches what is plotted.
sns.scatterplot(data=data.drop(index=outliers.index), x='bbox_width', y='bbox_height',
                color='blue', label='Non-Outliers', alpha=0.6)
sns.scatterplot(data=outliers, x='bbox_width', y='bbox_height',
                color='red', label='Outliers', s=100, edgecolor='black')
plt.title('Bounding Box Outliers', fontsize=14)
plt.xlabel('Bounding Box Width', fontsize=12)
plt.ylabel('Bounding Box Height', fontsize=12)
plt.legend()
plt.grid(alpha=0.5)
plt.show()

In [None]:
# Display the annotated image behind each of the first seven outlier rows
for outlier in outliers.iloc[:7].iterrows():
    show_example_from_file(
        os.path.join(label_dir, outlier[1]['imagePath'].replace("jpg", "json"))
    )

In [None]:
# Height-to-width ratio of each image, and how often each ratio occurs
data['aspect_ratio'] = data['imageHeight'].div(data['imageWidth'])
data['aspect_ratio'].value_counts().to_frame()

In [None]:
# Per-image summary (mean / min / max / std) of the bounding-box measures
image_stats = (
    data
    .groupby('imagePath')[['bbox_width', 'bbox_height', 'bbox_area']]
    .agg(['mean', 'min', 'max', 'std'])
)
image_stats

In [None]:
# Share of rows carrying each flag
flag_counts = data['flag'].value_counts()
flag_ratios = flag_counts.div(len(data))

plt.figure(figsize=(4, 4))

# Pull every slice out of the pie by the same amount
explode_values = [0.1 for _ in range(len(flag_ratios))]

flag_ratios.plot.pie(
    labels=flag_ratios.index,  # Ensure labels are correctly displayed
    autopct='%1.1f%%',
    colors=['lightgreen', 'orange', 'lightblue'],
    explode=explode_values,
    startangle=140,
    shadow=True,
)

plt.title('Exploded Flag Ratios')
plt.ylabel('')
plt.show()


In [None]:
def filter_images_with_4_plus_keypoints(directory):
    """Return the label files that contain a polygon with more than 4 keypoints.

    Every ``.json`` file in *directory* is parsed. Files without a "shapes"
    key are ignored. A file's full path is included once if at least one of
    its shapes has more than four points.
    """
    matches = []
    for name in os.listdir(directory):
        if not name.endswith(".json"):
            continue

        path = os.path.join(directory, name)
        with open(path, 'r') as label_file:
            annotation = json.load(label_file)

        if "shapes" not in annotation:
            continue

        # any() stops at the first oversized shape, so each file is added once.
        if any(len(shape["points"]) > 4 for shape in annotation["shapes"]):
            matches.append(path)

    return matches

In [None]:
def show_example_with_truncated_bbox(filename: str):
    """Show a label file's polygons as annotated and after ``handle_keypoints``.

    The left panel draws each shape's points as stored; the right panel draws
    the points returned by ``handle_keypoints``. The image path is derived
    from *filename* by replacing "json" with "jpg" and "labels" with "images".
    """
    with open(filename, 'r') as label_file:
        annotation = json.load(label_file)
    shapes = annotation["shapes"]
    image = Image.open(filename.replace("json", "jpg").replace("labels", "images"))

    fig, axes = plt.subplots(1, 2, figsize=(20, 8))

    # (panel title, function applied to a shape's points before drawing)
    panels = [
        ("Original Bounding Box", lambda pts: pts),
        ("Truncated Bounding Box", handle_keypoints),
    ]
    for ax, (title, prepare) in zip(axes, panels):
        ax.imshow(image)
        ax.set_title(title)
        for shape in shapes:
            label = shape["label"]
            xs, ys = zip(*prepare(shape["points"]))
            # Repeat the first vertex so the outline is drawn as a closed ring.
            ax.plot(xs + (xs[0],), ys + (ys[0],), marker='o', linestyle='-', label=label)
            ax.fill(xs, ys, alpha=0.3)  # Optional: fill the polygon
            mean_x = sum(xs) / len(xs)
            mean_y = sum(ys) / len(ys)
            ax.text(mean_x, mean_y, label, fontsize=12, color='white', ha='center', va='center')
            ax.legend()

    plt.tight_layout()
    plt.show()

In [None]:
# Directory containing the label JSON files
label_dir = '/kaggle/input/surround-view-mmu-parking-slots-dataset/main_data/labels'

# Collect the label files that contain at least one polygon with more than 4 keypoints
filtered_files = filter_images_with_4_plus_keypoints(label_dir)

# Visualize the first few examples (annotated points vs. handle_keypoints output)
for filename in filtered_files[:5]:  # Adjust the number of examples as needed
    show_example_with_truncated_bbox(filename)

In [None]:
from shapely.geometry import Polygon

def get_minimum_bounding_box(points):
    """Return the corners of the minimum rotated rectangle enclosing *points*.

    The ring's closing coordinate (a repeat of the first corner) is dropped.
    """
    ring = Polygon(points).minimum_rotated_rectangle.exterior
    return list(ring.coords[:-1])

In [None]:
def replace_extra_keypoints(points):
    """Collapse every keypoint beyond the fourth into a single mean point.

    Returns:
        A ``(new_points, extra_points)`` pair. With more than four points,
        ``new_points`` is the first four points followed by the mean of the
        remaining ones, and ``extra_points`` holds those remaining points.
        Otherwise ``new_points`` is the input itself and ``extra_points`` is
        an empty list.
    """
    if len(points) <= 4:
        return points, []

    extra_points = points[4:]
    count = len(extra_points)
    centroid = (
        sum(p[0] for p in extra_points) / count,
        sum(p[1] for p in extra_points) / count,
    )
    return points[:4] + [centroid], extra_points

In [None]:
def handle_keypoints_with_bbox(points):
    """Return *points*, or their minimum rotated rectangle if there are more than four."""
    if len(points) <= 4:
        return points
    # Fit a minimum bounding rectangle around all annotated points.
    return get_minimum_bounding_box(points)

In [None]:
def handle_keypoints_with_replacement(points):
    """Return *points* with every keypoint beyond the fourth merged into one.

    Polygons with four or fewer points are returned unchanged. Larger ones
    are passed to ``replace_extra_keypoints`` and the resulting point list
    is returned.
    """
    if len(points) > 4:
        # replace_extra_keypoints returns (new_points, extra_points); only the
        # polygon itself is wanted here, so the second element is discarded.
        points, _extra_points = replace_extra_keypoints(points)
    return points

In [None]:
def compare_approaches(filename):
    """Draw a label file's polygons three ways, side by side.

    Panels, left to right: the points as annotated, the output of
    ``handle_keypoints_with_bbox``, and the output of
    ``handle_keypoints_with_replacement``. The image path is derived from
    *filename* by replacing "json" with "jpg" and "labels" with "images".
    """
    with open(filename, 'r') as label_file:
        annotation = json.load(label_file)
    shapes = annotation["shapes"]
    image = Image.open(filename.replace("json", "jpg").replace("labels", "images"))

    fig, axes = plt.subplots(1, 3, figsize=(30, 8))

    # (panel title, function applied to a shape's points before drawing)
    panels = [
        ("Original Polygon", lambda pts: pts),
        ("Bounding Box Approach", handle_keypoints_with_bbox),
        ("Replacement Point Approach", handle_keypoints_with_replacement),
    ]
    for ax, (title, transform) in zip(axes, panels):
        ax.imshow(image)
        ax.set_title(title)
        for shape in shapes:
            xs, ys = zip(*transform(shape["points"]))
            # Repeat the first vertex so the outline is drawn as a closed ring.
            ax.plot(xs + (xs[0],), ys + (ys[0],), marker='o', linestyle='-', label=shape["label"])
            ax.fill(xs, ys, alpha=0.3)
            ax.legend()

    plt.tight_layout()
    plt.show()

In [None]:
def compare_original_and_replacement(filename):
    """Show a label file's polygons as annotated and after centroid replacement.

    Left panel: the points as stored, with every keypoint beyond the fourth
    marked in red. Right panel: the polygon returned by
    ``replace_extra_keypoints``, with its last point marked in green when the
    annotation had more than four points. The image path is derived from
    *filename* by replacing "json" with "jpg" and "labels" with "images".
    """
    with open(filename, 'r') as label_file:
        annotation = json.load(label_file)
    shapes = annotation["shapes"]
    image = Image.open(filename.replace("json", "jpg").replace("labels", "images"))

    fig, (original_ax, replaced_ax) = plt.subplots(1, 2, figsize=(20, 8))

    # Left: polygon exactly as annotated
    original_ax.imshow(image)
    original_ax.set_title("Original Polygon")
    for shape in shapes:
        raw_points = shape["points"]
        xs, ys = zip(*raw_points)
        original_ax.plot(xs + (xs[0],), ys + (ys[0],), marker='o', linestyle='-', label=shape["label"])
        original_ax.fill(xs, ys, alpha=0.3)
        if len(raw_points) > 4:
            # Everything past the fourth point counts as "extra"
            extra_xs, extra_ys = zip(*raw_points[4:])
            original_ax.scatter(extra_xs, extra_ys, color='red', s=100, label='Extra Keypoints')
        original_ax.legend()

    # Right: polygon after the extra points were replaced
    replaced_ax.imshow(image)
    replaced_ax.set_title("Replacement Polygon")
    for shape in shapes:
        raw_points = shape["points"]
        merged_points, _extras = replace_extra_keypoints(raw_points)
        xs, ys = zip(*merged_points)
        replaced_ax.plot(xs + (xs[0],), ys + (ys[0],), marker='o', linestyle='-', label=shape["label"])
        replaced_ax.fill(xs, ys, alpha=0.3)
        if len(raw_points) > 4:
            # The replacement point is the last entry of the merged polygon
            marker_x, marker_y = merged_points[-1]
            replaced_ax.scatter(marker_x, marker_y, color='green', s=100, label='Replacement Point')
        replaced_ax.legend()

    plt.tight_layout()
    plt.show()

In [None]:
# Directory containing the label JSON files
label_dir = '/kaggle/input/surround-view-mmu-parking-slots-dataset/main_data/labels'

# Collect the label files that contain at least one polygon with more than 4 keypoints
filtered_files = filter_images_with_4_plus_keypoints(label_dir)

# Visualize the first few examples (original vs. replacement polygon)
for filename in filtered_files[:5]:  # Adjust the number of examples as needed
    compare_original_and_replacement(filename)

**Replace the two closest points (the extra points) with a single point (their midpoint, i.e. the average of the two) so that a five-point polygon ends up with exactly 4 points. This is different from the minimum bounding rectangle (MBR) approach, which creates a rectangle that encloses all points rather than merging the existing keypoints.**

In [None]:
import math

def find_closest_points(points):
    """Return the index pair ``(i, j)``, ``i < j``, of the two closest points.

    Distance is Euclidean. On ties the first pair in ``(i, j)`` order wins.
    Returns ``None`` when there are fewer than two points.
    """
    count = len(points)
    best_pair = None
    shortest = float('inf')

    index_pairs = ((a, b) for a in range(count) for b in range(a + 1, count))
    for a, b in index_pairs:
        ax, ay = points[a]
        bx, by = points[b]
        gap = math.sqrt((bx - ax)**2 + (by - ay)**2)
        if gap < shortest:
            shortest, best_pair = gap, (a, b)

    return best_pair

In [None]:
def replace_closest_points(points):
    """Merge the two closest points of an oversized polygon into their midpoint.

    Polygons with four or fewer points are returned unchanged. Otherwise the
    closest pair is removed and its midpoint is appended as the last point.
    """
    if len(points) <= 4:
        return points

    i, j = find_closest_points(points)
    first, second = points[i], points[j]
    midpoint = (
        (first[0] + second[0]) / 2,
        (first[1] + second[1]) / 2
    )
    # Keep every point except the merged pair, then add the midpoint at the end.
    survivors = [pt for idx, pt in enumerate(points) if idx != i and idx != j]
    survivors.append(midpoint)
    return survivors

In [None]:
def compare_original_and_replacement(filename):
    """Show a label file's polygons as annotated and after closest-pair merging.

    Left panel: the points as stored, with every keypoint beyond the fourth
    marked in red. Right panel: the polygon returned by
    ``replace_closest_points``, with its last point marked in green when the
    annotation had more than four points. The image path is derived from
    *filename* by replacing "json" with "jpg" and "labels" with "images".
    """
    with open(filename, 'r') as label_file:
        annotation = json.load(label_file)
    shapes = annotation["shapes"]
    image = Image.open(filename.replace("json", "jpg").replace("labels", "images"))

    fig, (original_ax, replaced_ax) = plt.subplots(1, 2, figsize=(20, 8))

    # Left: polygon exactly as annotated
    original_ax.imshow(image)
    original_ax.set_title("Original Polygon")
    for shape in shapes:
        raw_points = shape["points"]
        xs, ys = zip(*raw_points)
        original_ax.plot(xs + (xs[0],), ys + (ys[0],), marker='o', linestyle='-', label=shape["label"])
        original_ax.fill(xs, ys, alpha=0.3)
        if len(raw_points) > 4:
            # Everything past the fourth point counts as "extra"
            extra_xs, extra_ys = zip(*raw_points[4:])
            original_ax.scatter(extra_xs, extra_ys, color='red', s=100, label='Extra Keypoints')
        original_ax.legend()

    # Right: polygon after the closest pair was merged
    replaced_ax.imshow(image)
    replaced_ax.set_title("Replacement Polygon")
    for shape in shapes:
        raw_points = shape["points"]
        merged_points = replace_closest_points(raw_points)
        xs, ys = zip(*merged_points)
        replaced_ax.plot(xs + (xs[0],), ys + (ys[0],), marker='o', linestyle='-', label=shape["label"])
        replaced_ax.fill(xs, ys, alpha=0.3)
        if len(raw_points) > 4:
            # The last entry of the merged polygon is highlighted
            marker_x, marker_y = merged_points[-1]
            replaced_ax.scatter(marker_x, marker_y, color='green', s=100, label='Replacement Point')
        replaced_ax.legend()

    plt.tight_layout()
    plt.show()

In [None]:
# Directory containing the label JSON files
label_dir = '/kaggle/input/surround-view-mmu-parking-slots-dataset/main_data/labels'

# Collect the label files that contain at least one polygon with more than 4 keypoints
filtered_files = filter_images_with_4_plus_keypoints(label_dir)

# Visualize the first few examples (original vs. replacement polygon)
for filename in filtered_files[:5]:  # Adjust the number of examples as needed
    compare_original_and_replacement(filename)

# Removing these abnormalities

In [None]:
def is_valid_quadrilateral(points):
    """Return True when *points* are exactly four vertices of a valid polygon.

    Validity is delegated to shapely's ``Polygon.is_valid``.
    """
    return len(points) == 4 and Polygon(points).is_valid

In [None]:
def replace_closest_points(points):
    """Merge one pair of points into its midpoint so a valid quadrilateral results.

    Polygons with four or fewer points are returned unchanged. Otherwise the
    closest pair is merged first; if that result fails
    ``is_valid_quadrilateral``, every other pair is tried in index order.
    When no merge passes the check, the first four points are returned.
    """
    if len(points) <= 4:
        return points

    def merge_pair(a, b):
        # Drop points a and b and append their midpoint as the last point.
        midpoint = (
            (points[a][0] + points[b][0]) / 2,
            (points[a][1] + points[b][1]) / 2
        )
        merged = [pt for idx, pt in enumerate(points) if idx not in (a, b)]
        merged.append(midpoint)
        return merged

    i, j = find_closest_points(points)
    candidate = merge_pair(i, j)
    if is_valid_quadrilateral(candidate):
        return candidate

    # The closest pair did not work: try every other pair.
    for a in range(len(points)):
        for b in range(a + 1, len(points)):
            if (a, b) == (i, j):
                continue  # Already tried above
            candidate = merge_pair(a, b)
            if is_valid_quadrilateral(candidate):
                return candidate

    # No merge produced a valid quadrilateral: fall back to the first four points.
    return points[:4]

In [None]:
def compare_original_and_replacement(filename):
    """Show a label file's polygons as annotated and after point merging.

    Left panel: the points as stored, with every keypoint beyond the fourth
    marked in red. Right panel: the polygon returned by
    ``replace_closest_points``, with its last point marked in green when the
    annotation had more than four points. The image path is derived from
    *filename* by replacing "json" with "jpg" and "labels" with "images".
    """
    with open(filename, 'r') as label_file:
        annotation = json.load(label_file)
    shapes = annotation["shapes"]
    image = Image.open(filename.replace("json", "jpg").replace("labels", "images"))

    fig, (original_ax, replaced_ax) = plt.subplots(1, 2, figsize=(20, 8))

    def outline(ax, point_list, label):
        # Closed outline plus translucent fill for one polygon.
        xs, ys = zip(*point_list)
        ax.plot(xs + (xs[0],), ys + (ys[0],), marker='o', linestyle='-', label=label)
        ax.fill(xs, ys, alpha=0.3)

    # Left: polygon exactly as annotated
    original_ax.imshow(image)
    original_ax.set_title("Original Polygon")
    for shape in shapes:
        raw_points = shape["points"]
        outline(original_ax, raw_points, shape["label"])
        if len(raw_points) > 4:
            # Everything past the fourth point counts as "extra"
            extra_xs, extra_ys = zip(*raw_points[4:])
            original_ax.scatter(extra_xs, extra_ys, color='red', s=100, label='Extra Keypoints')
        original_ax.legend()

    # Right: polygon after merging
    replaced_ax.imshow(image)
    replaced_ax.set_title("Replacement Polygon")
    for shape in shapes:
        raw_points = shape["points"]
        merged_points = replace_closest_points(raw_points)
        outline(replaced_ax, merged_points, shape["label"])
        if len(raw_points) > 4:
            # The last entry of the merged polygon is highlighted
            marker_x, marker_y = merged_points[-1]
            replaced_ax.scatter(marker_x, marker_y, color='green', s=100, label='Replacement Point')
        replaced_ax.legend()

    plt.tight_layout()
    plt.show()

In [None]:
# Directory containing the label JSON files
label_dir = '/kaggle/input/surround-view-mmu-parking-slots-dataset/main_data/labels'

# Collect the label files that contain at least one polygon with more than 4 keypoints
filtered_files = filter_images_with_4_plus_keypoints(label_dir)

# Visualize the first few examples (original vs. replacement polygon)
for filename in filtered_files[:5]:  # Adjust the number of examples as needed
    compare_original_and_replacement(filename)

# Removing Intersection 


In [None]:
from shapely.geometry import Polygon

def order_points_convex(points):
    """Sort four points by their angle around the points' mean position.

    Inputs that do not have exactly four points are returned unchanged.
    Angles come from ``math.atan2``, so the result is in ascending angle
    order starting from -pi.
    """
    if len(points) != 4:
        return points  # Only works for quadrilaterals

    # Mean position of the four points
    cx = sum(p[0] for p in points) / 4
    cy = sum(p[1] for p in points) / 4

    return sorted(points, key=lambda p: math.atan2(p[1] - cy, p[0] - cx))

In [None]:
def is_convex(points):
    """Return whether *points* form a valid polygon equal to its own convex hull.

    Fewer than three points can never form a polygon, so that case is False.
    """
    if len(points) < 3:
        return False  # A polygon must have at least 3 points

    polygon = Polygon(points)
    if not polygon.is_valid:
        return False
    return polygon.convex_hull.equals(polygon)

In [None]:
def ensure_rectangle_shape(points):
    """Order four points by angle and return them, nudged if not convex.

    Inputs that do not have exactly four points are returned unchanged. The
    points are ordered with ``order_points_convex``; if ``is_convex`` rejects
    that ordering, point ``i`` is shifted by ``0.01 * i`` in both x and y.
    """
    if len(points) != 4:
        return points  # Only works for quadrilaterals

    ordered = order_points_convex(points)
    if not is_convex(ordered):
        # Not convex: shift each point slightly, growing with its index
        return [(x + 0.01 * i, y + 0.01 * i) for i, (x, y) in enumerate(ordered)]
    return ordered

In [None]:
import math

def find_closest_points(points):
    """Return the index pair ``(i, j)``, ``i < j``, of the two closest points.

    Distance is Euclidean. On ties the first pair in ``(i, j)`` order wins.
    Returns ``None`` when there are fewer than two points.
    """
    closest_pair = None
    min_distance = float('inf')

    for i in range(len(points) - 1):
        # Compare point i with every later point only, so each pair is seen once.
        for j, other in enumerate(points[i + 1:], start=i + 1):
            x1, y1 = points[i]
            x2, y2 = other
            distance = math.sqrt((x2 - x1)**2 + (y2 - y1)**2)
            if distance < min_distance:
                closest_pair = (i, j)
                min_distance = distance

    return closest_pair

In [None]:
def replace_closest_points(points):
    """Merge the two closest points of an oversized polygon into their midpoint.

    Polygons with four or fewer points are returned unchanged. Otherwise the
    closest pair is removed and its midpoint is appended as the last point.
    """
    if len(points) <= 4:
        return points

    i, j = find_closest_points(points)
    mid_x = (points[i][0] + points[j][0]) / 2
    mid_y = (points[i][1] + points[j][1]) / 2

    # Everything except the merged pair, followed by the midpoint.
    remaining = [pt for idx, pt in enumerate(points) if idx not in (i, j)]
    return remaining + [(mid_x, mid_y)]

In [None]:
def find_top_left_point(points):
    """Return the top-left point of *points*, or ``None`` if there are none.

    A point that has both the smallest x and the smallest y is returned when
    one exists. Rotated quadrilaterals usually have no such vertex (their
    leftmost and topmost vertices differ); in that case the point with the
    smallest ``x + y`` is returned, the first one on ties.
    """
    if not points:
        return None

    min_x = min(p[0] for p in points)
    min_y = min(p[1] for p in points)
    for p in points:
        if p[0] == min_x and p[1] == min_y:
            return p

    # No vertex is both leftmost and topmost: pick the one closest to the
    # top-left corner in the x + y sense.
    return min(points, key=lambda p: p[0] + p[1])

def reorder_points(points):
    """Rotate *points* so the point chosen by ``find_top_left_point`` comes first.

    The cyclic order of the points is preserved. If ``find_top_left_point``
    returns ``None``, the input is returned unchanged.
    """
    anchor = find_top_left_point(points)
    if anchor is None:
        return points

    # Split at the anchor and swap the two halves.
    start = points.index(anchor)
    head, tail = points[:start], points[start:]
    return tail + head

In [None]:
from shapely.geometry import Polygon

def is_valid_polygon(points):
    """Return whether *points* (at least three) form a valid shapely polygon."""
    # Fewer than three points cannot form a polygon, so skip building one.
    return len(points) >= 3 and Polygon(points).is_valid

In [None]:
def reorder_points_to_avoid_intersections(points):
    """Reorder *points* so they form a simple (non self-intersecting) polygon.

    Points that already pass ``is_valid_polygon`` are returned unchanged.
    Otherwise the points are sorted by angle around their mean position and
    that ordering is returned if it is valid. If it is not, the original
    points are returned.

    Cyclic rotations of the input are deliberately not tried: rotating the
    starting vertex describes the same ring, so it cannot turn an invalid
    polygon into a valid one.
    """
    import math  # local import so this function does not rely on an earlier cell

    if is_valid_polygon(points):
        return points  # If already valid, return the points

    if len(points) < 3:
        return points  # Too few vertices for any ordering to help

    centroid_x = sum(p[0] for p in points) / len(points)
    centroid_y = sum(p[1] for p in points) / len(points)
    by_angle = sorted(
        points,
        key=lambda p: math.atan2(p[1] - centroid_y, p[0] - centroid_x),
    )
    if is_valid_polygon(by_angle):
        return by_angle

    # If no valid ordering is found, return the original points
    return points

In [None]:
def adjust_collinear_points(points):
    """Nudge points off a common line so they can form a polygon.

    Inputs with fewer than three points, or with at least one consecutive
    triple that is not collinear, are returned unchanged. Otherwise point
    ``i`` is moved by ``0.01 * i`` in x and ``0.01 * i**2`` in y and the
    adjusted points are returned as a new list of tuples.

    The y offset is quadratic on purpose: an offset that grows linearly with
    the index keeps evenly spaced collinear points on a single line.
    """
    if len(points) < 3:
        return points  # A polygon must have at least 3 points

    def are_collinear(p1, p2, p3):
        # Equal cross-products of the two edges means there is no turn at p2.
        return (p2[1] - p1[1]) * (p3[0] - p2[0]) == (p3[1] - p2[1]) * (p2[0] - p1[0])

    triples = zip(points, points[1:], points[2:])
    if not all(are_collinear(a, b, c) for a, b, c in triples):
        return points

    # Bend the line slightly so consecutive points no longer line up.
    return [(x + 0.01 * i, y + 0.01 * i * i) for i, (x, y) in enumerate(points)]

In [None]:
def ensure_polygon_validity(points):
    """Clean up *points* in three steps and return the result.

    1. Convert every point to a tuple and drop repeated points, keeping the
       first occurrence of each.
    2. Pass the result through ``adjust_collinear_points``.
    3. Pass that result through ``reorder_points_to_avoid_intersections``.
    """
    # dict.fromkeys keeps insertion order, so duplicates vanish without reshuffling.
    deduplicated = list(dict.fromkeys(map(tuple, points)))
    return reorder_points_to_avoid_intersections(adjust_collinear_points(deduplicated))

In [None]:
def compare_original_and_replacement(filename):
    """Show a label file's polygons as annotated and after clean-up.

    Left panel: each polygon, rotated by ``reorder_points``, with the
    keypoints beyond the fourth (in annotation order) marked in red.

    Right panel: each polygon after ``replace_closest_points``,
    ``ensure_rectangle_shape`` and ``reorder_points``. For annotations with
    more than four points, the point that ``replace_closest_points``
    appended is marked in green.

    The image path is derived from *filename* by replacing "json" with "jpg"
    and "labels" with "images".
    """
    with open(filename, 'r') as f:
        keypoints = json.load(f)
    detections = keypoints["shapes"]
    image = Image.open(filename.replace("json", "jpg").replace("labels", "images"))

    fig, axes = plt.subplots(1, 2, figsize=(20, 8))

    # Plot original polygon
    axes[0].imshow(image)
    axes[0].set_title("Original Polygon")
    for points in detections:
        original_points = points["points"]
        # Reorder points to ensure the top-left point is first
        reordered_points = reorder_points(original_points)
        x_coords, y_coords = zip(*reordered_points)
        axes[0].plot(x_coords + (x_coords[0],), y_coords + (y_coords[0],), marker='o', linestyle='-', label=points["label"])
        axes[0].fill(x_coords, y_coords, alpha=0.3)
        # "Extra" keypoints are those beyond the fourth in annotation order,
        # so take them from the points as stored, not from the rotated list.
        if len(original_points) > 4:
            extra_x, extra_y = zip(*original_points[4:])
            axes[0].scatter(extra_x, extra_y, color='red', s=100, label='Extra Keypoints')
        axes[0].legend()

    # Plot replacement polygon
    axes[1].imshow(image)
    axes[1].set_title("Replacement Polygon")
    for points in detections:
        original_points = points["points"]
        merged_points = replace_closest_points(original_points)
        # replace_closest_points appends the midpoint last. Remember it now,
        # because the following steps change the order of the points.
        replacement_point = merged_points[-1] if len(original_points) > 4 else None
        # Ensure polygon validity
        shaped_points = ensure_rectangle_shape(merged_points)
        # Reorder points to ensure the top-left point is first, and draw that ordering
        display_points = reorder_points(shaped_points)
        x_coords, y_coords = zip(*display_points)
        axes[1].plot(x_coords + (x_coords[0],), y_coords + (y_coords[0],), marker='o', linestyle='-', label=points["label"])
        axes[1].fill(x_coords, y_coords, alpha=0.3)
        # Highlight the replacement point
        if replacement_point is not None:
            replacement_x, replacement_y = replacement_point
            axes[1].scatter(replacement_x, replacement_y, color='green', s=100, label='Replacement Point')
        axes[1].legend()

    plt.tight_layout()
    plt.show()

In [None]:
# Directory containing the label JSON files
label_dir = '/kaggle/input/surround-view-mmu-parking-slots-dataset/main_data/labels'

# Collect the label files that contain at least one polygon with more than 4 keypoints
filtered_files = filter_images_with_4_plus_keypoints(label_dir)

# Visualize the first twenty examples (original vs. replacement polygon)
for filename in filtered_files[:20]:  # Adjust the number of examples as needed
    compare_original_and_replacement(filename)