# Importing Libraries
It is important to note that the code needs version 3.4.1 of Keras to run.

To quickly run the notebook, just run all the cells in order. A detailed explanation of the code and approach is provided along the way.

In [1]:
!pip install keras==3.4.1
!pip install cairocffi

Collecting cairocffi
  Downloading cairocffi-1.7.1-py3-none-any.whl.metadata (3.3 kB)
Downloading cairocffi-1.7.1-py3-none-any.whl (75 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: cairocffi
Successfully installed cairocffi-1.7.1


In [21]:
import random
import numpy as np
from numpy import asarray
import tensorflow as tf
import cairocffi as cairo
import json
import cv2
import keras
import numpy as np
from keras.models import load_model
import pandas as pd
import csv
import matplotlib.pyplot as plt
import scipy.interpolate as si
import math

In [3]:
# Show the installed Keras version so it can be checked against the 3.4.1 requirement stated above.
print(keras.__version__)

3.4.1


# Constants

In [4]:
# Maps the class index predicted by the classifier to its shape label.
shape_dict = dict(enumerate((
    'line',
    'circle',
    'ellipse',
    'triangle',
    'square',
    'rounded_rectangle',
    'pentagon',
    'hexagon',
    'octagon',
    'cloud',
    'star',
    'curve',
)))

# Edge length (pixels) of the canvas the polylines are rasterized onto.
side = 256
# Edge length (pixels) the rasters are resized to before being fed to the model.
small_side = 28

# Number of shape labels known to the pipeline.
nb_classes = len(shape_dict)

# Task- I: Regularization

## Class to represent a polyline

In [5]:
class PolylinePrediction:
    """A polyline (or a merged subset of polylines) together with its prediction.

    Attributes:
        original_polylines: the polylines as supplied by the caller.
        polylines: the polylines that are manipulated through the prediction process.
        lines_used: ``1`` by default; ``generate_subsequences`` stores the list of
            indices of the polylines merged into the subset here.
        probability: probability of the predicted class (``None`` until ``add``).
        predicted_class: the predicted class index (``None`` until ``add``).
        predicted_class_label: the predicted class label (``None`` until ``add``).
        symmetry: two-element list describing the symmetry along the y-axis and
            the x-axis; only exists after ``add_symmetry`` has been called.
    """

    def __init__(self, original_polylines, polylines, lines_used=1):
        self.original_polylines = original_polylines  # original polylines
        self.polylines = polylines
        self.lines_used = lines_used
        # Prediction results are filled in by add(); initialise them so reading
        # them on a fresh instance yields None instead of raising AttributeError.
        self.probability = None
        self.predicted_class = None
        self.predicted_class_label = None
        # `symmetry` is intentionally NOT initialised here: plot_symmetry uses
        # hasattr(pred, 'symmetry') to decide whether symmetry lines are drawn.

    def add(self, probability, predicted_class, predicted_class_label, lines_used):
        """Record the prediction result and the polylines it was made from."""
        self.probability = probability
        self.predicted_class = predicted_class
        self.predicted_class_label = predicted_class_label
        self.lines_used = lines_used

    def add_symmetry(self, symmetry):
        """Attach the symmetry flags computed for this shape."""
        self.symmetry = symmetry

## Rasterizing Pipeline
functions to rasterize a polyline

In [6]:
def vector_to_raster(vector_images, side=28, line_diameter=16, padding=16, bg_color=(0,0,0), fg_color=(1,1,1)):
    """Rasterize drawings as stroked polylines on a ``side`` x ``side`` canvas.

    ``padding`` and ``line_diameter`` are expressed relative to the original
    256x256 coordinate space; the whole drawing is scaled down to ``side``.

    Args:
        vector_images: iterable of drawings; each drawing is a list of strokes
            and each stroke is an ``[xs, ys]`` pair of coordinate sequences.
        side: edge length in pixels of the produced images.
        line_diameter: stroke width.
        padding: margin added on every edge.
        bg_color: RGB background colour passed to cairo.
        fg_color: RGB stroke colour passed to cairo.

    Returns:
        A list with one ``(side, side)`` array per drawing.
    """
    original_side = 256.

    surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, side, side)
    ctx = cairo.Context(surface)
    ctx.set_antialias(cairo.ANTIALIAS_BEST)
    ctx.set_line_cap(cairo.LINE_CAP_ROUND)
    ctx.set_line_join(cairo.LINE_JOIN_ROUND)
    ctx.set_line_width(line_diameter)

    # Scale to match the new size
    # Add padding at the edges for the line diameter
    # and add additional padding to account for antialiasing
    total_padding = padding * 2. + line_diameter
    new_scale = float(side) / float(original_side + total_padding)
    ctx.scale(new_scale, new_scale)
    ctx.translate(total_padding / 2., total_padding / 2.)

    raster_images = []
    for vector_image in vector_images:
        # Clear the shared surface before drawing the next image
        ctx.set_source_rgb(*bg_color)
        ctx.paint()

        # Shift the drawing by half of the space left between its maximum
        # x/y coordinate and the edge of the original canvas.
        bbox = np.hstack(vector_image).max(axis=1)
        offset = ((original_side, original_side) - bbox) / 2.
        offset = offset.reshape(-1,1)
        centered = [stroke + offset for stroke in vector_image]

        ctx.set_source_rgb(*fg_color)
        for xv, yv in centered:
            ctx.move_to(xv[0], yv[0])
            for x, y in zip(xv, yv):
                ctx.line_to(x, y)
            ctx.stroke()

        # Keep the first byte of every 4-byte ARGB32 pixel and copy it, because
        # the surface buffer is overwritten by the next iteration.
        data = surface.get_data()
        raster_image = np.copy(np.asarray(data)[::4])
        raster_image = raster_image.reshape((side, side))
        raster_images.append(raster_image)

    return raster_images

In [7]:
def vector_to_raster_subset(vector_images, side=28, point_diameter=16, padding=16, bg_color=(0,0,0), fg_color=(1,1,1),line_diameter=0):
    """Rasterize drawings as clouds of filled dots on a ``side`` x ``side`` canvas.

    Every coordinate of every stroke is drawn as a filled circle of diameter
    ``point_diameter``; strokes are not connected. ``padding`` and
    ``point_diameter`` are relative to the original 256x256 coordinate space.
    ``line_diameter`` is accepted but not used by this function.

    Returns:
        A list with one ``(side, side)`` array per drawing.
    """
    original_side = 256.
    canvas = cairo.ImageSurface(cairo.FORMAT_ARGB32, side, side)
    painter = cairo.Context(canvas)
    painter.set_antialias(cairo.ANTIALIAS_BEST)

    painter.set_line_width(point_diameter)

    # Fit the padded 256x256 space into the requested output size. The margin
    # covers the dot diameter plus extra room for antialiasing.
    margin = padding * 2. + point_diameter
    shrink = float(side) / float(original_side + margin)
    painter.scale(shrink, shrink)
    painter.translate(margin / 2., margin / 2.)

    rendered = []
    for drawing in vector_images:
        # Wipe whatever the previous drawing left on the shared surface
        painter.set_source_rgb(*bg_color)
        painter.paint()

        # Shift by half of the room left between the maximum x/y and the canvas edge
        extent = np.hstack(drawing).max(axis=1)
        shift = (((original_side, original_side) - extent) / 2.).reshape(-1, 1)
        shifted = [stroke + shift for stroke in drawing]

        # One filled circle per point
        painter.set_source_rgb(*fg_color)
        for xs, ys in shifted:
            for px, py in zip(xs, ys):
                painter.arc(px, py, point_diameter / 2.0, 0, 2 * np.pi)
                painter.fill()

        # First byte of every 4-byte pixel, copied because the buffer is reused
        pixels = np.copy(np.asarray(canvas.get_data())[::4])
        rendered.append(pixels.reshape((side, side)))

    return rendered

## Point Simplification and Standardization Pipeline

To simplify and standardize the points following steps are done on any polyline:
1. Scaling and translating the points to ensure they can fit in a canvas of size 256x256
2. Resampling the points to reduce the number of points and ensure that the points are uniformly spaced
3. Ramer-Douglas-Peucker algorithm with epsilon=2.0 is used to simplify the points while preserving the original geometry of the polyline

In [10]:
def resample_points(points, spacing=1.0):
    """Resample a polyline so consecutive points are at most ``spacing`` apart.

    Each segment is measured from the last emitted point. Evenly spaced points
    are inserted inside the segment and the segment's end point is then emitted
    too, so the original vertices and the final point are preserved. An
    intermediate vertex closer than ``spacing`` to the last emitted point is
    skipped.

    Args:
        points: sequence of ``(x, y)`` pairs.
        spacing: maximum distance between consecutive output points; must be > 0.

    Returns:
        ``points`` itself when it has fewer than two entries, otherwise a new
        list starting with ``points[0]`` followed by ``(x, y)`` tuples.

    Raises:
        ValueError: if ``spacing`` is not positive (only checked when resampling
            actually takes place).
    """
    if len(points) < 2:
        return points
    if spacing <= 0:
        raise ValueError("spacing must be positive")

    resampled = [points[0]]
    last_index = len(points) - 1
    for i in range(1, len(points)):
        p1, p2 = resampled[-1], points[i]
        dist = np.sqrt((p2[0] - p1[0])**2 + (p2[1] - p1[1])**2)
        num_points = int(dist / spacing)
        if num_points == 0 and i != last_index:
            # Vertex is closer than `spacing`: drop it and keep measuring from
            # the last emitted point. The final point is never dropped.
            continue
        for j in range(1, num_points + 1):
            x_new = p1[0] + (p2[0] - p1[0]) * j / (num_points + 1)
            y_new = p1[1] + (p2[1] - p1[1]) * j / (num_points + 1)
            resampled.append((x_new, y_new))
        if dist > 0:
            # The interpolation above stops short of p2; emit the vertex itself
            # so corners and the end of the polyline are not lost.
            resampled.append((p2[0], p2[1]))
    return resampled

def rdp(points, epsilon):
    """Simplify a polyline with the Ramer-Douglas-Peucker algorithm.

    Args:
        points: list of ``(x, y)`` pairs.
        epsilon: a point is kept when it lies farther than this from the chord
            joining the end points of the span being examined.

    Returns:
        ``points`` itself when it has fewer than three entries, otherwise a new
        list holding the retained points in their original order (the first and
        last point are always retained).
    """
    if len(points) < 3:
        return points

    def perpendicular_distance(point, line_start, line_end):
        # Plain scalar arithmetic: np.cross on 2-D vectors is deprecated in
        # NumPy 2.0 and allocating arrays per point is needlessly slow.
        dx = line_end[0] - line_start[0]
        dy = line_end[1] - line_start[1]
        px = line_start[0] - point[0]
        py = line_start[1] - point[1]
        if dx == 0 and dy == 0:
            # Degenerate chord (closed span): fall back to point-to-point distance
            return math.hypot(px, py)
        return abs(dx * py - dy * px) / math.hypot(dx, dy)

    last = len(points) - 1
    keep = [False] * len(points)
    keep[0] = keep[last] = True

    # Explicit stack instead of recursion so long polylines cannot hit the
    # interpreter's recursion limit.
    pending = [(0, last)]
    while pending:
        first, end = pending.pop()
        if end - first < 2:
            continue  # no interior point to examine
        dmax = 0
        index = first
        for i in range(first + 1, end):
            d = perpendicular_distance(points[i], points[first], points[end])
            if d > dmax:
                index = i
                dmax = d
        if dmax > epsilon:
            keep[index] = True
            pending.append((first, index))
            pending.append((index, end))

    return [point for point, kept in zip(points, keep) if kept]

def preprocess_points(data):
    """Scale, resample and simplify the first stroke of a drawing.

    Only ``data[0]`` (an ``[xs, ys]`` pair) is used. The stroke is scaled to fit
    a 256x256 canvas while preserving its aspect ratio, centred, resampled with
    a spacing of 1.0 and simplified with Ramer-Douglas-Peucker (epsilon 2.0).

    Returns:
        A one-stroke drawing ``[[xs, ys]]`` whose coordinates are truncated to
        ``int``.
    """
    # Convert to numpy array for easier manipulation
    x = np.array(data[0][0])
    y = np.array(data[0][1])

    # Define SVG canvas size
    svg_width = 256
    svg_height = 256

    # Calculate the bounding box of the points
    x_min, x_max = x.min(), x.max()
    y_min, y_max = y.min(), y.max()

    # Calculate width and height of the bounding box
    bbox_width = x_max - x_min
    bbox_height = y_max - y_min

    # Determine the scaling factor while maintaining aspect ratio. An axis with
    # zero extent imposes no constraint; treating it as scale 1 (as before)
    # prevented perfectly horizontal/vertical strokes from filling the canvas.
    x_scale = svg_width / bbox_width if bbox_width != 0 else float('inf')
    y_scale = svg_height / bbox_height if bbox_height != 0 else float('inf')
    scale = min(x_scale, y_scale)
    if np.isinf(scale):
        scale = 1  # every point coincides: nothing to scale

    # Scale and translate points to fit within the SVG canvas while preserving aspect ratio
    scaled_points = [(scale * (xi - x_min) + (svg_width - scale * bbox_width) / 2,
                      scale * (yi - y_min) + (svg_height - scale * bbox_height) / 2) for xi, yi in zip(x, y)]

    # Resample points to ensure they are evenly spaced
    resampled_points = resample_points(scaled_points, spacing=1.0)

    # Simplify the polyline using the Ramer-Douglas-Peucker algorithm
    epsilon = 2.0
    simplified_points = rdp(resampled_points, epsilon)

    x_coords, y_coords = zip(*simplified_points)
    return [[list(map(int, x_coords)), list(map(int, y_coords))]]

## Preprocessing Pipeline
Functions to preprocess the data to make it ready for the model

In [3]:
def resizing_X(X_orig, small_side):
    """Resize every image of ``X_orig`` to ``small_side`` x ``small_side``.

    ``X_orig`` is indexed as ``X_orig[i, j]`` where each entry is one image;
    the result keeps the same two leading dimensions.
    """
    target_size = (small_side, small_side)
    outer, inner = X_orig.shape[0], X_orig.shape[1]
    return np.array([
        [
            cv2.resize(X_orig[i, j], dsize=target_size, interpolation=cv2.INTER_CUBIC)
            for j in range(inner)
        ]
        for i in range(outer)
    ])

def preprocessing_pipeline(X,s):
    """Turn drawings into normalised model inputs.

    Args:
        X: list of drawings. When ``s`` is not True only ``X[0]`` is used.
        s: when True the drawings are rasterized as point clouds
            (``vector_to_raster_subset``); otherwise ``X[0]`` is simplified with
            ``preprocess_points`` and rasterized as a stroked polyline.

    Returns:
        A float32 array of shape ``(n, small_side, small_side, 1)`` scaled to [0, 1].
    """
    raster_options = dict(side=side, padding=16, bg_color=(1,1,1), fg_color=(0,0,0))

    # Rasterize the polylines
    if s == True:
        rasters = vector_to_raster_subset(X, **raster_options)
    else:
        simplified = [preprocess_points(X[0])]
        rasters = vector_to_raster(simplified, **raster_options)
    batch = np.array(rasters)

    # A 3-D array is wrapped in one extra leading axis so resizing_X can index [i, j]
    if len(batch.shape) == 3:
        batch = batch.reshape(1, *batch.shape)

    # Dimensionality reduction
    batch = resizing_X(batch, small_side)

    # Merge the two leading axes and add a trailing channel axis
    batch = batch.reshape(batch.shape[0] * batch.shape[1], small_side, small_side, 1)
    return batch.astype('float32') / 255.0

## Loading the trained CNN model
To classify a subset of polylines, a Convolutional Neural Network (CNN) model is trained on an augmented version of the QuickDraw dataset. The model is highly efficient, with a compact architecture comprising just 0.6 million parameters, while still achieving a robust accuracy of 93.01% on the test set.

The model architecture consists of four Conv2D layers with increasing filter sizes, followed by batch normalization and max-pooling layers to extract and downsample features. The model transitions to fully connected dense layers for classification, using dropout for regularization. The final output layer has 11 units, corresponding to the number of classes. 

In [9]:
# Load the trained shape classifier; `model` is used by predict_sample below.
# NOTE(review): the path is absolute (Colab-style /content) — the .keras file
# must be present there before this cell is run.
model = load_model('/content/shape_classifier.keras')

  saveable.load_own_variables(weights_store.get(inner_path))


In [11]:
def is_line(drawing, threshold=12.0):
    """Decide whether the first stroke of ``drawing`` is (close to) a straight line.

    The stroke counts as a line when no point lies farther than ``threshold``
    from the infinite line through its first and last point.

    Args:
        drawing: list of strokes; only ``drawing[0]`` (an ``[xs, ys]`` pair) is used.
        threshold: maximum allowed perpendicular deviation.

    Returns:
        ``True`` for a line, ``False`` otherwise. A stroke whose first and last
        points coincide has no reference line and is reported as ``False``.
    """
    x_coords, y_coords = drawing[0]
    points = np.column_stack((x_coords, y_coords)).astype(float)

    # Reference line through the first and last point
    start_point = points[0]
    line_vector = points[-1] - start_point
    line_length = np.linalg.norm(line_vector)

    if line_length == 0:
        # Previously this divided by zero and the answer (False) fell out of
        # NaN comparisons; make the outcome explicit and warning-free.
        return False

    direction = line_vector / line_length

    # Split each offset into its component along the line and the remainder;
    # the length of the remainder is the perpendicular distance.
    offsets = points - start_point
    along = offsets @ direction
    perpendicular = offsets - np.outer(along, direction)
    max_distance = np.linalg.norm(perpendicular, axis=1).max()

    return bool(max_distance <= threshold)

## Prediction Pipeline

The task of predicting the shape was quite complicated, as a shape can be represented by one or N polylines. Thus, there is no direct way to determine which polyline represents which shape. It is also important to consider that one polyline can be a part of multiple shapes.

Thus in this ambiguous situation, we experimentally discovered the following approach to predict the shape:
1. First, we make classification on every single polyline. Now we consider the following three cases:
    1. If the polyline is classified as a shape other than `line` or `curve` with `prediction probability(P) > 0.80`, then we consider it as a shape and add it to final output.
    2. If the polyline is classified as a `line` with `prediction probability > 0.70` and the `helper function 'is_line'` which checks if a polyline is line based on the deviation from the actual straight line, returns `True`, then we consider it as a line and add it to a set containing all lines.
    3. All other polylines are considered as `curves` and added to a set containing all curves.

2. Now, the task can be broken down into finding shapes in the set of `lines` and `curves`. This simplifies the process as a `curve can either be a curve, a circle, or an ellipse`. To deal with this, the only option is to try all subsets of polylines. If we have `N polylines` then we have `2^N subsets`. We can try all subsets and select the predictions with maximum probability. However, creating all subsets is not feasible for large N and has large memory requirements.

3. On further experimentation, we discovered that we can first try the subsets containing more polylines, i.e N, N-1, N-2, ... as they have a higher probability of being a shape. Thus, we first generate `threshold` number of subsets having polylines in decreasing order of N as they have a higher probability of being a shape.

4. We then make predictions on these subsets and whenever a shape is predicted with a `probability ϕ > 'tline'` for subset containing only `lines` and `ϕ > 'tcurve'` for subset containing only `curves`, then we add it to the final output. It is important to note that `tline` and `tcurve` are hyperparameters and can be tuned to get better results and the used values are also determined using hit and trial.

    1. We now remove the polylines which are part of the predicted shape from the remaining set of polylines and recompute the subsets.

    2. We repeat the process until we have no polylines left or no prediction was made in the current iteration. If no prediction was made in the current iteration, we simply add the remaining polylines to the final output as lines or curves.

**All the values of thresholds are determined experimentally.**

In [28]:
# Thresholds for classifying shapes involving lines and curves.
# predict_subsets accepts a subset of lines only when its probability exceeds
# `tline`, and a subset of curves only when its probability exceeds `tcurve`.
tline = 0.958
tcurve = 0.968

def predict_sample(data, s):
    """Classify a single drawing.

    Args:
        data: the drawing (list of ``[xs, ys]`` strokes).
        s: forwarded to ``preprocessing_pipeline``; True rasterizes the drawing
            as a point cloud, anything else as a simplified stroked polyline.

    Returns:
        ``(probability, predicted_class)``: the highest value in the model
        output and its index.
    """
    preprocessed_data = preprocessing_pipeline([data], s)
    # verbose=0: this function is called once per polyline and once per subset,
    # so the default per-call progress bar floods the notebook output.
    predictions = model.predict(preprocessed_data, verbose=0)
    predicted_class = np.argmax(predictions)
    probability = np.max(predictions)
    return probability, predicted_class

def create_single_sample_predictions(polylines):
    """Classify every polyline on its own and sort it into one of three buckets.

    Returns:
        ``(final_output, remaining_lines, remaining_curves)``:
        * ``final_output`` - samples predicted as a class other than 0 (line)
          with probability above 0.80 (above 0.980 for class 7, hexagon);
        * ``remaining_lines`` - samples predicted as class 0 with probability
          above 0.70 that also pass ``is_line``;
        * ``remaining_curves`` - everything else.
    """
    final_output, remaining_lines, remaining_curves = [], [], []

    for sample in polylines:
        probability, predicted_class = predict_sample(sample.polylines, False)

        confident_non_line = predicted_class != 0 and probability > 0.80
        # Class 7 (hexagon) needs a much higher probability to be accepted
        passes_hexagon_rule = predicted_class != 7 or probability > 0.980

        if confident_non_line and passes_hexagon_rule:
            sample.add(probability, predicted_class, shape_dict[predicted_class], [1])
            final_output.append(sample)
            continue

        # Not accepted as a shape: it is either a line or a curve
        if predicted_class == 0 and probability > 0.70 and is_line(sample.original_polylines):
            remaining_lines.append(sample)
        else:
            remaining_curves.append(sample)

    return final_output, remaining_lines, remaining_curves


def _subset_masks_largest_first(n):
    """Yield every non-empty bitmask over ``n`` items, most set bits first.

    Masks with the same number of set bits are yielded in ascending numeric
    order, which matches a stable descending sort of ``range(1, 1 << n)`` by
    bit count.
    """
    limit = 1 << n
    for size in range(n, 0, -1):
        mask = (1 << size) - 1  # smallest mask with `size` bits set
        while mask < limit:
            yield mask
            # Gosper's hack: next larger integer with the same number of set bits
            lowest = mask & -mask
            ripple = mask + lowest
            mask = (((ripple ^ mask) >> 2) // lowest) | ripple


def generate_subsequences(data, threshold = 120):
    """Build merged subsets of polylines, largest subsets first.

    Each subset is a ``PolylinePrediction`` whose single stroke is the
    concatenation of the first stroke of every selected polyline and whose
    ``lines_used`` is the list of selected indices into ``data``. Subsets are
    enumerated lazily, so only the ones that are returned are ever created
    instead of materialising and sorting all ``2**n - 1`` combinations.

    Args:
        data: list of ``PolylinePrediction`` objects.
        threshold: generation stops once this many subsets were produced.

    Returns:
        A list of ``PolylinePrediction`` subsets (empty when ``data`` is empty).
    """
    subsequences = []
    n = len(data)

    for mask in _subset_masks_largest_first(n):
        subset = [[[], []]]
        lines = []
        for j in range(n):
            if mask & (1 << j):
                subset[0][0].extend(data[j].original_polylines[0][0])
                subset[0][1].extend(data[j].original_polylines[0][1])
                lines.append(j)  # add j for j'th line
        subsequences.append(PolylinePrediction(subset, subset, lines))

        # Stop when the threshold number of subsets is reached
        if len(subsequences) >= threshold:
            break

    return subsequences

def predict_subsets(remaining, final_output, rem_class):
    """Search subsets of ``remaining`` for shapes and append results to ``final_output``.

    Subsets are tried largest first. When one is accepted it is appended to
    ``final_output``, its polylines are removed from ``remaining`` and the
    search restarts on what is left. When a full pass accepts nothing (or more
    than 15 polylines were supplied, in which case no subsets are generated)
    every remaining polyline is appended as ``rem_class`` with probability 1.

    Args:
        remaining: list of ``PolylinePrediction`` objects, all lines or all curves.
        final_output: list that receives the results (modified in place).
        rem_class: 0 when ``remaining`` holds lines, 11 when it holds curves.

    Returns:
        None.
    """
    if len(remaining) > 15:
        subsets = []
    else:
        subsets = generate_subsequences(remaining)
    subsets.sort(key=lambda x: len(x.lines_used), reverse=True)  # Sort by number of lines in subset, max first

    while len(remaining) > 0:
        found = 0

        for sample in subsets:
            probability, predicted_class = predict_sample(sample.polylines, True)
            # A star predicted from 2 to 4 polylines is skipped
            if predicted_class == 10 and len(sample.lines_used) > 1 and len(sample.lines_used) < 5:
                continue

            # Curves are accepted only as a circle (1) or a curve (11) above `tcurve`;
            # lines are accepted as any class other than line (0) above `tline`.
            curve_match = rem_class == 11 and (predicted_class == 1 or predicted_class == 11) and probability > tcurve
            line_match = rem_class == 0 and predicted_class != 0 and probability > tline
            if curve_match or line_match:
                sample.add(probability, predicted_class, shape_dict[predicted_class], sample.lines_used)
                final_output.append(sample)

                # `lines_used` indexes the CURRENT `remaining` list, so the set of
                # indices to drop must be rebuilt for every match. Accumulating
                # it across iterations removed unrelated polylines after
                # `remaining` had been re-indexed.
                used = set(sample.lines_used)
                remaining = [item for i, item in enumerate(remaining) if i not in used]

                # Recalculate the subsets after removal
                subsets = generate_subsequences(remaining)
                subsets.sort(key=lambda x: len(x.lines_used), reverse=True)

                found = 1
                break  # Start over with the updated `remaining`

        # No predictions were made and hence the remaining polylines are added as they are
        # they can be either lines or curves depending on `rem_class`
        if found == 0:
            for sample in remaining:
                sample.add(1, rem_class, shape_dict[rem_class], sample.lines_used)
                final_output.append(sample)

            break  # No more subsets to process, exit the loop

def create_predictions(polylines):
    """Run the full prediction: single-polyline pass, then subset search.

    Lines are searched first (fallback class 0), then curves (fallback class 11).
    Returns the list of predicted ``PolylinePrediction`` objects.
    """
    final_output, remaining_lines, remaining_curves = create_single_sample_predictions(polylines)
    for leftovers, fallback_class in ((remaining_lines, 0), (remaining_curves, 11)):
        predict_subsets(leftovers, final_output, fallback_class)
    return final_output

In [13]:
def read_csv(csv_path):
    """Read polylines from a comma-separated file.

    Rows are grouped by the first column (one ``PolylinePrediction`` per
    distinct value) and, within a group, by the second column (one stroke per
    distinct value). The remaining two columns are used as x and y.

    Returns:
        A list of ``PolylinePrediction`` objects whose strokes are
        ``[xs, ys]`` pairs of Python lists.
    """
    table = np.genfromtxt(csv_path, delimiter=',')
    shapes = []
    for shape_id in np.unique(table[:, 0]):
        shape_rows = table[table[:, 0] == shape_id][:, 1:]
        strokes = []
        for stroke_id in np.unique(shape_rows[:, 0]):
            coords = shape_rows[shape_rows[:, 0] == stroke_id][:, 1:]
            strokes.append([coords[:, 0].tolist(), coords[:, 1].tolist()])
        shapes.append(PolylinePrediction(strokes, strokes))
    return shapes

In [4]:
def prediction_pipeline(csv_path):
    """Read the polylines stored in ``csv_path`` and return their predictions."""
    return create_predictions(read_csv(csv_path))

## Regularization pipeline
It contains functions to regularize the output of the prediction pipeline. 

We have created a dedicated function to regularize each shape:

1. Rectangles: The code calculates the bounding box of the shape and constructs a regularized rectangle based on it, closing the loop to form a complete rectangle.

2. Circles: The code calculates a bounding box, finds the center and average diameter, and generates a circular shape using trigonometric functions to create evenly spaced points around the center.

3. Stars: The code identifies the bounding box, calculates the center and radius, and alternates between inner and outer radii to generate a regular star shape with evenly spaced points.

4. Curves: The code smooths the curve using B-spline interpolation with a higher smoothing factor, producing a smooth curve with a specified number of points.

Due to limited time, we could not implement regularization for all other shapes. However, we smooth all other shapes using B-spline interpolation with a lower smoothing factor to produce a smoother version of the shape.

In [83]:
def create_regularized_rectangle(drawing):
    """Replace a stroke by the closed outline of its bounding box.

    Args:
        drawing: an ``[xs, ys]`` pair of coordinate sequences.

    Returns:
        A one-stroke drawing ``[[xs, ys]]`` with five points: the four corners
        followed by the first corner again.
    """
    xs, ys = drawing

    left, right = np.min(xs), np.max(xs)
    top, bottom = np.min(ys), np.max(ys)

    # Walk the four corners and return to the start to close the loop
    corners = [(left, top), (right, top), (right, bottom), (left, bottom), (left, top)]
    outline_x = [corner_x for corner_x, _ in corners]
    outline_y = [corner_y for _, corner_y in corners]

    return [[outline_x, outline_y]]

def create_regularized_circle(drawing):
    """Replace a stroke by a circle fitted to its bounding box.

    The circle is centred on the bounding box and its diameter is the mean of
    the box's width and height.

    Args:
        drawing: an ``[xs, ys]`` pair of coordinate sequences.

    Returns:
        A one-stroke drawing ``[[xs, ys]]`` with 100 points from angle 0 to 2*pi
        inclusive.
    """
    xs, ys = drawing

    left, right = np.min(xs), np.max(xs)
    top, bottom = np.min(ys), np.max(ys)

    # Diameter: average of the two bounding-box extents
    diameter = ((right - left) + (bottom - top)) / 2
    radius = diameter / 2

    # Centre of the bounding box
    centre_x = (left + right) / 2
    centre_y = (top + bottom) / 2

    # 100 evenly spaced angles approximate the circle
    angles = np.linspace(0, 2 * np.pi, 100)
    ring_x = centre_x + radius * np.cos(angles)
    ring_y = centre_y + radius * np.sin(angles)

    return [[ring_x.tolist(), ring_y.tolist()]]

def create_regularized_star(drawing):
    """Replace a stroke by a regular five-pointed star inside its bounding box.

    The star is centred on the bounding box; the outer radius is half of the
    smaller box dimension and the inner radius is 0.4 times the outer radius.

    Args:
        drawing: an ``[xs, ys]`` pair of coordinate sequences.

    Returns:
        A one-stroke drawing ``[[xs, ys]]`` with eleven points (ten vertices
        plus the first one repeated to close the outline).
    """
    xs, ys = drawing

    left, right = np.min(xs), np.max(xs)
    top, bottom = np.min(ys), np.max(ys)

    centre_x = (left + right) / 2
    centre_y = (top + bottom) / 2
    outer = min((right - left), (bottom - top)) / 2

    tips = 5
    step = np.pi / tips

    # Even vertices sit on the outer radius, odd vertices on the inner one
    radii = [outer if k % 2 == 0 else outer * 0.4 for k in range(2 * tips)]
    star_x = [centre_x + r * np.cos(k * step) for k, r in enumerate(radii)]
    star_y = [centre_y + r * np.sin(k * step) for k, r in enumerate(radii)]

    # Repeat the first vertex so the outline is closed
    star_x.append(star_x[0])
    star_y.append(star_y[0])

    return [[star_x, star_y]]

def regularize_curve(ndjson_object, num_points=10000, s=20.0):
    """Smooth the first stroke of a drawing with a parametric B-spline.

    Args:
        ndjson_object: drawing; only ``ndjson_object[0]`` (an ``[xs, ys]`` pair)
            is used.
        num_points: number of points sampled along the fitted spline.
        s: smoothing factor passed to ``scipy.interpolate.splprep``.

    Returns:
        A one-stroke drawing ``[[xs, ys]]``. When fewer than two distinct
        consecutive points are available no spline can be fitted and the
        (de-duplicated) input points are returned instead.
    """
    x, y = ndjson_object[0]
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # splprep rejects consecutive identical points ("Invalid inputs"), so drop
    # every point that repeats its predecessor.
    if len(x) > 1:
        distinct = np.ones(len(x), dtype=bool)
        distinct[1:] = (np.diff(x) != 0) | (np.diff(y) != 0)
        x, y = x[distinct], y[distinct]

    if len(x) < 2:
        return [[x.tolist(), y.tolist()]]

    # splprep requires more points than the spline degree; use a cubic spline
    # when possible and a lower degree for very short strokes.
    degree = min(3, len(x) - 1)
    tck, u = si.splprep([x, y], s=s, k=degree)

    # Evaluate the B-spline and generate the smoothed points
    u_fine = np.linspace(0, 1, num_points)
    x_smooth, y_smooth = si.splev(u_fine, tck)

    return [[x_smooth.tolist(), y_smooth.tolist()]]

## Utility functions
Helper functions for plotting and input-output operations

In [16]:
def plot_multiple_shapes(shapes):
    """Plot every shape in its own panel of a three-column grid.

    The figure is saved as ``multiple_shapes.png`` and then shown.

    Args:
        shapes: list of drawings; each drawing is a list of ``[xs, ys]`` strokes.
    """
    cols = 3  # Panels per row
    num_shapes = len(shapes)
    rows = (num_shapes + cols - 1) // cols  # Ceiling division

    fig, grid = plt.subplots(rows, cols, figsize=(15, 5 * rows))
    axes = grid.flatten()  # 1-D view of the grid for simple indexing

    for i, shape in enumerate(shapes):
        panel = axes[i]
        for xs, ys in shape:
            panel.plot(xs, ys, color='black', linewidth=2)
        panel.invert_yaxis()  # Match the SVG coordinate system (y grows downwards)
        panel.set_title(f'Shape {i + 1}')
        panel.set_xlabel('X-axis')
        panel.set_ylabel('Y-axis')
        panel.grid(True)

    # Hide the panels that did not receive a shape
    for spare in axes[num_shapes:]:
        spare.axis('off')

    plt.tight_layout()
    plt.savefig('multiple_shapes.png')
    plt.show()

In [17]:
def create_csv_from_data(data, csv_filename):
    """Write shapes to ``csv_filename`` in the layout understood by ``read_csv``.

    One row is written per point: ``shape index, stroke index, x, y``. Every
    stroke of a shape is written; for single-stroke shapes the output is
    identical to writing only the first stroke with stroke index 0.

    Args:
        data: list of drawings; each drawing is a list of ``[xs, ys]`` strokes.
        csv_filename: path of the file to create (overwritten if it exists).
    """
    with open(csv_filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        for shape_idx, strokes in enumerate(data):
            for stroke_idx, stroke in enumerate(strokes):
                x_values, y_values = stroke[0], stroke[1]
                for x, y in zip(x_values, y_values):
                    writer.writerow([shape_idx, stroke_idx, x, y])

In [18]:
def read_and_plot_csv(csv_path):
    """Read polylines from a comma-separated file and plot all of them on one axes.

    Rows are grouped by the first column and, within a group, by the second
    column; the remaining two columns are plotted as x and y.
    """
    table = np.genfromtxt(csv_path, delimiter=',')

    # Parse everything first: one list of coordinate arrays per shape
    shapes = []
    for shape_id in np.unique(table[:, 0]):
        shape_rows = table[table[:, 0] == shape_id][:, 1:]
        shapes.append([
            shape_rows[shape_rows[:, 0] == stroke_id][:, 1:]
            for stroke_id in np.unique(shape_rows[:, 0])
        ])

    fig, ax = plt.subplots(tight_layout=True, figsize=(8, 8))
    for strokes in shapes:
        for coords in strokes:
            ax.plot(coords[:, 0], coords[:, 1], linewidth=2)
    ax.set_aspect('equal')
    plt.show()


# Classification Pipeline
The flow of main classification pipeline is as follows

In [80]:
def pipeline(csv_path, output_csv='output.csv'):
    """Main classification pipeline.

    Plots the input, predicts a shape for every polyline (or group of
    polylines), regularizes each prediction according to its label, plots the
    result and writes it to ``output_csv``.

    Args:
        csv_path: path of the input CSV file.
        output_csv: path the regularized shapes are written to and then read
            back from for the final plot.

    Returns:
        The list of predictions; ``original_polylines`` of each prediction is
        replaced in place by its regularized version.
    """
    # Plot the original data
    print("Original Data")
    read_and_plot_csv(csv_path)
    print()
    print()

    # Predict the classes of the polylines
    predictions = prediction_pipeline(csv_path)
    for prediction in predictions:
        print(f'Predicted class: {prediction.predicted_class_label} with probability {prediction.probability} and polyline {prediction.polylines}')
    print()
    print()

    # Regularize the shapes on the basis of the predicted classes
    for prediction in predictions:
        label = prediction.predicted_class_label
        if label == 'square':
            prediction.original_polylines = create_regularized_rectangle(prediction.original_polylines[0])
        elif label == 'circle':
            prediction.original_polylines = create_regularized_circle(prediction.original_polylines[0])
        elif label == 'star':
            prediction.original_polylines = create_regularized_star(prediction.original_polylines[0])
        else:
            # Lines, curves and every shape without a dedicated regularizer
            # are only smoothed with a light B-spline.
            prediction.original_polylines = regularize_curve(prediction.original_polylines, s=1)

    regularised_data = [prediction.original_polylines for prediction in predictions]

    # Plot the regularized shapes individually
    plot_multiple_shapes(regularised_data)
    print()
    print()

    # Create a CSV file from the regularized data as final output
    create_csv_from_data(regularised_data, output_csv)

    # Plot the regularized shapes from the file that was just written. The same
    # path is used for writing and reading; previously the file was written to
    # 'output.csv' but read from '/content/output.csv', which only worked when
    # the working directory happened to be /content.
    read_and_plot_csv(output_csv)
    print()
    print()

    return predictions

# Task II: Identification of symmetry in shapes

## Symmetry Detection Pipeline

In [None]:
def is_symmetric(data, axis='both', threshold=0.3):
    """Check whether the first stroke of ``data`` is mirror-symmetric.

    The test relies on point order: the i-th point from the start is compared
    with the i-th point from the end, mirrored about the centre of the
    coordinate range. Only x is compared for vertical symmetry and only y for
    horizontal symmetry.

    Args:
        data: drawing; only ``data[0]`` (an ``[xs, ys]`` pair) is used.
        axis: ``'vertical'``, ``'horizontal'`` or ``'both'``.
        threshold: allowed deviation as a fraction of the coordinate range.

    Returns:
        ``[vertical, horizontal]`` with 1 where symmetry was detected and 0
        otherwise (always 0 for an axis that was not requested, and ``[0, 0]``
        for an empty stroke).
    """
    # Extract x and y coordinates
    x = np.array(data[0][0])
    y = np.array(data[0][1])
    result = [0,0]

    if x.size == 0 or y.size == 0:
        return result  # nothing to test

    # Function to check symmetry
    def check_symmetry(coords, mirror_point):
        half = len(coords) // 2
        if half == 0:
            return True  # a single point mirrors onto itself

        # Pair coords[i] with coords[-1 - i]. For an odd number of points the
        # middle one is left out; the previous code dropped the LAST point
        # instead, which shifted every pair by one position.
        left = coords[:half]
        right = coords[len(coords) - half:][::-1]

        differences = np.abs(left - (2 * mirror_point - right))
        max_deviation = np.max(differences)

        return max_deviation <= threshold * (np.max(coords) - np.min(coords))

    # Check vertical symmetry (around y-axis)
    if axis in ['vertical', 'both']:
        mirror_x = (np.max(x) + np.min(x)) / 2
        if check_symmetry(x, mirror_x):
            result[0] = 1

    # Check horizontal symmetry (around x-axis)
    if axis in ['horizontal', 'both']:
        mirror_y = (np.max(y) + np.min(y)) / 2
        if check_symmetry(y, mirror_y):
            result[1] = 1

    return result

In [None]:
def plot_symmetry(predictions, max_cols=3):
    """Plot every prediction in a grid and mark its detected symmetry axes.

    A dashed red vertical line is drawn when ``symmetry[0] == 1`` and a dashed
    green horizontal line when ``symmetry[1] == 1``; predictions without a
    ``symmetry`` attribute are plotted without symmetry lines.

    Args:
        predictions: list of ``PolylinePrediction`` objects; only the first
            stroke of ``original_polylines`` is plotted.
        max_cols: maximum number of panels per row.
    """
    n = len(predictions)
    if n == 0:
        return  # nothing to plot (also avoids dividing by zero columns)

    cols = min(n, max_cols)
    rows = math.ceil(n / cols)

    # squeeze=False always returns a 2-D array of axes. Without it a single
    # prediction yields a bare Axes, which the previous code wrapped in an
    # array and then called .plot on, raising AttributeError.
    fig, axes = plt.subplots(rows, cols, figsize=(4*cols, 4*rows), squeeze=False)
    fig.tight_layout(pad=3.0)
    axes = axes.ravel()

    for i, pred in enumerate(predictions):
        ax = axes[i]

        # Extract x and y coordinates
        x = np.array(pred.original_polylines[0][0])
        y = np.array(pred.original_polylines[0][1])

        # Plot the curve
        ax.plot(x, y, 'b-', linewidth=2)
        ax.scatter(x, y, color='blue', s=20)

        # Plot symmetry lines if applicable
        if hasattr(pred, 'symmetry'):
            middle_x = (np.max(x) + np.min(x)) / 2
            middle_y = (np.max(y) + np.min(y)) / 2

            if pred.symmetry[0] == 1:  # Vertical symmetry
                ax.axvline(x=middle_x, color='r', linestyle='--', linewidth=1)
                ax.text(middle_x, ax.get_ylim()[1], 'Vertical symmetry',
                        rotation=90, va='top', ha='right', fontsize=8, color='r')

            if pred.symmetry[1] == 1:  # Horizontal symmetry
                ax.axhline(y=middle_y, color='g', linestyle='--', linewidth=1)
                ax.text(ax.get_xlim()[1], middle_y, 'Horizontal symmetry',
                        va='bottom', ha='right', fontsize=8, color='r')

        # Axis labels and cosmetics
        ax.set_xlabel("X", fontsize=8)
        ax.set_ylabel("Y", fontsize=8)

        ax.grid(True, linestyle=':', alpha=0.6)
        ax.tick_params(axis='both', which='major', labelsize=8)

    # Remove any unused subplots
    for unused in axes[n:]:
        fig.delaxes(unused)

    plt.show()

In [None]:
def symmetry_pipeline(regular_data):
    """Attach symmetry flags to every shape and plot the result.

    Shapes labelled ``curve`` or ``line`` are tested with ``is_symmetric``;
    every other shape is marked as symmetric along both axes.
    """
    for shape in regular_data:
        if shape.predicted_class_label in ("curve", "line"):
            flags = is_symmetric(shape.original_polylines)
        else:
            flags = [1,1]
        shape.add_symmetry(flags)

    plot_symmetry(regular_data)

# Main Program

In [None]:
# Path to the input CSV file with the polylines to process.
# NOTE(review): absolute path at the filesystem root — adjust to where the file actually lives.
csv_path = '/frag0.csv'

# Run the classification + regularization pipeline; returns the predictions
# whose polylines have been regularized.
regular_data = pipeline(csv_path);

# Symmetry detection on the regularized shapes (adds symmetry flags and plots them).
symmetry_pipeline(regular_data);