# Patch Perfect

## Import Necessary Libraries

In [None]:
import cv2
import numpy as np
from google.colab.patches import cv2_imshow
import inference
import os
from roboflow import Roboflow
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import joblib
import csv
import matplotlib.pyplot as plt

## Load Model from RoboFlow and Predict Potholes

### Run Object Detection on Train Images

In [None]:
from roboflow import Roboflow
import os
import numpy as np
import cv2
from google.colab.patches import cv2_imshow
import inference
# Load the model.
# NOTE(review): a live API key is committed in this notebook. Prefer exporting
# ROBOFLOW_API_KEY in the environment (and rotating the committed key); the
# literal is kept only as a fallback so existing runs keep working.
rf = Roboflow(api_key=os.environ.get("ROBOFLOW_API_KEY", "Ujq7MVtbrywY83z9oDsn"))
project = rf.workspace("pothole").project("stick4")
model = project.version(1).model

# Define the confidence and overlap thresholds
confidence_threshold = 0.001
overlap_threshold = 0.01

# Directory where the images are located
image_directory = "drive/MyDrive/data/train_images/"

# Directory where the predictions should be saved
predictions_directory = "drive/MyDrive/data/predictions/"

# Start from an empty predictions directory: delete stale files if it exists,
# otherwise create it.
if os.path.exists(predictions_directory):
    for filename in os.listdir(predictions_directory):
        file_path = os.path.join(predictions_directory, filename)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
        except OSError as e:
            print(f"Failed to delete {file_path}: {e}")
else:
    os.makedirs(predictions_directory)

# Get the list of all image files in the directory
image_files = sorted(f for f in os.listdir(image_directory) if f.endswith('.jpg'))

# Running per-class maxima of (x1, y1, width, height), used to normalise the
# data in a later cell.
maxPothole = np.zeros(4)
maxStick1 = np.zeros(4)
maxStick2 = np.zeros(4)
running_max = {0: maxPothole, 1: maxStick1, 2: maxStick2}

# OpenCV colours are BGR: green for the pothole, cyan for stick 1, yellow for stick 2.
box_colours = {0: (0, 255, 0), 1: (255, 255, 0), 2: (0, 255, 255)}


def _normalized_xywh(bbox, img_width, img_height):
    """Return [x1, y1, width, height] of `bbox` as fractions of the image size.

    `bbox['x']` / `bbox['y']` are treated as the box centre, so half the
    width/height is subtracted to obtain the top-left corner.
    """
    x1 = (bbox['x'] - (bbox['width'] / 2)) / img_width
    y1 = (bbox['y'] - (bbox['height'] / 2)) / img_height
    return np.array([x1, y1, bbox['width'] / img_width, bbox['height'] / img_height])


for image_file in image_files:
    image_path = os.path.join(image_directory, image_file)
    print(image_file)

    try:
        # Perform inference on the image with the specified confidence and overlap thresholds
        prediction = model.predict(image_path, confidence=confidence_threshold, overlap=overlap_threshold).json()
        print(prediction)

        # Load the image using OpenCV
        img = cv2.imread(image_path)
        if img is None:
            raise FileNotFoundError(f"Image {image_path} not found.")

        # Get image dimensions for normalization
        img_height, img_width, _ = img.shape

        # Separate the predictions by class
        pothole_boxes = []
        stick_boxes = []
        for bbox in prediction['predictions']:
            if bbox['class'] == '0':
                pothole_boxes.append(bbox)
            elif bbox['class'] in ('1', '2'):
                stick_boxes.append(bbox)

        # Per class: highest confidence seen so far and the box that achieved it.
        # A class with no detection keeps the -1 sentinel row.
        best_confidence = {0: 0, 1: 0, 2: 0}
        best_box = {class_key: np.full(4, -1.0) for class_key in best_confidence}

        # Potholes are keyed as class 0; sticks are keyed by their numeric class_id.
        keyed_boxes = [(0, bbox) for bbox in pothole_boxes]
        keyed_boxes += [(bbox['class_id'], bbox) for bbox in stick_boxes]

        for class_key, bbox in keyed_boxes:
            if class_key not in best_box:
                continue  # Unexpected class_id: nothing to record for it
            if bbox['confidence'] > best_confidence[class_key]:
                best_confidence[class_key] = bbox['confidence']
                best_box[class_key] = _normalized_xywh(bbox, img_width, img_height)
                # Update the shared maxima in place so later cells see them.
                np.maximum(running_max[class_key], best_box[class_key], out=running_max[class_key])

        # Draw exactly the boxes that will be written out. Drawing after the
        # selection guarantees each rectangle uses its own coordinates and colour
        # rather than values left over from a previous loop iteration.
        for class_key, box in best_box.items():
            if best_confidence[class_key] <= 0:
                continue  # No detection for this class
            x1, y1, width, height = box
            # Denormalize for drawing purposes
            start_point = (int(x1 * img_width), int(y1 * img_height))
            end_point = (int((x1 + width) * img_width), int((y1 + height) * img_height))
            img = cv2.rectangle(img, start_point, end_point, box_colours[class_key], 2)

        # Display the image with bounding boxes
        cv2_imshow(img)

        pothole_array, l1_array, l2_array = best_box[0], best_box[1], best_box[2]

        # Write one "class, x1, y1, width, height" row per class to a .txt file named
        # after the image. The file is opened only once everything above has
        # succeeded, so a failure never leaves an empty prediction file behind.
        output_file_name = os.path.join(predictions_directory, f"{os.path.splitext(image_file)[0]}.txt")
        with open(output_file_name, "w") as file:
            file.write(f"0, {pothole_array[0]:.6f}, {pothole_array[1]:.6f}, {pothole_array[2]:.6f}, {pothole_array[3]:.6f}\n")
            file.write(f"1, {l1_array[0]:.6f}, {l1_array[1]:.6f}, {l1_array[2]:.6f}, {l1_array[3]:.6f}\n")
            file.write(f"2, {l2_array[0]:.6f}, {l2_array[1]:.6f}, {l2_array[2]:.6f}, {l2_array[3]:.6f}\n")

    except Exception as e:
        # Best-effort batch run: report the failure and move on to the next image
        print(f"Error processing {image_path}: {e}")
        continue

### Run Object Detection on Test Images

In [None]:
from roboflow import Roboflow
import os
import numpy as np
import cv2
from google.colab.patches import cv2_imshow
import inference
# Load the model.
# NOTE(review): a live API key is committed in this notebook. Prefer exporting
# ROBOFLOW_API_KEY in the environment (and rotating the committed key); the
# literal is kept only as a fallback so existing runs keep working.
rf = Roboflow(api_key=os.environ.get("ROBOFLOW_API_KEY", "Ujq7MVtbrywY83z9oDsn"))
project = rf.workspace("pothole").project("stick4")
model = project.version(1).model

# Define the confidence and overlap thresholds
confidence_threshold = 0.001
overlap_threshold = 0.01

# Directory where the images are located
image_directory = "drive/MyDrive/data/test_images/"

# Directory where the predictions should be saved
predictions_directory = "drive/MyDrive/data/predictions_test/"

# Start from an empty predictions directory: delete stale files if it exists,
# otherwise create it.
if os.path.exists(predictions_directory):
    for filename in os.listdir(predictions_directory):
        file_path = os.path.join(predictions_directory, filename)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
        except OSError as e:
            print(f"Failed to delete {file_path}: {e}")
else:
    os.makedirs(predictions_directory)

# Get the list of all image files in the directory
image_files = sorted(f for f in os.listdir(image_directory) if f.endswith('.jpg'))

# Running per-class maxima of (x1, y1, width, height) for the test set, used to
# normalise the data in a later cell. They must be created before the loop:
# without them the first detection raises NameError, which the broad `except`
# below would swallow for every image.
maxPotholeT = np.zeros(4)
maxStick1T = np.zeros(4)
maxStick2T = np.zeros(4)
running_max = {0: maxPotholeT, 1: maxStick1T, 2: maxStick2T}

# OpenCV colours are BGR: green for the pothole, cyan for stick 1, yellow for stick 2.
box_colours = {0: (0, 255, 0), 1: (255, 255, 0), 2: (0, 255, 255)}


def _normalized_xywh(bbox, img_width, img_height):
    """Return [x1, y1, width, height] of `bbox` as fractions of the image size.

    `bbox['x']` / `bbox['y']` are treated as the box centre, so half the
    width/height is subtracted to obtain the top-left corner.
    """
    x1 = (bbox['x'] - (bbox['width'] / 2)) / img_width
    y1 = (bbox['y'] - (bbox['height'] / 2)) / img_height
    return np.array([x1, y1, bbox['width'] / img_width, bbox['height'] / img_height])


for image_file in image_files:
    image_path = os.path.join(image_directory, image_file)
    print(image_file)

    try:
        # Perform inference on the image with the specified confidence and overlap thresholds
        prediction = model.predict(image_path, confidence=confidence_threshold, overlap=overlap_threshold).json()
        print(prediction)

        # Load the image using OpenCV
        img = cv2.imread(image_path)
        if img is None:
            raise FileNotFoundError(f"Image {image_path} not found.")

        # Get image dimensions for normalization
        img_height, img_width, _ = img.shape

        # Separate the predictions by class
        pothole_boxes = []
        stick_boxes = []
        for bbox in prediction['predictions']:
            if bbox['class'] == '0':
                pothole_boxes.append(bbox)
            elif bbox['class'] in ('1', '2'):
                stick_boxes.append(bbox)

        # Per class: highest confidence seen so far and the box that achieved it.
        # A class with no detection keeps the -1 sentinel row.
        best_confidence = {0: 0, 1: 0, 2: 0}
        best_box = {class_key: np.full(4, -1.0) for class_key in best_confidence}

        # Potholes are keyed as class 0; sticks are keyed by their numeric class_id.
        keyed_boxes = [(0, bbox) for bbox in pothole_boxes]
        keyed_boxes += [(bbox['class_id'], bbox) for bbox in stick_boxes]

        for class_key, bbox in keyed_boxes:
            if class_key not in best_box:
                continue  # Unexpected class_id: nothing to record for it
            if bbox['confidence'] > best_confidence[class_key]:
                best_confidence[class_key] = bbox['confidence']
                best_box[class_key] = _normalized_xywh(bbox, img_width, img_height)
                # Update the shared maxima in place so later cells see them.
                np.maximum(running_max[class_key], best_box[class_key], out=running_max[class_key])

        # Draw exactly the boxes that will be written out. Drawing after the
        # selection guarantees each rectangle uses its own coordinates and colour
        # rather than values left over from a previous loop iteration.
        for class_key, box in best_box.items():
            if best_confidence[class_key] <= 0:
                continue  # No detection for this class
            x1, y1, width, height = box
            # Denormalize for drawing purposes
            start_point = (int(x1 * img_width), int(y1 * img_height))
            end_point = (int((x1 + width) * img_width), int((y1 + height) * img_height))
            img = cv2.rectangle(img, start_point, end_point, box_colours[class_key], 2)

        # Display the image with bounding boxes
        cv2_imshow(img)

        pothole_array, l1_array, l2_array = best_box[0], best_box[1], best_box[2]

        # Write one "class, x1, y1, width, height" row per class to a .txt file named
        # after the image. The file is opened only once everything above has
        # succeeded, so a failure never leaves an empty prediction file behind.
        output_file_name = os.path.join(predictions_directory, f"{os.path.splitext(image_file)[0]}.txt")
        with open(output_file_name, "w") as file:
            file.write(f"0, {pothole_array[0]:.6f}, {pothole_array[1]:.6f}, {pothole_array[2]:.6f}, {pothole_array[3]:.6f}\n")
            file.write(f"1, {l1_array[0]:.6f}, {l1_array[1]:.6f}, {l1_array[2]:.6f}, {l1_array[3]:.6f}\n")
            file.write(f"2, {l2_array[0]:.6f}, {l2_array[1]:.6f}, {l2_array[2]:.6f}, {l2_array[3]:.6f}\n")

    except Exception as e:
        # Best-effort batch run: report the failure and move on to the next image
        print(f"Error processing {image_path}: {e}")
        continue

### Normalise Train Predictions

In [None]:
def normalize_row(row, max_values):
    """Scale the entries after the first one in `row` by the matching `max_values`.

    `row[0]` (the class id) is passed through untouched. An entry is left
    as-is when it equals -1 or when its maximum is 0; otherwise it is divided
    by its maximum. Entries are paired positionally with `max_values`, and
    pairing stops at the shorter of the two.

    Returns a list: `[row[0], scaled_1, scaled_2, ...]`.
    """
    scaled = []
    for value, max_val in zip(row[1:], max_values):
        if max_val == 0 or value == -1:
            # Nothing to divide by, or a placeholder that must stay -1
            scaled.append(value)
        else:
            scaled.append(value / max_val)
    return [row[0]] + scaled

# Directory where the .txt files are located
predictions_directory = "drive/MyDrive/data/predictions/"

# Directory to save normalized .txt files
normalized_directory = "drive/MyDrive/data/predictions_train_normalized/"
if os.path.exists(normalized_directory):
    # Remove all files in the directory
    for filename in os.listdir(normalized_directory):
        file_path = os.path.join(normalized_directory, filename)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
        except Exception as e:
            print(f"Failed to delete {file_path}: {e}")
else:
    # Create the directory if it doesn't exist
    os.makedirs(normalized_directory)

# Loop through each .txt file in the directory
for txt_file in os.listdir(predictions_directory):
    if txt_file.endswith(".txt"):
        txt_file_path = os.path.join(predictions_directory, txt_file)
        normalized_txt_file_path = os.path.join(normalized_directory, txt_file)

        # Open the file to read and normalize
        with open(txt_file_path, "r") as infile, open(normalized_txt_file_path, "w") as outfile:
            for line in infile:
                values = list(map(float, line.strip().split(",")))
                box_id = int(values[0])
                row_values = np.array(values)

                if box_id == 0:
                    max_values = maxPothole
                elif box_id == 1:
                    max_values = maxStick1
                elif box_id == 2:
                    max_values = maxStick2
                else:
                    continue  # Skip rows with unknown box IDs

                # Normalize the row values
                normalized_row = normalize_row(row_values, max_values)

                # Write the normalized values to the output file
                outfile.write(",".join(map(str, normalized_row)) + "\n")

### Normalise Test Predictions

In [None]:
def normalize_row(row, max_values):
    """Divide the entries after the first one in `row` by the matching `max_values`.

    The leading entry (the class id) is returned unchanged. An entry that
    equals -1, or whose maximum is 0, is also returned unchanged. Entries are
    paired positionally with `max_values`; pairing stops at the shorter input.

    Returns a list: `[row[0], scaled_1, scaled_2, ...]`.
    """
    result = [row[0]]
    for value, max_val in zip(row[1:], max_values):
        divisible = max_val != 0 and value != -1
        result.append(value / max_val if divisible else value)
    return result

# Directory where the .txt files are located
predictions_directory = "drive/MyDrive/data/predictions_test/"

# Directory to save normalized .txt files
normalized_directory = "drive/MyDrive/data/predictions_test_normalized/"
if os.path.exists(normalized_directory):
    # Remove all files in the directory
    for filename in os.listdir(normalized_directory):
        file_path = os.path.join(normalized_directory, filename)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
        except Exception as e:
            print(f"Failed to delete {file_path}: {e}")
else:
    # Create the directory if it doesn't exist
    os.makedirs(normalized_directory)

# Loop through each .txt file in the directory
for txt_file in os.listdir(predictions_directory):
    if txt_file.endswith(".txt"):
        txt_file_path = os.path.join(predictions_directory, txt_file)
        normalized_txt_file_path = os.path.join(normalized_directory, txt_file)

        # Open the file to read and normalize
        with open(txt_file_path, "r") as infile, open(normalized_txt_file_path, "w") as outfile:
            for line in infile:
                values = list(map(float, line.strip().split(",")))
                box_id = int(values[0])
                row_values = np.array(values)

                if box_id == 0:
                    max_values = maxPotholeT
                elif box_id == 1:
                    max_values = maxStick1T
                elif box_id == 2:
                    max_values = maxStick2T
                else:
                    continue  # Skip rows with unknown box IDs

                # Normalize the row values
                normalized_row = normalize_row(row_values, max_values)

                # Write the normalized values to the output file
                outfile.write(",".join(map(str, normalized_row)) + "\n")

In [None]:
def load_prediction_data(file_path):
    """Read a comma-separated prediction file into one flat feature vector.

    Every line of the file is used. The first comma-separated field of each
    line (the box number) is dropped and the remaining fields are converted
    to floats, concatenated in file order.

    Returns a 1-D NumPy array of the collected values.
    """
    with open(file_path, "r") as handle:
        rows = handle.readlines()

    # Flatten row by row, leaving out each row's leading box-number column
    flat_values = [
        float(cell)
        for row in rows
        for cell in row.strip().split(",")[1:]
    ]
    return np.array(flat_values)

## KNN

In [None]:
from sklearn.neighbors import KNeighborsRegressor

predictions_directory = "drive/MyDrive/data/predictions/"
csv_file = "drive/MyDrive/data/train_labels.csv"

labels_df = pd.read_csv(csv_file)

# Pair each labelled pothole with the feature vector of its prediction file.
# Rows whose prediction file is missing are skipped, so `data` and `labels`
# stay aligned.
data = []
labels = []

for index, row in labels_df.iterrows():
    txt_file_name = f"p{int(row['Pothole number'])}.txt"
    txt_file_path = os.path.join(predictions_directory, txt_file_name)

    if os.path.exists(txt_file_path):
        # Load the prediction data
        features = load_prediction_data(txt_file_path)
        data.append(features)

        # Get the corresponding bags_used value
        labels.append(row['Bags used '])

# Keep the first 8 features per image. With the files written by the detection
# cell these are the pothole's (x1, y1, width, height) followed by stick 1's
# (x1, y1, width, height).
data = np.array(data)[:, :8]
print(data.shape)

# NOTE: the names below are kept for compatibility but are misleading given the
# column layout above: wp/hp hold the pothole's x1/y1, while ws/hs hold the
# pothole's width/height.
wp = data[:, 0]
hp = data[:, 1]
ws = data[:, 2]
hs = data[:, 3]

print(data[:, 1])

# Model inputs: columns 2, 3, 6 and 7 (pothole width/height and stick 1 width/height)
dataInput = list(zip(ws, hs, data[:, 6], data[:, 7]))

knn = KNeighborsRegressor(n_neighbors=5)
knn.fit(dataInput, labels)

# Predict every training row in one call. The result is stored as floats so
# fractional predictions are not truncated when the labels are integers.
predictions = np.asarray(knn.predict(dataInput), dtype=float)

plt.plot(labels, c='blue')
plt.plot(predictions, c='red')
plt.show()

In [None]:
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
import matplotlib.pyplot as plt

# Predict every row in one call. The result is stored as floats so fractional
# predictions are not truncated when the labels are integers.
predictions = np.asarray(knn.predict(dataInput), dtype=float)

# Calculate mean squared error
mse = mean_squared_error(labels, predictions)
print(f"Mean Squared Error: {mse}")

# Calculate the R^2 score
r2 = r2_score(labels, predictions)
print(f"R^2 Score: {r2}")

# One integer x position per sample (0, 1, ..., n-1)
x = np.arange(len(labels))

# Plot the results
plt.scatter(x, labels, c='blue', label='True Values')
plt.scatter(x, predictions, c='red', label='Predictions')
plt.legend()
plt.show()


In [None]:
predictions_directory = "drive/MyDrive/data/predictions/"
predictions_test_directory = "/content/drive/MyDrive/data/predictions_test"
csv_file = "drive/MyDrive/data/test_labels.csv"

labels_df = pd.read_csv(csv_file)

# Pair each listed pothole with the feature vector of its prediction file.
# `image` is appended only when the file exists, so image[i], data[i] and
# labels[i] always describe the same pothole.
data = []
labels = []
image = []

for index, row in labels_df.iterrows():
    txt_file_name = f"p{int(row['Pothole number'])}.txt"
    txt_file_path = os.path.join(predictions_test_directory, txt_file_name)

    if os.path.exists(txt_file_path):
        # Load the prediction data
        features = load_prediction_data(txt_file_path)
        data.append(features)
        image.append(txt_file_name)

        # Get the corresponding bags_used value
        labels.append(row['Bags used '])

print(data)

# Keep the first 8 features per image. With the files written by the detection
# cell these are the pothole's (x1, y1, width, height) followed by stick 1's
# (x1, y1, width, height).
data = np.array(data)[:, :8]
print(data.shape)

# NOTE: the names below are kept for compatibility but are misleading given the
# column layout above: wp/hp hold the pothole's x1/y1, while ws/hs hold the
# pothole's width/height.
wp = data[:, 0]
hp = data[:, 1]
ws = data[:, 2]
hs = data[:, 3]

print(data[:, 1])

# Model inputs: columns 2, 3, 6 and 7 (pothole width/height and stick 1 width/height)
dataInput = list(zip(ws, hs, data[:, 6], data[:, 7]))

# Predict with the already-fitted `knn` for every loaded row (not a fixed
# count), rounded to two decimals.
predictions = np.round(np.asarray(knn.predict(dataInput), dtype=float), 2)

# Print "<pothole number>,<prediction>"; [1:-4] strips the leading "p" and ".txt"
for file_name, predicted in zip(image, predictions):
    print(f"{file_name[1:-4]},{predicted:.2f}")

plt.plot(predictions, c='red')
plt.show()