# Traffic Sign Recognition

Lets get a overview of steps performing in traffic sign recognition.
- Data preprocessing using `open cv`.
- Dimensionality reduction using `PCA`.
- Training the model on [GTRSB](https://www.kaggle.com/datasets/meowmeowmeowmeowmeow/gtsrb-german-traffic-sign) traffic signs dataset and testing.
-----

In [1]:
import cv2
from time import time
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

image_vector_size = 30
classes = 10

### Data Preprocessing
Here we read some of the training images in `GTRSB` dataset and use them both for our train and test using opencv library.

In [2]:
class Preprocess():
	def __init__(self, path):
		"""
		Called when class is created with the variables of data
		"""
		self.path = path

	def get_data(self):
			images = []
			labels = []
			dir = os.listdir(self.path)
			for cl in range(classes):
				label = str(cl)
				img_dir_path = self.path + '/' + label
				img_dir = os.listdir(img_dir_path)
				for img in img_dir:
					if not img.startswith("."):
						img_path = img_dir_path + '/' + img
						image = cv2.imread(img_path)
						image = cv2.resize(image, (image_vector_size, image_vector_size), interpolation = cv2.INTER_AREA)
						image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
						images.append(np.array(image))
						labels.append(cl)
			return (images, labels)

In [3]:
training_path = "Data/Train"

t0 = time()
print("Started Reading Images ......")
preprocessor = Preprocess(training_path)
images_train, labels_train = preprocessor.get_data()
print("Images Read in % 0.3fs" % (time() - t0))

Started Reading Images ......
Images Read in  4.750s


In [4]:
X_train, X_test, y_train, y_test = train_test_split(images_train, labels_train, test_size=0.2, random_state=42)

### Dimensionality reduction

Here we perform dimensionality reduction of our feature vectors using PCA for both training and testing data to use KNN.

In [5]:
from sklearn.decomposition import PCA

n_components = 5

n_samples_train = np.array(X_train).shape[0]
n_samples_test = np.array(X_test).shape[0]

X_train = np.array(X_train).reshape(n_samples_train, image_vector_size*image_vector_size)
X_test = np.array(X_test).reshape(n_samples_test, image_vector_size*image_vector_size)


t0 = time()
pca = PCA(n_components = n_components, svd_solver ='randomized',
          whiten = True).fit(X_train)

X_train_reduced = pca.transform(X_train)
X_test_reduced = pca.transform(X_test)

print("done in % 0.3fs" % (time() - t0))

done in  2.637s


### Modeling
Lets predict our test data using simpler and lazier model 1-NN.

In [6]:
from sklearn.neighbors import KNeighborsClassifier

clf = KNeighborsClassifier(n_neighbors=1)
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)
print(f"Accuracy score is: {score}")

Accuracy score is: 0.9284253578732107
