### Importing Libraries

In [43]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder 
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

## Data Preprocessing

### Loading the Dataset

In [7]:
# Load the labelled RGB samples (columns: red, green, blue, label) and preview the first rows.
data = pd.read_csv(filepath_or_buffer='archive/final_data.csv')
data.head(n=5)

Unnamed: 0,red,green,blue,label
0,20,139,240,Blue
1,174,83,72,Brown
2,144,249,131,Green
3,168,25,156,Pink
4,30,182,136,Green


### Encoding Labels

In [14]:
# LabelEncoder assigns each distinct color name an integer code from 0 to n_classes - 1,
# numbered in sorted order of the names (e.g. Blue -> 1, Brown -> 2, Green -> 3, Pink -> 6).
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(data['label'])
# Keep the codes next to the original names so they can be mapped back to colors later.
data['encoded_label'] = encoded_labels
data.head()

Unnamed: 0,red,green,blue,label,encoded_label
0,20,139,240,Blue,1
1,174,83,72,Brown,2
2,144,249,131,Green,3
3,168,25,156,Pink,6
4,30,182,136,Green,3


### Splitting Features

In [12]:
# Model inputs are the three color channels; the target is the integer-encoded color name.
features = data.loc[:, ['green', 'blue', 'red']]
labels = data.loc[:, 'encoded_label']
features

Unnamed: 0,green,blue,red
0,139,240,20
1,83,72,174
2,249,131,144
3,25,156,168
4,182,136,30
...,...,...,...
5047,26,26,26
5048,27,27,27
5049,28,28,28
5050,29,29,29


In [25]:
# Preview the encoded target column.
labels

0       1
1       2
2       3
3       6
4       3
       ..
5047    0
5048    0
5049    0
5050    0
5051    0
Name: encoded_label, Length: 5052, dtype: int32

In [23]:
# NOTE(review): the classes are heavily imbalanced (1457 rows for label 3 vs 29 for label 9).
print(labels.count()) # number of non-null labels
labels.value_counts() # rows per encoded label, most frequent first

5052


encoded_label
3     1457
1     1107
6      579
7      553
2      376
10     285
8      236
5      205
4      174
0       51
9       29
Name: count, dtype: int64

### Splitting the Dataset 

In [16]:
# test_size=0.2 holds out 20% of the rows for validation; the remaining 80% are used for training.
# random_state=1 fixes the shuffle so the split is reproducible.
# NOTE(review): the split is not stratified although some classes are rare (label 9 has 29 rows);
# consider stratify=labels.
X_train, X_val, y_train, y_val = train_test_split(features, labels, test_size=0.2, random_state=1)


## Training KNN Model

In [31]:
# k-nearest-neighbours classifier with k=18.
# NOTE(review): 18 is hard-coded here; the k search further down only tries odd values and reports k=17.
knn_model = KNeighborsClassifier(n_neighbors=18)
knn_model


In [32]:
# Fit the KNN classifier on the training split.
knn_model.fit(X=X_train, y=y_train)


In [33]:
# Predict encoded labels for the held-out validation rows.
predicted_labels = knn_model.predict(X=X_val)
predicted_labels


array([ 3,  1,  7, ..., 10,  7,  6])

In [29]:
# Fraction of validation rows whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_val, y_pred=predicted_labels)
accuracy

0.884272997032641

### Mapping predicted labels to color names

In [35]:
def get_color_name(predicted_label, label_frame=None):
    """Translate an encoded label back into its color name.

    Parameters
    ----------
    predicted_label : array-like or scalar
        Either the array returned by ``model.predict`` (only its element ``[0]``
        is looked up) or a single encoded label.
    label_frame : pandas.DataFrame, optional
        Frame holding the ``'label'`` and ``'encoded_label'`` columns used to
        build the lookup. Defaults to the notebook-level ``data`` frame.

    Returns
    -------
    The color name for the encoded label, or ``None`` if the label is unknown.
    """
    frame = data if label_frame is None else label_frame
    label_to_color_mapping = (
        frame[['label', 'encoded_label']]
        .drop_duplicates()
        .set_index('encoded_label')['label']
        .to_dict()
    )
    if np.ndim(predicted_label) == 0:
        # A bare scalar (Python or NumPy) cannot be indexed with [0]; use it directly.
        encoded = np.asarray(predicted_label).item()
    else:
        encoded = predicted_label[0]
    return label_to_color_mapping.get(encoded)

### Finding the Optimal value of K

In [40]:
# Sweep the odd neighbour counts 1, 3, ..., 19 and keep the one that scores
# highest on the validation split.
optimal_k, optimal_accuracy = 1, 0
for k in range(1, 21, 2):
    temp_model = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
    temp_predictions = temp_model.predict(X_val)
    current_accuracy = accuracy_score(y_val, temp_predictions)
    if not current_accuracy > optimal_accuracy:
        continue  # only a strictly better score replaces the current best (smallest k wins ties)
    optimal_k, optimal_accuracy = k, current_accuracy

print(f'Best Accuracy: {optimal_accuracy * 100:.2f}% with k={optimal_k}')


Best Accuracy: 88.33% with k=17


## Training SVM Model

In [44]:
# Support-vector classifier with a linear kernel as the first SVM baseline.
svm_model = SVC(kernel='linear')  # You can experiment with different kernels like 'rbf' or 'poly'


In [45]:
# Fit the SVM on the training split.
svm_model.fit(X=X_train, y=y_train)


### Predicting and Calculating Accuracy:

In [53]:
# Score the SVM on the validation split.
# Note: this reuses (and overwrites) the `predicted_labels` and `accuracy` names from the KNN cells.
predicted_labels = svm_model.predict(X=X_val)
accuracy = accuracy_score(y_true=y_val, y_pred=predicted_labels)
accuracy

0.887240356083086

### Finding the Best Kernel:

In [54]:
# Try the linear, RBF and polynomial kernels and remember whichever scores
# highest on the validation split.
kernels = ['linear', 'rbf', 'poly']
best_kernel, best_accuracy = None, 0
for kernel in kernels:
    temp_model = SVC(kernel=kernel).fit(X_train, y_train)
    temp_predictions = temp_model.predict(X_val)
    current_accuracy = accuracy_score(y_val, temp_predictions)
    if not current_accuracy > best_accuracy:
        continue  # keep the earlier kernel unless this one is strictly better
    best_kernel, best_accuracy = kernel, current_accuracy

print(f'Best Kernel: {best_kernel} with Accuracy: {best_accuracy * 100:.2f}%')

Best Kernel: rbf with Accuracy: 89.32%
