# Import necessary libraries

In [46]:
import pandas as pd
import numpy as np

# Step 1: Load the Iris dataset from a CSV file

In [47]:
# Relative path to the Iris CSV (resolved against the current working directory).
file_path = 'iris.csv'
# Read the whole file, then keep only the two sepal measurements and the label.
df = pd.read_csv(file_path).loc[:, ['SepalLength', 'SepalWidth', 'Species']]

# Print the loaded dataset

In [48]:
# Heading and frame in one call; sep="\n" puts the frame on its own line.
# pandas abbreviates the middle rows of a long frame in its text repr.
print("Loaded Iris Dataset:", df, sep="\n")

Loaded Iris Dataset:
     SepalLength  SepalWidth         Species
0            5.1         3.5     Iris-setosa
1            4.9         3.0     Iris-setosa
2            4.7         3.2     Iris-setosa
3            4.6         3.1     Iris-setosa
4            5.0         3.6     Iris-setosa
..           ...         ...             ...
145          6.7         3.0  Iris-virginica
146          6.3         2.5  Iris-virginica
147          6.5         3.0  Iris-virginica
148          6.2         3.4  Iris-virginica
149          5.9         3.0  Iris-virginica

[150 rows x 3 columns]


# Step 2: Take a sample input (Sepal Length, Sepal Width)

In [49]:
# Query point to classify, given as (sepal length, sepal width).
sample_sepal_length, sample_sepal_width = 5.0, 3.5

# Calculate Euclidean distance between the sample and each row in the dataset

In [50]:
# Straight-line (Euclidean) distance from every row to the sample point in the
# two-dimensional (SepalLength, SepalWidth) space: sqrt(dx^2 + dy^2).
df['Distance'] = np.sqrt(
    df['SepalLength'].sub(sample_sepal_length).pow(2)
    + df['SepalWidth'].sub(sample_sepal_width).pow(2)
)

# Print the dataset with the new 'Distance' column

In [51]:
# Show the frame again now that it carries the 'Distance' column.
print("\nDataset with Distance to the Sample:", df, sep="\n")


Dataset with Distance to the Sample:
     SepalLength  SepalWidth         Species  Distance
0            5.1         3.5     Iris-setosa  0.100000
1            4.9         3.0     Iris-setosa  0.509902
2            4.7         3.2     Iris-setosa  0.424264
3            4.6         3.1     Iris-setosa  0.565685
4            5.0         3.6     Iris-setosa  0.100000
..           ...         ...             ...       ...
145          6.7         3.0  Iris-virginica  1.772005
146          6.3         2.5  Iris-virginica  1.640122
147          6.5         3.0  Iris-virginica  1.581139
148          6.2         3.4  Iris-virginica  1.204159
149          5.9         3.0  Iris-virginica  1.029563

[150 rows x 4 columns]


# Step 3: Rank the rows based on the 'Distance' column

In [52]:
# Rank 1 is the closest row. method='min' gives every row in a tie the lowest
# rank of that group, and the ranks are stored as floats.
# NOTE(review): rows whose printed Distance looks equal (e.g. 0.1) can still get
# different ranks, presumably because of floating-point rounding -- confirm
# whether distances should be rounded before ranking.
df['Rank'] = df['Distance'].rank(method='min', ascending=True)

# Print the dataset with the new 'Rank' column

In [53]:
# Show the frame again now that it carries both 'Distance' and 'Rank'.
print("\nDataset with Distance and Rank:", df, sep="\n")


Dataset with Distance and Rank:
     SepalLength  SepalWidth         Species  Distance   Rank
0            5.1         3.5     Iris-setosa  0.100000    3.0
1            4.9         3.0     Iris-setosa  0.509902   35.0
2            4.7         3.2     Iris-setosa  0.424264   28.0
3            4.6         3.1     Iris-setosa  0.565685   38.0
4            5.0         3.6     Iris-setosa  0.100000    5.0
..           ...         ...             ...       ...    ...
145          6.7         3.0  Iris-virginica  1.772005  128.0
146          6.3         2.5  Iris-virginica  1.640122  114.0
147          6.5         3.0  Iris-virginica  1.581139  110.0
148          6.2         3.4  Iris-virginica  1.204159   81.0
149          5.9         3.0  Iris-virginica  1.029563   65.0

[150 rows x 5 columns]


# Step 4: Find the nearest neighbors and predict the group for the sample
# Sort by distance and keep the k nearest neighbors
# Change k to consider more (or fewer) neighbors

In [54]:
# Number of neighbours that take part in the vote.
k = 5
# The k rows with the smallest 'Distance'. keep='first' is the default and is
# spelled out here to show how ties at the cut-off are resolved.
nearest_neighbors = df.nsmallest(n=k, columns='Distance', keep='first')

# Print the nearest neighbors

In [55]:
# List the selected neighbours under a heading that states k.
print(f"\nTop {k} Nearest Neighbors:", nearest_neighbors, sep="\n")


Top 5 Nearest Neighbors:
    SepalLength  SepalWidth      Species  Distance  Rank
40          5.0         3.5  Iris-setosa       0.0   1.0
43          5.0         3.5  Iris-setosa       0.0   1.0
0           5.1         3.5  Iris-setosa       0.1   3.0
17          5.1         3.5  Iris-setosa       0.1   3.0
4           5.0         3.6  Iris-setosa       0.1   5.0


# Predict the most common species among the nearest neighbors

In [56]:
# Majority vote: the species that occurs most often among the k neighbours.
# NOTE(review): if several species tie, mode() yields more than one entry and
# only the first is kept -- confirm this tie-break is acceptable.
predicted_species = nearest_neighbors['Species'].mode().iloc[0]

# Print the predicted species for the sample

In [57]:
# Report the prediction together with the sample's measurements; the two
# pieces are joined with no separator so they form a single sentence.
print(
    f"\nThe predicted species for the sample with Sepal Length = {sample_sepal_length} ",
    f"and Sepal Width = {sample_sepal_width} is: {predicted_species}",
    sep="",
)


The predicted species for the sample with Sepal Length = 5.0 and Sepal Width = 3.5 is: Iris-setosa


In [64]:
# Load the CSV again, this time keeping every column, into a separate frame for
# the scikit-learn section. file_path is defined in the earlier loading cell.
data = pd.read_csv(filepath_or_buffer=file_path)

In [67]:
# Feature matrix: the two sepal measurements.
x = data.loc[:, ['SepalLength', 'SepalWidth']]
# Target vector: the species label of each row.
y = data.loc[:, 'Species']

In [70]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; the fixed random_state makes the
# split repeatable from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=0,
)

In [71]:
from sklearn.neighbors import KNeighborsClassifier

# Build a 5-nearest-neighbour classifier, then fit it on the training split.
# fit() returns the estimator itself, so the name can simply be rebound.
classifier = KNeighborsClassifier(n_neighbors=5)
classifier = classifier.fit(x_train, y_train)

In [72]:
# Predicted species for every row of the held-out test features.
ypred = classifier.predict(X=x_test)

In [73]:
from sklearn import metrics

# Horizontal rule printed between the sections of the report.
separator = "-------------------------------------------------------------------------"

# Section 1: one line per test sample with the true label, the predicted label
# and whether the two agree.
print("\n" + separator)
print('%-25s %-25s %-25s' % ('Original Label', 'Predicted Label', 'Correct/Wrong'))
print(separator)
# zip pairs each true label with the prediction at the same position, which
# replaces the hand-maintained index counter.
for label, predicted in zip(y_test, ypred):
    verdict = 'Correct' if label == predicted else 'Wrong'
    print('%-25s %-25s %-25s' % (label, predicted, verdict))
print(separator)

# Section 2: aggregate metrics computed from the same label/prediction pairs.
print("\nConfusion Matrix:\n", metrics.confusion_matrix(y_test, ypred))
print(separator)
print("\nClassification Report:\n", metrics.classification_report(y_test, ypred))
print(separator)
# Message typo fixed: "classifer" -> "classifier".
print('Accuracy of the classifier is %0.2f' % metrics.accuracy_score(y_test, ypred))
print(separator)


-------------------------------------------------------------------------
Original Label            Predicted Label           Correct/Wrong            
-------------------------------------------------------------------------
Iris-virginica            Iris-versicolor           Wrong                    
Iris-versicolor           Iris-versicolor           Correct                  
Iris-setosa               Iris-setosa               Correct                  
Iris-virginica            Iris-virginica            Correct                  
Iris-setosa               Iris-setosa               Correct                  
Iris-virginica            Iris-virginica            Correct                  
Iris-setosa               Iris-setosa               Correct                  
Iris-versicolor           Iris-virginica            Wrong                    
Iris-versicolor           Iris-versicolor           Correct                  
Iris-versicolor           Iris-virginica            Wrong              