In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [4]:


# Load the dataset; the 'species' column is the classification target and
# every other column is used as a feature.
df = pd.read_csv('iris_dataset.csv')
y = df['species']
X = df.drop(columns=['species'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a 3-nearest-neighbours classifier (fit() returns the estimator itself).
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Score the classifier on the held-out rows.
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')


Accuracy: 100.00%


In [3]:

# A single unseen sample to classify. It is wrapped in a DataFrame that reuses
# the training column names because `knn` was fitted on a DataFrame; passing a
# bare nested list makes scikit-learn (>= 1.0) warn that the input has no
# valid feature names.
# NOTE(review): the four values are assumed to be in the same order as the
# feature columns of the CSV - confirm against the dataset header.
new_record = pd.DataFrame([[5.1, 3.5, 1.4, 0.2]], columns=X_train.columns)

predicted_class = knn.predict(new_record)
print(f'The predicted species for the record is: {predicted_class[0]}')

The predicted species for the record is: setosa




In [None]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np
from scipy.stats import mode


In [None]:
# Reload the dataset for the clustering experiment.
df = pd.read_csv('iris_dataset.csv')

# Target column on one side, every remaining column as a feature on the other.
y = df['species']
X = df.drop(columns=['species'])

# Encode the species labels as integer codes so they can be compared with
# integer cluster assignments later; `le` is kept so codes can be decoded
# back to species names.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# 80/20 train/test split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42
)


In [None]:
# Fit K-Means on the training features with 3 clusters.
# n_init is pinned explicitly: its default changed from 10 to 'auto' across
# scikit-learn 1.2-1.4 (with a FutureWarning in between), so leaving it
# implicit makes the clustering depend on the installed version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans.fit(X_train)

# Assign each test row to its nearest learned centroid.
y_cluster_pred = kmeans.predict(X_test)

# NOTE: these are arbitrary cluster ids, not species codes (cluster 0 is not
# necessarily class 0). They must be mapped to actual labels - e.g. by giving
# each cluster its most common true label - before computing accuracy.

In [None]:
# Build a lookup table from K-Means cluster id to encoded species label.
# Cluster ids are arbitrary, so each cluster is assigned the most frequent
# true label among the training rows that K-Means placed in it.
n_clusters = kmeans.n_clusters
# One slot per cluster (the table is indexed by cluster id, not by test row).
labels = np.zeros(n_clusters, dtype=int)
for i in range(n_clusters):
    mask = (kmeans.labels_ == i)  # training rows assigned to cluster i
    labels[i] = mode(y_train[mask], keepdims=False).mode

# Translate the test-set cluster ids to encoded labels in one vectorised lookup.
y_pred_mapped = labels[y_cluster_pred]


In [None]:
# Accuracy of the cluster assignments once they are mapped to species codes.
accuracy = accuracy_score(y_test, y_pred_mapped)
print(f'K-Means Accuracy (after label mapping): {accuracy * 100:.2f}%')


# A single unseen sample, wrapped in a DataFrame with the training column
# names so it matches the frame `kmeans` was fitted on (a bare list triggers a
# feature-name warning in scikit-learn >= 1.0).
new_record = pd.DataFrame([[5.1, 3.5, 1.4, 0.2]], columns=X_train.columns)

# kmeans.predict returns a *cluster id*, which is not a species code. It has
# to go through the cluster->label table (`labels`) before being decoded by
# the LabelEncoder; decoding the raw cluster id would report an arbitrary
# species.
predicted_cluster = kmeans.predict(new_record)
predicted_class = labels[predicted_cluster]
print(f'The predicted species for the record is: {le.inverse_transform(predicted_class)[0]}')


In [None]:
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

# Project the test features onto two principal components so the predictions
# can be drawn in 2D. The projection is fitted on the test rows themselves and
# is used only for this visualisation.
pca = PCA(n_components=2)
X_test_2D = pca.fit_transform(X_test)

# Scatter the projected points, coloured by the species code each point's
# cluster was mapped to.
fig, ax = plt.subplots(figsize=(8, 6))
scatter = ax.scatter(X_test_2D[:, 0], X_test_2D[:, 1],
                     c=y_pred_mapped, cmap='coolwarm')

ax.set_title("K-Means Clustering on Iris Dataset (2D PCA)")
ax.set_xlabel("PCA Component 1")
ax.set_ylabel("PCA Component 2")

# Label the legend with species names instead of raw integer codes.
# legend_elements(num=None) yields one handle per unique colour value in
# ascending order, which lines up with np.unique() of the same values.
handles, _ = scatter.legend_elements(num=None)
species_names = le.inverse_transform(np.unique(y_pred_mapped))
ax.legend(handles, species_names, title="Predicted Species")

ax.grid(True)
plt.show()
