# Iris classification: logistic regression and k-means clustering on the scikit-learn iris dataset

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

# Load the iris feature matrix (X) and the integer class labels (y).
X, y = load_iris(return_X_y=True)

# visualization
# (class index, marker colour, legend label) for each species, in plotting order.
species_styles = [
    (0, 'red', 'setosa'),
    (1, 'black', 'versicolor'),
    (2, 'blue', 'virginica'),
]

# (x column, y column, x-axis label, y-axis label) for each figure:
# first the two sepal measurements, then the two petal measurements,
# which are visualized in exactly the same way.
feature_pairs = [
    (0, 1, 'sepal length (cm)', 'sepal width (cm)'),
    (2, 3, 'petal length (cm)', 'petal width (cm)'),
]

for x_col, y_col, x_label, y_label in feature_pairs:
    # One scatter call per species so each gets its own colour and legend entry.
    for class_id, colour, species in species_styles:
        members = X[y == class_id]
        plt.scatter(members[:, x_col], members[:, y_col], color=colour, label=species)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.legend()
    plt.show()


# splitting the data
# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training split only, then apply the
# same transform to both splits so no test information leaks into the scaler.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit a logistic-regression classifier (default settings) and predict the test split.
model = LogisticRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Compute the evaluation metrics first, then print them in a fixed order.
test_report = classification_report(y_test, y_pred)
test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)
print(f'Classification report:\n{test_report}')
print(f'Accuracy: {test_accuracy}')
print(f'Confusion matrix:\n{test_confusion}')

# cluster and report
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Reload the data and keep only columns 2 and 3 (petal length and petal width).
# NOTE: from here on X holds just these two columns, and y_pred (assigned
# below) holds cluster ids rather than classifier predictions.
X, y = load_iris(return_X_y=True)
X = X[:, 2:]

# Unlabelled view of the two petal features before clustering.
plt.scatter(X[:, 0], X[:, 1])
plt.xlabel('petal length (cm)')
plt.ylabel('petal width (cm)')
plt.show()

# Pin the seed and the number of restarts: without random_state the cluster
# ids (and therefore the colours and the silhouette score) can differ between
# runs, and the n_init default differs between scikit-learn releases.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
y_pred = kmeans.fit_predict(X)

# (cluster id, marker colour, legend label) for each cluster, in plotting order.
cluster_styles = [
    (0, 'red', 'cluster 1'),
    (1, 'black', 'cluster 2'),
    (2, 'blue', 'cluster 3'),
]
for cluster_id, colour, legend_label in cluster_styles:
    members = X[y_pred == cluster_id]
    plt.scatter(members[:, 0], members[:, 1], color=colour, label=legend_label)

# Overlay the fitted cluster centres on top of the coloured points.
centroids = kmeans.cluster_centers_
plt.scatter(centroids[:, 0], centroids[:, 1], color='yellow', label='centroids')
plt.xlabel('petal length (cm)')
plt.ylabel('petal width (cm)')
plt.legend()
plt.show()

# Silhouette score of the clustering on the same two petal features.
print(f'Silhouette score: {silhouette_score(X, y_pred)}')