<a href="https://colab.research.google.com/github/Megha0321/coursera-test/blob/main/iris.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score, make_scorer
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier

from warnings import filterwarnings

# Silence every warning category for the rest of the session so the cell
# outputs stay compact.
# NOTE(review): this also hides deprecation and convergence warnings from
# pandas / scikit-learn -- consider narrowing it with `category=` if those matter.
filterwarnings(action="ignore")

In [3]:
# Load the Iris dataset
# The path is relative, so the CSV must sit in the kernel's current working
# directory (presumably uploaded manually when running on Colab -- confirm).
df = pd.read_csv("Iris-Dataset.csv")

In [4]:
# Display basic information about the dataset
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() emitted a stray "None" line.
df.info()
print(df.shape)  # (rows, columns)
print(df.head())  # first five records
print(df.describe())  # summary statistics of the numeric columns
print(df.isnull().sum())  # missing-value count per column
print(df["Species"].value_counts())  # number of samples per class

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   SepalLengthCm  150 non-null    float64
 1   SepalWidthCm   150 non-null    float64
 2   PetalLengthCm  150 non-null    float64
 3   PetalWidthCm   150 non-null    float64
 4   Species        150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB
None
(150, 5)
   SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0            5.1           3.5            1.4           0.2  Iris-setosa
1            4.9           3.0            1.4           0.2  Iris-setosa
2            4.7           3.2            1.3           0.2  Iris-setosa
3            4.6           3.1            1.5           0.2  Iris-setosa
4            5.0           3.6            1.4           0.2  Iris-setosa
       SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm
count     150.000000

In [5]:
# Print the range of each feature
# Every measurement column gets the same ["min", "max"] aggregation, so the
# per-column spec is generated instead of being spelled out four times.
feature_ranges = df.agg(
    {
        column: ["min", "max"]
        for column in (
            "SepalLengthCm",
            "SepalWidthCm",
            "PetalLengthCm",
            "PetalWidthCm",
        )
    }
)

# One print call: heading first, then the min/max table on the following line.
print("\nRange of Features:", feature_ranges, sep="\n")


Range of Features:
     SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm
min            4.3           2.0            1.0           0.1
max            7.9           4.4            6.9           2.5


In [6]:
# Prepare data for training
# X keeps every column except the label; y is the label column itself.
X = df.drop(columns="Species")
y = df["Species"]

In [7]:
# Split the dataset into training and testing sets
# 30% of the rows are held out for testing; the fixed random_state makes the
# shuffle, and therefore the split, repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [8]:
# Standardize the features
# The scaling statistics are learned from the training rows only and the same
# fitted transform is then applied to both splits.
# NOTE: X_train / X_test are rebound to their scaled versions here, so
# re-running this cell on its own would refit the scaler on already-scaled
# data; re-run the split cell first.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [9]:
# Define classifiers
# Maps a display name to an unfitted estimator; the evaluation and selection
# cells iterate over this mapping, so further candidates can be added here.
classifiers = {"k-Nearest Neighbors": KNeighborsClassifier(n_neighbors=5)}

In [10]:
# Evaluate each classifier using cross-validation and on the test set
for name, model in classifiers.items():
    # Mean accuracy over 5 cross-validation folds of the training split
    cv_accuracy = cross_val_score(
        model, X_train, y_train, cv=5, scoring="accuracy"
    ).mean()
    print(f"\n{name} -> Cross-Validation Accuracy: {cv_accuracy} ")

    # Fit on the whole training split (fit() returns the estimator itself),
    # then predict the held-out rows
    test_predictions = model.fit(X_train, y_train).predict(X_test)

    # Share of held-out rows that were labelled correctly
    test_accuracy = accuracy_score(y_test, test_predictions)
    print(f"{name} -> Test Set Accuracy: {test_accuracy * 100}%")


k-Nearest Neighbors -> Cross-Validation Accuracy: 0.9523809523809523 
k-Nearest Neighbors -> Test Set Accuracy: 97.77777777777777%


In [11]:
# Choose the best-performing classifier
# Rank the (name, estimator) pairs by mean 5-fold cross-validation accuracy on
# the training split and keep the top pair; ties go to the earliest entry.
best_classifier_name, best_classifier = max(
    classifiers.items(),
    key=lambda entry: cross_val_score(
        entry[1], X_train, y_train, cv=5, scoring="accuracy"
    ).mean(),
)

In [12]:
# Train the best classifier on the entire training set
# The evaluation loop earlier in the notebook already fitted every entry of
# `classifiers` on the same data, so this repeats that fit; it is kept so this
# section still works if the evaluation cell is skipped.
best_classifier.fit(X_train, y_train)

In [13]:
# Predict the held-out rows with the selected model
y_pred = best_classifier.predict(X_test)

# Summarize the test-set performance: overall accuracy plus the per-class
# breakdown (confusion-matrix rows are true labels, columns are predictions)
accuracy = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

In [14]:
# Print results
# A single print call; sep="\n" places each item on its own line.
print(
    f"\nBest Classifier: {best_classifier_name}",
    "Confusion Matrix:",
    cm,
    f"Accuracy of the Model: {accuracy * 100}%",
    sep="\n",
)


Best Classifier: k-Nearest Neighbors
Confusion Matrix:
[[16  0  0]
 [ 0 17  1]
 [ 0  0 11]]
Accuracy of the Model: 97.77777777777777%


Range of Features:
|               | SepalLengthCm | SepalWidthCm | PetalLengthCm | PetalWidthCm |
| ------------- | ------------- | ------------ | ------------- | ------------ |
| **Min**       | 4.3           | 2.0          | 1.0           | 0.1          |
| **Max**       | 7.9           | 4.4          | 6.9           | 2.5          |



In [None]:
# Take user input for feature values
def _read_measurement(prompt):
    """Prompt until the user enters a finite, positive number.

    Parameters
    ----------
    prompt : str
        Text shown to the user by ``input``.

    Returns
    -------
    float
        The first entry that parses as a float, is finite and is greater
        than zero.

    Notes
    -----
    Entries that are not numbers, are nan/inf, or are not positive are
    rejected with a short message and the prompt is repeated, instead of
    raising ``ValueError`` or passing a meaningless length to the model.
    """
    while True:
        raw = input(prompt)
        try:
            value = float(raw)
        except ValueError:
            print(f"'{raw}' is not a number; please try again.")
            continue
        # float() also accepts "nan" and "inf"; neither is a usable length.
        if np.isfinite(value) and value > 0:
            return value
        print("Measurements must be finite and greater than 0; please try again.")


sepal_length = _read_measurement("Enter Sepal Length (in cm): ")
sepal_width = _read_measurement("Enter Sepal Width (in cm): ")
petal_length = _read_measurement("Enter Petal Length (in cm): ")
petal_width = _read_measurement("Enter Petal Width (in cm): ")

In [None]:
# Create a DataFrame with user input
# One row holding the four measurements, labelled with the same column names
# and order as the training features.
user_input = pd.DataFrame(
    [[sepal_length, sepal_width, petal_length, petal_width]],
    columns=["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"],
)

In [None]:
# Standardize the user input using the same scaler
# The scaler fitted on the training split is reused, so the new sample is put
# on the scale the classifier was trained on.
user_input_scaled = scaler.transform(user_input)

# Use the best classifier to predict the class
# predict() returns one label per input row; the input has exactly one row,
# so that single label is unpacked directly.
(predicted_class,) = best_classifier.predict(user_input_scaled)

In [None]:
# Report the label predicted for the user-supplied measurements
print(f"\nPredicted Class: {predicted_class}")


Predicted Class: Iris-setosa
