In [1]:
# Importing necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Read the Iris measurements from 'Iris.csv' (path is relative to the
# notebook's working directory).
iris_df = pd.read_csv('Iris.csv')

# Preview the top of the frame to sanity-check the parsed columns.
head_rows = iris_df.head()
print(head_rows)



   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.9           3.0            1.4           0.2  Iris-setosa
2   3            4.7           3.2            1.3           0.2  Iris-setosa
3   4            4.6           3.1            1.5           0.2  Iris-setosa
4   5            5.0           3.6            1.4           0.2  Iris-setosa


In [22]:
# Display the last rows of the frame (tail() with its default row count),
# complementing the head() preview shown when the data was loaded.
iris_df.tail()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica
149,150,5.9,3.0,5.1,1.8,Iris-virginica


In [26]:
# Print a structural summary of the frame: index range, per-column
# non-null counts and dtypes, and approximate memory usage.
iris_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB


In [27]:
# Remove the 'Id' column so that only the four measurements and the species
# label remain in the frame.
# errors='ignore' keeps this cell idempotent: re-running it after 'Id' has
# already been dropped is a no-op instead of raising KeyError.
iris_df = iris_df.drop(columns='Id', errors='ignore')


In [28]:
# Display summary statistics (count, mean, std, min, quartiles, max)
# for the numeric columns of the frame.
iris_df.describe()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [29]:
# Seed shared by every stochastic step in this cell so that a fresh
# "Restart & Run All" reproduces the same split and the same fitted models.
RANDOM_STATE = 42

# Separating features (X) and target variable (y).
# Columns are selected by name rather than by position: a positional slice
# (everything but the last column) would silently include 'Id' as a feature
# whenever the cell that drops it has not been run.
FEATURE_COLUMNS = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
TARGET_COLUMN = 'Species'
X = iris_df[FEATURE_COLUMNS].values
y = iris_df[TARGET_COLUMN].values

# Splitting the dataset into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Standardizing the features (important for some algorithms).
# The scaler is fitted on the training split only and then re-used for the
# test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Model selection and training.
# The decision tree permutes candidate features at each split, so it is
# seeded explicitly to make its fit deterministic.
models = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(random_state=RANDOM_STATE),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Support Vector Machine': SVC()
}

for name, model in models.items():
    # Training the model on the standardized training split
    model.fit(X_train, y_train)

    # Making predictions on the held-out test split
    y_pred = model.predict(X_test)

    # Evaluating the model against the true test labels
    accuracy = accuracy_score(y_test, y_pred)
    confusion_mat = confusion_matrix(y_test, y_pred)
    classification_rep = classification_report(y_test, y_pred)

    # Displaying results
    print(f"Model: {name}")
    print(f"Accuracy: {accuracy:.4f}")
    print("Confusion Matrix:\n", confusion_mat)
    print("Classification Report:\n", classification_rep)
    print("------------------------------")


Model: Logistic Regression
Accuracy: 1.0000
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30

------------------------------
Model: Decision Tree
Accuracy: 1.0000
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00  

In [34]:

# Three hand-written samples, one row each, in the column order
# sepal length, sepal width, petal length, petal width.
new_data = np.array([
    [6.0, 3.0, 4.0, 1.3],   # Features for Iris-versicolor
    [5.5, 2.5, 5.0, 2.0],   # Features for Iris-virginica
    [4.8, 3.2, 1.5, 0.1],   # Features for Iris-setosa
])

# Right-pad with zero-valued columns when fewer than four features are given.
# NOTE(review): the zeros are added in raw (unscaled) units before the scaler
# is applied; with the four-column samples above this branch never runs.
feature_gap = 4 - new_data.shape[1]
if feature_gap > 0:
    new_data = np.hstack([new_data, np.zeros((new_data.shape[0], feature_gap))])

# Re-use the scaler fitted on the training split; it is not refitted here.
new_data_standardized = scaler.transform(new_data)

# Wrap the scaled values in a labelled frame so the printout is readable.
feature_labels = ['sepal length', 'sepal width', 'petal length', 'petal width']
new_data_df = pd.DataFrame(new_data_standardized, columns=feature_labels)

# Ask each fitted classifier for its labels and report them together with
# the scaled inputs it was given.
for model_name, fitted_model in models.items():
    predictions = fitted_model.predict(new_data_standardized)
    print(
        f"Model: {model_name}",
        f"Predictions:",
        new_data_df,
        f"Predicted Classes: {predictions}",
        "------------------------------",
        sep="\n",
    )


Model: Logistic Regression
Predictions:
   sepal length  sepal width  petal length  petal width
0      0.232620    -0.129349      0.156255     0.156605
1     -0.376865    -1.254122      0.729670     1.089570
2     -1.230143     0.320560     -1.277280    -1.442764
Predicted Classes: ['Iris-versicolor' 'Iris-virginica' 'Iris-setosa']
------------------------------
Model: Decision Tree
Predictions:
   sepal length  sepal width  petal length  petal width
0      0.232620    -0.129349      0.156255     0.156605
1     -0.376865    -1.254122      0.729670     1.089570
2     -1.230143     0.320560     -1.277280    -1.442764
Predicted Classes: ['Iris-versicolor' 'Iris-virginica' 'Iris-setosa']
------------------------------
Model: K-Nearest Neighbors
Predictions:
   sepal length  sepal width  petal length  petal width
0      0.232620    -0.129349      0.156255     0.156605
1     -0.376865    -1.254122      0.729670     1.089570
2     -1.230143     0.320560     -1.277280    -1.442764
Predicted Cl

In [35]:
# Show the scaled feature matrix once, ahead of the per-model results.
print("Standardized New Data:", new_data_standardized, sep="\n")

# Report each fitted classifier's labels for the scaled samples.
for model_name in models:
    predictions = models[model_name].predict(new_data_standardized)
    print(f"Model: {model_name}")
    print(f"Predictions: {predictions}")
    print("------------------------------")


Standardized New Data:
[[ 0.23261993 -0.12934896  0.15625537  0.15660491]
 [-0.37686461 -1.25412249  0.72966956  1.08957031]
 [-1.23014297  0.32056046 -1.27728011 -1.44276436]]
Model: Logistic Regression
Predictions: ['Iris-versicolor' 'Iris-virginica' 'Iris-setosa']
------------------------------
Model: Decision Tree
Predictions: ['Iris-versicolor' 'Iris-virginica' 'Iris-setosa']
------------------------------
Model: K-Nearest Neighbors
Predictions: ['Iris-versicolor' 'Iris-virginica' 'Iris-setosa']
------------------------------
Model: Support Vector Machine
Predictions: ['Iris-versicolor' 'Iris-virginica' 'Iris-setosa']
------------------------------


In [36]:
# Checking with a second set of hand-written samples, one row each, in the
# column order sepal length, sepal width, petal length, petal width.
new_data = np.array([
    [5.4, 3.7, 1.5, 0.2],   # Features for Iris-setosa
    [5.5, 2.5, 5.0, 2.0],   # Features for Iris-virginica
    [6, 2.2, 4, 1],         # Features for Iris-versicolor
])

# Right-pad with zero-valued columns when fewer than four features are given.
# NOTE(review): the zeros are added in raw (unscaled) units before the scaler
# is applied; with the four-column samples above this branch never runs.
feature_gap = 4 - new_data.shape[1]
if feature_gap > 0:
    new_data = np.hstack([new_data, np.zeros((new_data.shape[0], feature_gap))])

# Re-use the scaler fitted on the training split; it is not refitted here.
new_data_standardized = scaler.transform(new_data)

# Wrap the scaled values in a labelled frame so the printout is readable.
feature_labels = ['sepal length', 'sepal width', 'petal length', 'petal width']
new_data_df = pd.DataFrame(new_data_standardized, columns=feature_labels)

# Ask each fitted classifier for its labels and report them together with
# the scaled inputs it was given.
for model_name, fitted_model in models.items():
    predictions = fitted_model.predict(new_data_standardized)
    print(
        f"Model: {model_name}",
        f"Predictions:",
        new_data_df,
        f"Predicted Classes: {predictions}",
        "------------------------------",
        sep="\n",
    )


Model: Logistic Regression
Predictions:
   sepal length  sepal width  petal length  petal width
0     -0.498762     1.445334     -1.277280    -1.309484
1     -0.376865    -1.254122      0.729670     1.089570
2      0.232620    -1.928987      0.156255    -0.243237
Predicted Classes: ['Iris-setosa' 'Iris-virginica' 'Iris-versicolor']
------------------------------
Model: Decision Tree
Predictions:
   sepal length  sepal width  petal length  petal width
0     -0.498762     1.445334     -1.277280    -1.309484
1     -0.376865    -1.254122      0.729670     1.089570
2      0.232620    -1.928987      0.156255    -0.243237
Predicted Classes: ['Iris-setosa' 'Iris-virginica' 'Iris-versicolor']
------------------------------
Model: K-Nearest Neighbors
Predictions:
   sepal length  sepal width  petal length  petal width
0     -0.498762     1.445334     -1.277280    -1.309484
1     -0.376865    -1.254122      0.729670     1.089570
2      0.232620    -1.928987      0.156255    -0.243237
Predicted Cl