### Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 

### Loading data

In [2]:
# Read the Iris CSV from the current working directory and preview the first rows.
csv_path = './IRIS.csv'
iris = pd.read_csv(csv_path)
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


### Exploring the dataset

In [3]:
# Print the frame's structure: index range, column names, non-null counts, dtypes and memory usage.
iris.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


--- No missing values: every column reports 150 non-null entries for the 150 rows.

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
iris.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


Confirming the content of the categorical column

In [5]:
# Compute the values before formatting: reusing single quotes inside a
# single-quoted f-string is a SyntaxError on Python versions before 3.12.
species_col = iris['species']
n_species = species_col.nunique()
species_names = species_col.unique()
print(f'We have {n_species} categories in the "species" column. They are: {species_names}')

We have 3 categories in the "species" column. They are: ['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']


# Classification

### Splitting into training, validation, and test sets

In [6]:
from sklearn.model_selection import train_test_split

In [10]:
# Separate the label column (y) from the remaining feature columns (X).
target_col = 'species'
y = iris[target_col]
X = iris.drop(columns=[target_col])

In [14]:
# Hold out 20% of the rows, then split that hold-out in half to obtain
# the validation and test sets. Both splits use the same fixed seed.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

# Report the (rows, columns) shape of each feature set.
for split_name, split_features in (('Train', X_train), ('Validation', X_val), ('Test', X_test)):
    print(f'Shape of {split_name} set: {split_features.shape}')

Shape of Train set: (120, 4)
Shape of Validation set: (15, 4)
Shape of Test set: (15, 4)


### Basic Algorithms

In [15]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

In [21]:
# Instantiate one estimator per algorithm, each with its default constructor arguments
lr = LogisticRegression()
tree = DecisionTreeClassifier()
vec = SVC()

# Keep the estimators and their display names in two parallel lists
model_list = [lr, tree, vec]
model_names = ['Logistic Regression', 'Decision Tree', 'Support Vector Machines']

# Fit every model on the training set and report its metrics on the validation set.
# zip() pairs each estimator with its display name directly, instead of searching
# model_list for the estimator on every iteration.
for name, model in zip(model_names, model_list):
    # Fit on the training data only
    model.fit(X_train, y_train)
    # Predict on the validation set
    y_pred = model.predict(X_val)
    # Print a separator, the model name and its validation metrics
    print('-----')
    print('-----')
    print(name)
    print("Accuracy:", accuracy_score(y_val, y_pred))
    print("Classification Report:\n", classification_report(y_val, y_pred))

-----
-----
Logistic Regression
Accuracy: 1.0
Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00         5
Iris-versicolor       1.00      1.00      1.00         3
 Iris-virginica       1.00      1.00      1.00         7

       accuracy                           1.00        15
      macro avg       1.00      1.00      1.00        15
   weighted avg       1.00      1.00      1.00        15

-----
-----
Decision Tree
Accuracy: 1.0
Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00         5
Iris-versicolor       1.00      1.00      1.00         3
 Iris-virginica       1.00      1.00      1.00         7

       accuracy                           1.00        15
      macro avg       1.00      1.00      1.00        15
   weighted avg       1.00      1.00      1.00        15

-----
-----
Support Vector Machines
Accuracy: 1.0
Classificati

In [22]:
# Evaluate every model on the held-out test set.
# zip() pairs each estimator with its display name directly, instead of searching
# model_list for the estimator on every iteration.
for name, model in zip(model_names, model_list):
    # Fit on the training data only; the test set is used purely for evaluation
    model.fit(X_train, y_train)
    # Predict on the test set
    y_pred = model.predict(X_test)
    # Print a separator, the model name and its test metrics
    print('-----')
    print('-----')
    print(name)
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))

-----
-----
Logistic Regression
Accuracy: 1.0
Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00         5
Iris-versicolor       1.00      1.00      1.00         6
 Iris-virginica       1.00      1.00      1.00         4

       accuracy                           1.00        15
      macro avg       1.00      1.00      1.00        15
   weighted avg       1.00      1.00      1.00        15

-----
-----
Decision Tree
Accuracy: 1.0
Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00         5
Iris-versicolor       1.00      1.00      1.00         6
 Iris-virginica       1.00      1.00      1.00         4

       accuracy                           1.00        15
      macro avg       1.00      1.00      1.00        15
   weighted avg       1.00      1.00      1.00        15

-----
-----
Support Vector Machines
Accuracy: 1.0
Classificati