## Iris Flower Classification

### Classification algorithms used: Logistic Regression, Support Vector Machine, Random Forest Classifier

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,confusion_matrix,precision_score,recall_score,f1_score

#### Loading the dataset.

In [2]:
# Read the Iris CSV (expected in the working directory) into a DataFrame.
data = pd.read_csv('raw.githubusercontent.com_amankharwal_Website-data_master_IRIS.csv')

# Collect the distinct labels found in the target column and show them
# under a heading line.
unique_species = data['species'].unique()
print("Unique Species:", unique_species, sep="\n")

Unique Species:
['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']


There are three unique species, so this is a multi-class classification problem.

#### Initialising the predictor matrix and the dependent variable.

In [3]:
# Dependent variable: the species label column.
y = data['species']
# Predictor matrix: every column except the label.
X = data.drop(columns='species')

#### Splitting the dataset into training and testing sets in 70:30 ratio.

In [4]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=123,
)

#### Feature scaling: standardizing the features puts them all on a similar scale, which can improve the model's convergence and performance.

In [5]:
# Learn the scaling statistics from the training split only, so nothing
# from the test split influences the fit.
scaler = StandardScaler().fit(X_train)

# Apply the same fitted transformation to both splits.
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

## Logistic Regression

In [6]:
# Fit a logistic-regression classifier (default settings) on the training split.
model_log = LogisticRegression().fit(X_train, y_train)

# Predict labels for the held-out test split.
y_pred = model_log.predict(X_test)

# Score the predictions. Precision, recall and F1 are averaged with
# average='weighted' because the target is multi-class.
cm_log = confusion_matrix(y_test, y_pred)
accuracy_log = accuracy_score(y_test, y_pred)
f1_log = f1_score(y_test, y_pred, average='weighted')
recall_log = recall_score(y_test, y_pred, average='weighted')
precision_log = precision_score(y_test, y_pred, average='weighted')

# Report the metrics, then the confusion matrix on its own lines.
print("Accuracy:", accuracy_log)
print("Precision:", precision_log)
print("Recall:", recall_log)
print("F1-Score:", f1_log)
print("Confusion Matrix:", cm_log, sep="\n")

Accuracy: 0.9777777777777777
Precision: 0.9797979797979799
Recall: 0.9777777777777777
F1-Score: 0.977970177970178
Confusion Matrix:
[[18  0  0]
 [ 0 10  0]
 [ 0  1 16]]


## Support Vector Machine (SVM)

In [7]:
# Fit a support vector classifier (default settings) on the training split.
model_svc = SVC().fit(X_train, y_train)

# Predict labels for the held-out test split.
y_pred = model_svc.predict(X_test)

# Score the predictions. Precision, recall and F1 are averaged with
# average='weighted' because the target is multi-class.
cm_svc = confusion_matrix(y_test, y_pred)
accuracy_svc = accuracy_score(y_test, y_pred)
f1_svc = f1_score(y_test, y_pred, average='weighted')
recall_svc = recall_score(y_test, y_pred, average='weighted')
precision_svc = precision_score(y_test, y_pred, average='weighted')

# Report the metrics, then the confusion matrix on its own lines.
print("Accuracy:", accuracy_svc)
print("Precision:", precision_svc)
print("Recall:", recall_svc)
print("F1-Score:", f1_svc)
print("Confusion Matrix:", cm_svc, sep="\n")

Accuracy: 0.9555555555555556
Precision: 0.962962962962963
Recall: 0.9555555555555556
F1-Score: 0.9561868686868688
Confusion Matrix:
[[18  0  0]
 [ 0 10  0]
 [ 0  2 15]]


## Random Forest Classifier

In [8]:
# A random forest is stochastic (bootstrap sampling and random feature
# selection), so pin the seed; otherwise the metrics below change on every
# run and the notebook is not reproducible. 123 matches the seed used for
# the train/test split.
model_rfc = RandomForestClassifier(random_state=123)

# Train the model on the training split.
model_rfc.fit(X_train, y_train)

# Make predictions for the held-out test split.
y_pred = model_rfc.predict(X_test)

# Evaluate the model. Precision, recall and F1 use average='weighted'
# because the target is multi-class.
accuracy_rfc = accuracy_score(y_test, y_pred)
cm_rfc = confusion_matrix(y_test, y_pred)
precision_rfc = precision_score(y_test, y_pred, average='weighted')
recall_rfc = recall_score(y_test, y_pred, average='weighted')
f1_rfc = f1_score(y_test, y_pred, average='weighted')

# Print the results.
print("Accuracy:", accuracy_rfc)
print("Precision:", precision_rfc)
print("Recall:", recall_rfc)
print("F1-Score:", f1_rfc)
print("Confusion Matrix:")
print(cm_rfc)

Accuracy: 0.9555555555555556
Precision: 0.962962962962963
Recall: 0.9555555555555556
F1-Score: 0.9561868686868688
Confusion Matrix:
[[18  0  0]
 [ 0 10  0]
 [ 0  2 15]]


### Let's compile the different performance metrics of the three algorithms in a single dataframe for comparison.

In [9]:
# One dictionary of headline metrics per classifier; the keys become the
# column labels of the comparison table.
log = {"Accuracy": accuracy_log, 'Precision': precision_log, 'Recall': recall_log, 'F1 score': f1_log}
svm = {'Accuracy': accuracy_svc, 'Precision': precision_svc, 'Recall': recall_svc, 'F1 score': f1_svc}
rfc = {'Accuracy': accuracy_rfc, 'Precision': precision_rfc, 'Recall': recall_rfc, 'F1 score': f1_rfc}

# Map each display name to its metrics dictionary.
combined_metrics = {'Logistic Regression': log, 'SVM': svm, 'Random Forest': rfc}

# A dict of dicts yields one column per model; transposing gives one row
# per model and one column per metric, which is easier to read.
df_metrics = pd.DataFrame(combined_metrics).T

# Show the comparison table.
print(df_metrics)

                     Accuracy  Precision    Recall  F1 score
Logistic Regression  0.977778   0.979798  0.977778  0.977970
SVM                  0.955556   0.962963  0.955556  0.956187
Random Forest        0.955556   0.962963  0.955556  0.956187
