### Classify Iris Flowers


In [133]:
# Importing the libraries

from sklearn.datasets import load_iris
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report,accuracy_score, precision_score, recall_score, f1_score


In [134]:
# Load the Iris dataset

iris = load_iris()

# Converting the dataset to a Pandas DataFrame

iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_df['species'] = iris.target
iris_df['species'] = iris_df['species'].map({0: 'Setosa', 1: 'Versicolor', 2: 'Virginica'})

# Display the first few rows of the dataset

iris_df.head()


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),species
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [135]:
# Define features and target for model training

X = iris.data
y = iris.target

### Split Data Into Testing And Training Sets

In [136]:
# Split the data into training and testing sets

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

### Grid search CV

In [137]:
# Defining different models and their parameters to be used by Grid Search CV to determiine the best possible model
# which can be used for model training ( SVM , Decision_Tree , Logistic_regression)

model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto',probability=True),
        'params' : {
            'svc__C': [1,10,100,1000],
            'svc__kernel': ['rbf','linear']
        }  
    },
    'decision_tree': {
        'model': DecisionTreeClassifier(),
        'params': {
            'decisiontreeclassifier__criterion': ['gini', 'entropy'],
            'decisiontreeclassifier__max_depth': [None, 10, 20, 30],
            'decisiontreeclassifier__min_samples_split': [2, 10, 20]
        }
    },
    'logistic_regression' : {
        'model': LogisticRegression(solver='liblinear'),
        'params': {
            'logisticregression__C': [1,5,10]
        }
    }
}

In [138]:
# Using the Grid Search CV to determine best estimator

scores = []
best_estimators = {}

for algo, mp in model_params.items():
    pipe = make_pipeline(StandardScaler(), mp['model'])
    clf =  GridSearchCV(pipe, mp['params'], cv=5, return_train_score=False)
    clf.fit( X_train , y_train )
    scores.append({
        'model': algo,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })
    best_estimators[algo] = clf.best_estimator_
    
# Store information of best estimators in pandas dataframe
    
df = pd.DataFrame(scores,columns=['model','best_score','best_params'])
df

Unnamed: 0,model,best_score,best_params
0,svm,0.964032,"{'svc__C': 1, 'svc__kernel': 'linear'}"
1,decision_tree,0.964427,"{'decisiontreeclassifier__criterion': 'gini', ..."
2,logistic_regression,0.929644,{'logisticregression__C': 10}


In [139]:
#  Display best estimator result in form of model and params

best_estimators

{'svm': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('svc',
                  SVC(C=1, gamma='auto', kernel='linear', probability=True))]),
 'decision_tree': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('decisiontreeclassifier',
                  DecisionTreeClassifier(min_samples_split=10))]),
 'logistic_regression': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('logisticregression',
                  LogisticRegression(C=10, solver='liblinear'))])}

### From estimators score select Decision Tree with a score of 0.964427
### and leave SVM and logistic regression with scores of 0.964032 and 0.929644 respectively.

### Model Training

In [140]:
# Create a pipeline
# Train an Decision Tree model with best parameters found from GridSearchCV

pipe = Pipeline([('scaler', StandardScaler()), ('decisiontreeclassifier', DecisionTreeClassifier(criterion = 'gini', min_samples_split=2,max_depth=3))])
pipe.fit(X_train, y_train)

# Score of the Decision tree model as per data set trained

pipe.score(X_test, y_test)


0.9736842105263158

### Model Evaluation


In [141]:
# Predict the test set results

y_pred = pipe.predict(X_test)

# Calculate accuracy, precision, recall, and F1-score

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# Print the evaluation metrics

print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1-score: {f1}')

# Print classification report

print('\nClassification Report:')
print(classification_report(y_test, y_pred, target_names=iris.target_names))


Accuracy: 0.9736842105263158
Precision: 0.9763157894736842
Recall: 0.9736842105263158
F1-score: 0.9739522830846216

Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        13
  versicolor       1.00      0.94      0.97        16
   virginica       0.90      1.00      0.95         9

    accuracy                           0.97        38
   macro avg       0.97      0.98      0.97        38
weighted avg       0.98      0.97      0.97        38



### Making Predictions

In [142]:
# Assume this is a new test data item (example)

new_data = [[5.1, 3.5, 1.4, 0.2]]  # Example data with features (sepal length, sepal width, petal length, petal width)

In [143]:
# Predict the class probabilities

predicted_proba = pipe.predict_proba(new_data)

# Get the predicted class

predicted_class = pipe.predict(new_data)

# Map the predicted class to the target names

predicted_species = iris.target_names[predicted_class]

print(f"Predicted probabilities: {predicted_proba}")
print(f"Predicted class: {predicted_class}")
print(f"Predicted species: {predicted_species}")

Predicted probabilities: [[1. 0. 0.]]
Predicted class: [0]
Predicted species: ['setosa']
