In [1]:
#Import the required libraries
#Library to read the file
import pandas as pd
#Library for label encoding
from sklearn.preprocessing import LabelEncoder
#Library to split the dataset into training and test sets
from sklearn.model_selection import train_test_split
#Library for Machine Learning classification model
from sklearn.ensemble import RandomForestClassifier
#Library to validate the model
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

#Read the iris.csv file straight from its gist URL into a DataFrame
iris_url = "https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv"
df = pd.read_csv(iris_url)

#Preprocess
#Features: every column except the "variety" label column
X = df.drop(columns=["variety"])
#Target: the "variety" column, label-encoded by a LabelEncoder;
#the fitted encoder is kept in `le` so the class names stay available
le = LabelEncoder()
y = le.fit_transform(df["variety"])

#Split the data: 20% of the rows are held out for testing,
#and the fixed random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=10,
)

#Report the dimensions of the training partition
print("Training Set dimensions:", X_train.shape)
#Report the dimensions of the testing partition
print("Testing Set dimensions:", X_test.shape)

#Select and build the model
#Apply the Random Forest Machine Learning classification model.
#random_state is pinned because forest construction is randomized; without it
#the fitted model (and the predictions/metrics derived from it) can differ between runs
rf_classifier = RandomForestClassifier(random_state=10)

#Train the model on the training partition
rf_classifier.fit(X_train, y_train)

#Predict labels for the held-out test rows, which were not used to fit the model
y_expect = y_test
y_pred = rf_classifier.predict(X_test)
print("RF predictions:\n", y_pred)
print("RF Expected:\n", y_expect)

#Calculate metrics to validate the model
#Accuracy Score: computed from the predictions already made above,
#instead of re-running prediction through rf_classifier.score()
print("RF Accuracy score:", f"{accuracy_score(y_test, y_pred):.2%}")
#Confusion Matrix (rows are true classes, columns are predicted classes)
rf_confusion = confusion_matrix(y_test, y_pred)
print("RF Confusion Matrix:\n", rf_confusion)
#Classification Report
#Class names are taken from the fitted encoder so they always line up with
#the integer codes in y_test / y_pred, rather than relying on a hand-typed list
rf_target_names = list(le.classes_)
print("RF Classification Report: \n", classification_report(y_test, y_pred, target_names=rf_target_names))



Training Set dimensions: (120, 4)
Testing Set dimensions: (30, 4)
RF predictions:
 [1 2 0 1 0 1 1 1 0 1 1 2 1 0 0 2 1 0 0 0 2 2 2 0 1 0 1 1 1 2]
RF Expected:
 [1 2 0 1 0 1 1 1 0 1 1 2 1 0 0 2 1 0 0 0 2 2 2 0 1 0 1 1 1 2]
RF Accuracy score: 100.00%
RF Confusion Matrix:
 [[10  0  0]
 [ 0 13  0]
 [ 0  0  7]]
RF Classification Report: 
               precision    recall  f1-score   support

      Setosa       1.00      1.00      1.00        10
  Versicolor       1.00      1.00      1.00        13
   Virginica       1.00      1.00      1.00         7

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

