## Import necessary libraries

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

## Load the Iris dataset

In [7]:
# Pull the raw Iris measurements straight from the UCI repository.
# Column labels are supplied by hand; header=None spells out that the first
# line of the file is data rather than a header row (pandas already infers
# this whenever `names` is given, so the parsed frame is unchanged).
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']
iris_data = pd.read_csv(url, header=None, names=names)

# Sanity-check the parse by previewing the top five rows
print(iris_data.head(5))


   sepal_length  sepal_width  petal_length  petal_width        class
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa


In [8]:
# Separate predictors from the label by position: every column except the
# last becomes the feature matrix X, and the last column becomes the target y.
X, y = iris_data.iloc[:, :-1], iris_data.iloc[:, -1]

## Split the data, train the Random Forest model, and predict on the test set

In [9]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build a 100-tree Random Forest (seeded for repeatability) and fit it on the
# training partition. fit() returns the estimator itself, so the fitted model
# can be bound directly.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train, y_train
)

# Predict a class label for every held-out row
y_pred = rf_model.predict(X_test)

## Evaluate the performance of the model

In [10]:
# Score the held-out predictions three ways: overall accuracy, the per-class
# confusion counts, and the precision/recall/F1 text summary.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

# Report each metric; sep="\n" puts the value on the line after its heading
print(f"Accuracy: {accuracy}")
print("\nConfusion Matrix:", conf_matrix, sep="\n")
print("\nClassification Report:", classification_rep, sep="\n")

Accuracy: 1.0

Confusion Matrix:
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]

Classification Report:
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30



## Make predictions on new data

In [12]:
# Hand-entered measurements used as a quick smoke test of the trained model.
# NOTE(review): Sample 1 is identical to row 0 of the loaded dataset (see the
# head() output above), so these rows are not guaranteed to be unseen by the
# model; confirm the provenance of Samples 2 and 3 before treating this score
# as evidence of generalization.
new_data = [[5.1, 3.5, 1.4, 0.2],  # Sample 1
            [6.3, 2.8, 5.1, 1.5],  # Sample 2
            [7.0, 3.2, 4.7, 1.4]]  # Sample 3

# Wrap the samples in a DataFrame with the same feature column names the model
# was trained on
new_data_df = pd.DataFrame(new_data, columns=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'])

# Use the trained model to make predictions on the new data
predictions = rf_model.predict(new_data_df)

# Expected class label for each sample, in the same order as new_data
actual_labels = ['Iris-setosa', 'Iris-virginica', 'Iris-versicolor']

# zip() silently truncates to the shorter input, so fail loudly if a sample is
# ever added without its label (or vice versa)
assert len(actual_labels) == len(new_data), "each sample needs exactly one expected label"

# Compare predictions with the expected labels, numbering samples from 1
for i, (prediction, actual) in enumerate(zip(predictions, actual_labels), start=1):
    print(f"Sample {i}: Predicted Class - {prediction}, Actual Class - {actual}")

# Keep this score under its own name so the test-set `accuracy` computed in the
# evaluation cell is not silently overwritten by a 3-sample figure
new_data_accuracy = accuracy_score(actual_labels, predictions)
print(f"\nAccuracy on new data: {new_data_accuracy}")


Sample 1: Predicted Class - Iris-setosa, Actual Class - Iris-setosa
Sample 2: Predicted Class - Iris-virginica, Actual Class - Iris-virginica
Sample 3: Predicted Class - Iris-versicolor, Actual Class - Iris-versicolor

Accuracy on new data: 1.0


[Architecture and Maths behind Random Forest](https://chat.openai.com/share/2e71f0ae-703a-46f4-b363-189475c82815)

[Code explanation and accuracies](https://chat.openai.com/share/6de66349-1c6c-4e4f-ac26-646315fb7297)