In [3]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib  # For saving and loading models


In [4]:
# Fetch the bundled Iris data. The `iris` object itself is kept around because
# its metadata (e.g. `target_names`) is used again further down the notebook.
iris = load_iris()
X, y = iris.data, iris.target  # X: feature matrix, y: integer class labels

# Sanity-check the array dimensions before going any further
print(f"Features shape: {X.shape}", f"Labels shape: {y.shape}", sep="\n")


Features shape: (150, 4)
Labels shape: (150,)


In [5]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
# NOTE(review): no `stratify=` argument is passed, so the class proportions of
# the two subsets are not guaranteed to match those of the full dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Report how many rows/columns ended up in each subset
print(
    f"Training features shape: {X_train.shape}",
    f"Testing features shape: {X_test.shape}",
    sep="\n",
)


Training features shape: (105, 4)
Testing features shape: (45, 4)


In [6]:
# Learn the per-feature scaling statistics from the training split only, so
# nothing about the test split influences the fitted scaler.
scaler = StandardScaler().fit(X_train)

# Apply the fitted scaler to both splits.
# NOTE: X_train / X_test are rebound to their scaled versions here, so
# re-running this cell without first re-running the split cell would scale
# data that has already been scaled.
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Peek at the first rows of the scaled training matrix
print(f"Scaled training data (first 5 rows):\n{X_train[:5]}")


Scaled training data (first 5 rows):
[[-0.4134164  -1.46200287 -0.09951105 -0.32339776]
 [ 0.55122187 -0.50256349  0.71770262  0.35303182]
 [ 0.67180165  0.21701605  0.95119225  0.75888956]
 [ 0.91296121 -0.02284379  0.30909579  0.2177459 ]
 [ 1.63643991  1.41631528  1.30142668  1.70589097]]


In [7]:
# Build a random forest with a fixed seed and fit it on the training split in
# one step (`fit` returns the estimator itself).
classifier = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Confirm that fitting finished
print("Model training completed.")


Model training completed.


In [8]:
# Predict a class label for every held-out sample
y_pred = classifier.predict(X_test)

# Show the first ten predictions next to the true labels for a quick visual check
print(
    f"Predictions: {y_pred[:10]}",
    f"Actual labels: {y_test[:10]}",
    sep="\n",
)


Predictions: [1 0 2 1 1 0 1 2 1 1]
Actual labels: [1 0 2 1 1 0 1 2 1 1]


In [12]:
# Overall fraction of test samples whose predicted label matches the true label
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Per-class precision / recall / F1, with rows labelled by species name
report = classification_report(y_test, y_pred, target_names=iris.target_names)

# Print the heading and the report on separate lines. Passing them as two
# positional arguments with print's default separator would insert a space in
# front of the report's first line and shift its column header by one
# character relative to the data rows.
print("Classification Report:", report, sep="\n")


Accuracy: 100.00%
Classification Report:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        19
  versicolor       1.00      1.00      1.00        13
   virginica       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import export_graphviz

# NOTE: this cell is self-contained and rebinds the notebook-level names
# `iris`, `X`, `y`, `X_train`, `X_test`, `y_train` and `y_test` to a fresh,
# unscaled split. Any cell run afterwards that uses those names will see
# these values rather than the scaled arrays produced earlier in the notebook.

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split the dataset into training and testing sets (same 70/30 split and seed
# as used elsewhere in the notebook)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train the RandomForestClassifier on the unscaled training features
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

# Extract a single fitted tree from the forest (here: the first one)
tree = rf.estimators_[0]

# Export the tree to a Graphviz .dot file in the current working directory
dot_file_path = 'iris_tree.dot'
export_graphviz(
    tree,
    out_file=dot_file_path,
    feature_names=iris.feature_names,
    # `iris.target_names` is a NumPy array. Convert it to a plain list of str,
    # which is the form the tree-export helpers document for class names and
    # which avoids parameter-validation errors seen with array inputs in some
    # scikit-learn releases (TODO confirm against the installed version).
    class_names=iris.target_names.tolist(),
    filled=True,
    rounded=True,
    special_characters=True,
)

print(f"Decision tree exported to {dot_file_path}")


Decision tree exported to iris_tree.dot
