In [None]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# 1. Load the Iris dataset
# load_iris() returns an object exposing the feature matrix (.data), the
# integer class labels (.target) and the matching feature names.
iris = load_iris()

# Build a DataFrame purely for inspection: one column per measurement, named
# after iris.feature_names, plus a 'target' column with the species index.
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)

# Preview the table, then print a separator before the model output.
print("First 5 rows of the dataset:")
print(df.head())
print("\n" + "="*50 + "\n")

# 2. Separate the features (X) from the target labels (y)
X, y = iris.data, iris.target

# 3. Hold out 20% of the samples for testing and train on the remaining 80%.
# Pinning random_state keeps the shuffle, and therefore the split, reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 4-5. Create the Decision Tree Classifier and train it on the training set.
# Hyperparameters such as max_depth are left at their defaults; fit() hands
# back the fitted estimator, so construction and training can be chained.
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# 6. Predict the class of every held-out sample
y_pred = clf.predict(X_test)

# 7. Accuracy: the fraction of test samples whose prediction matches the label
accuracy = accuracy_score(y_test, y_pred)

print(f"Model Accuracy: {accuracy:.2f} ({accuracy*100:.2f}%)")

# Optional: compare the first five test-set predictions with the true labels,
# translating each class index into its species name via iris.target_names.
print("\nSample predictions vs Actuals:")
for predicted_idx, actual_idx in zip(y_pred[:5], y_test[:5]):
    predicted_name = iris.target_names[predicted_idx]
    actual_name = iris.target_names[actual_idx]
    print(f"Predicted: {predicted_name}, Actual: {actual_name}")


    

First 5 rows of the dataset:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   target  
0       0  
1       0  
2       0  
3       0  
4       0  


Model Accuracy: 1.00 (100.00%)

Sample predictions vs Actuals:
Predicted: versicolor, Actual: versicolor
Predicted: setosa, Actual: setosa
Predicted: virginica, Actual: virginica
Predicted: versicolor, Actual: versicolor
Predicted: versicolor, Actual: versicolor
