In [None]:
# Data manipulation and visualization
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Machine learning
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

In [None]:
# Read the Iris CSV into a DataFrame.
# The path is relative, so IRIS.csv has to sit in the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='IRIS.csv')

# Preview the first five rows (the default for head()) as the cell output
df.head(n=5)

In [None]:
# Dataset info (df.info() prints its summary directly)
df.info()

# Check for missing values.
# Only the last expression of a cell is echoed by Jupyter, so this mid-cell
# result has to be printed explicitly or it is silently discarded.
print("\nMissing values per column:")
print(df.isnull().sum())

# Pairplot for visual inspection, with points colored by the 'species' column
sns.pairplot(df, hue='species')
plt.show()

In [None]:
# Encode the species labels as integers WITHOUT overwriting df['species'].
# Writing the codes back into df made this cell non-idempotent: on a second
# run the encoder would be fitted on the integer codes, so le.classes_ would
# no longer hold the species names used for the plot labels, and re-running
# the pairplot cell would color by numbers instead of names.
le = LabelEncoder()
y = pd.Series(le.fit_transform(df['species']), index=df.index, name='species')
# Code i corresponds to le.classes_[i] (LabelEncoder sorts the distinct labels).

# Features: every column except the target
X = df.drop(columns='species')

# Split into training and testing sets (20% held out, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# Build the decision tree (seeded so the fitted tree is reproducible) and
# train it in one step; fit() returns the estimator itself.
model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Echo the fitted estimator as the cell output
model


In [None]:
# Predict on test data
y_pred = model.predict(X_test)

# LabelEncoder assigns the codes 0..n_classes-1 in the order of le.classes_.
# Passing them explicitly keeps the report rows and the matrix shape aligned
# with the class names even if some class is absent from the test split
# (otherwise the matrix shrinks while the tick labels still list every class).
class_ids = list(range(len(le.classes_)))
class_names = [str(name) for name in le.classes_]

# Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n",
      classification_report(y_test, y_pred, labels=class_ids, target_names=class_names))

# Confusion matrix (rows = actual class, columns = predicted class)
cm = confusion_matrix(y_test, y_pred, labels=class_ids)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()