# 🌸 Iris Flower Classification — CodeAlpha Internship Task 1
This notebook demonstrates classification of Iris flower species using machine learning.
We'll perform data exploration, visualization, model training, and evaluation.

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

In [None]:

# Load the Iris dataset from a CSV file in the current working directory.
df = pd.read_csv(filepath_or_buffer='Iris.csv')
print('Dataset loaded successfully!')

# Last expression in the cell: the notebook renders the first five rows.
df.head(n=5)

In [None]:
# Dataset info and missing values
# DataFrame.info() writes its summary straight to stdout and returns None,
# so it is called directly; wrapping it in print() would emit a stray "None".
df.info()

# Per-column count of null entries.
print('\nMissing Values:\n', df.isnull().sum())

In [None]:
# Drop Id column if exists
# errors='ignore' leaves the columns untouched when 'Id' is absent.
df = df.drop(columns='Id', errors='ignore')

# Encode target labels
# Fit the encoder on the Species values, then overwrite the column with the
# resulting integer codes. The fitted encoder stays available as `le`.
le = LabelEncoder().fit(df['Species'])
df['Species'] = le.transform(df['Species'])
df.head()

In [None]:
# Exploratory Data Analysis (EDA)
# Descriptive statistics as computed by DataFrame.describe().
summary_stats = df.describe()
print(summary_stats)

# Pairplot: pairwise feature plots, coloured by the Species column.
sns.pairplot(data=df, hue='Species', palette='Set2')
# y slightly above 1.0 places the title just above the top of the figure.
plt.suptitle('Pairplot of Iris Features by Species', y=1.02)
plt.show()

# Correlation heatmap over the columns of df, each cell annotated with its
# coefficient to two decimals.
corr_matrix = df.corr()
plt.figure(figsize=(8, 5))
sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm')
plt.title('Feature Correlation Heatmap')
plt.show()

In [None]:
# Split dataset into train and test sets
# Features are every column except the target; the target is 'Species'.
X = df.drop('Species', axis=1)
y = df['Species']

# Hold out 20% of the rows for testing. stratify=y preserves the class
# proportions of the full dataset in both splits, so the held-out set is not
# skewed toward one species. random_state fixes the shuffle so the split is
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
print('Training and testing sets created.')

In [None]:
# Train and evaluate models
# Each candidate is fitted on the training split and scored on the test
# split; accuracies are collected in `results` keyed by model name.
models = {
    'Logistic Regression': LogisticRegression(max_iter=200),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42)
}

# 'Species' was integer-encoded by `le`, so code i corresponds to
# le.classes_[i]. Passing the full label set and the original names makes the
# reports readable and keeps the confusion matrix at a fixed
# n_classes x n_classes shape even if a class is missing from the test data.
class_ids = np.arange(len(le.classes_))
class_names = [str(cls) for cls in le.classes_]

results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    results[name] = acc
    print(f'\n{name} Results:')
    print('Accuracy:', acc)
    print('Classification Report:\n',
          classification_report(y_test, y_pred,
                                labels=class_ids, target_names=class_names))
    print('Confusion Matrix:\n',
          confusion_matrix(y_test, y_pred, labels=class_ids))

In [None]:
# Compare model performance
model_names = list(results)
accuracies = [results[model_name] for model_name in model_names]

plt.figure(figsize=(7,4))
# seaborn >= 0.13 deprecates `palette` without `hue`; mapping hue to the same
# labels as x keeps one colour per bar. dodge=False keeps the bars full-width
# and centred on their ticks on older seaborn versions, where hue would
# otherwise offset them.
ax = sns.barplot(x=model_names, y=accuracies, hue=model_names,
                 palette='Set2', dodge=False)
# A hue legend would only repeat the x tick labels; remove it if one was drawn.
legend = ax.get_legend()
if legend is not None:
    legend.remove()
plt.title('Model Accuracy Comparison')
plt.ylabel('Accuracy')
# Zoom in on the 0.8-1.0 band by default, but lower the floor whenever a model
# scores at or below it so that its bar is not clipped out of view.
y_floor = min(0.8, max(0.0, min(accuracies, default=0.8) - 0.05))
plt.ylim(y_floor, 1.0)
plt.show()

# max() over the dict iterates its keys; ties resolve to the first model.
best_model = max(results, key=results.get)
print(f'Best Performing Model: {best_model} with accuracy {results[best_model]:.2f}')

In [None]:
# Save the best model
# Look up the fitted estimator by the winning model's name and serialise it
# with joblib into the current working directory.
winning_estimator = models[best_model]
joblib.dump(winning_estimator, 'iris_best_model.pkl')
print('Best model saved as iris_best_model.pkl')