In [None]:
# Importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

In [None]:
# Read the Iris dataset; the relative path is resolved against the current
# working directory, so the CSV must be reachable from there.
csv_path = "Iris.csv"
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Check for missing values (count of nulls per column)
print(df.isnull().sum())

# Display dataset details.
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# must not be wrapped in print() -- doing so emits a stray "None" line.
df.info()

# Summary statistics for the numeric columns
print(df.describe())

In [None]:
# Visualising class distribution: one bar per value of the "species" column.
ax = sns.countplot(data=df, x="species", palette="viridis")
ax.set_title("Class Distribution of Iris species")
ax.set_xlabel("species")
ax.set_ylabel("Count")
plt.show()


In [None]:
# Plotting pairplot of the numeric columns, coloured by species.
# The 'Id' column (if the CSV has one) is only removed from df in a later cell,
# so exclude it here: a row identifier is not a feature and would otherwise get
# its own row/column of scatter plots.
sns.pairplot(df.drop(columns=["Id"], errors="ignore"), hue="species", palette="coolwarm")
plt.show()


In [None]:

# Keep only numeric columns (the species column is categorical at this point).
# Also exclude 'Id' if present: it is still in df here (it is dropped in a
# later cell) and a row identifier would add a meaningless row/column to the
# correlation matrix.
numeric_df = df.select_dtypes(include=["number"]).drop(columns=["Id"], errors="ignore")

# Plotting the correlation heatmap
sns.heatmap(numeric_df.corr(), annot=True, cmap="coolwarm", linewidths=0.5)

plt.title("Feature Correlation Heatmap")
plt.show()



In [None]:
# Remove the 'Id' column when the CSV provides one; errors="ignore" makes this
# a no-op otherwise, so the cell can be re-run safely.
df = df.drop(columns="Id", errors="ignore")


In [None]:
# Replace the species column with integer codes and show the code -> label map.
# NOTE: this overwrites df["species"], so re-running the cell refits the
# encoder on the already-encoded integers.
label_encoder = LabelEncoder()
encoded_species = label_encoder.fit_transform(df["species"])
df["species"] = encoded_species
class_lookup = {code: label for code, label in enumerate(label_encoder.classes_)}
print("Encoded Classes:", class_lookup)


In [None]:
# Separate the target column from the feature matrix.
y = df["species"]
X = df.drop(columns=["species"])

# Stratified hold-out split: 80% training, 20% testing, fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

print(f"Training set size: {X_train.shape}, Testing set size: {X_test.shape}")


In [None]:
# Standardise features. The scaler is fitted on the training split only and
# then applied to both splits, so test data never influences the statistics.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


In [None]:
# Fit a 100-tree random forest on the scaled training data; the fixed seed
# keeps the fitted model repeatable between runs.
forest_params = {"n_estimators": 100, "random_state": 42}
clf = RandomForestClassifier(**forest_params)
clf.fit(X_train, y_train)


In [None]:
# Feature importance
# Label the importances with the columns of the feature matrix X (the frame the
# model's inputs were derived from). Using df.columns[:-1] is only correct when
# "species" happens to be the last column of df; otherwise the bars would be
# silently mislabelled.
feature_importances = pd.Series(clf.feature_importances_, index=X.columns)
feature_importances.sort_values(ascending=False).plot(kind="bar", figsize=(10, 5), color="teal")
plt.title("Feature Importance")
plt.show()


In [None]:
# Predict encoded class labels for the scaled test features.
y_pred = clf.predict(X=X_test)


In [None]:
# Finding accuracy: fraction of test samples whose predicted label matches.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")


In [None]:
# Sanity check: container types of the true/predicted labels, and the class
# labels known to the encoder.
true_type, pred_type = type(y_test), type(y_pred)
print(true_type, pred_type)
known_classes = label_encoder.classes_
print(known_classes)


In [None]:
# Printing the classification report.
# Show species names instead of the encoded integers. LabelEncoder assigns
# codes 0..n-1 in the order of classes_, so passing both `labels` and
# `target_names` keeps every row aligned with the right class name.
class_names = [str(name) for name in label_encoder.classes_]
print(
    "Classification Report:\n",
    classification_report(
        y_test,
        y_pred,
        labels=list(range(len(class_names))),
        target_names=class_names,
    ),
)


In [None]:
# Make sure both label containers hold plain integers before building the
# confusion matrix.
y_test, y_pred = y_test.astype(int), y_pred.astype(int)


In [None]:
# Confusion matrix: rows are the actual classes, columns the predicted ones.
cm = confusion_matrix(y_test, y_pred)
class_names = label_encoder.classes_

fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix")
plt.show()


In [None]:
# Persist the classifier together with the preprocessing objects it depends on,
# so predictions can be reproduced outside this notebook.
artifacts = {
    "iris_model.pkl": clf,
    "scaler.pkl": scaler,
    "label_encoder.pkl": label_encoder,
}
for filename, artifact in artifacts.items():
    joblib.dump(artifact, filename)
print("Model and preprocessing objects saved successfully!")


In [None]:
# Load model and preprocessing objects back from disk.
# NOTE: joblib.load unpickles arbitrary objects -- only load files you trust.
loaded_clf = joblib.load("iris_model.pkl")
loaded_scaler = joblib.load("scaler.pkl")
loaded_label_encoder = joblib.load("label_encoder.pkl")

# Example prediction.
# Values must follow the column order of the training features
# (presumably sepal length, sepal width, petal length, petal width -- confirm
# against the CSV header).
sample = np.array([[5.9, 3, 5.1, 1.8]])

# The scaler was fitted on a DataFrame, so it remembers the feature names.
# Passing a bare ndarray makes scikit-learn warn that X lacks valid feature
# names; wrapping the sample with the recorded names avoids the warning and
# makes the expected column order explicit. Scalers fitted without names
# (older scikit-learn, or fitted on an array) fall back to the raw array.
feature_names = getattr(loaded_scaler, "feature_names_in_", None)
if feature_names is not None:
    sample = pd.DataFrame(sample, columns=feature_names)

sample_scaled = loaded_scaler.transform(sample)
predicted_class = loaded_clf.predict(sample_scaled)

# Map the encoded prediction back to the original species label.
print("Predicted Species:", loaded_label_encoder.inverse_transform(predicted_class)[0])
