In [None]:
# Import necessary libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Read the Iris CSV into a DataFrame; every later cell works from `data`.
file_path = '/mnt/data/Iris.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Peek at the top five rows to sanity-check the columns and values.
data.head(n=5)

In [None]:
# Basic data overview: print a heading, then pandas' built-in structural
# summary of `data` (info() writes its report to stdout and returns None,
# so the cell shows no extra output value).
print("Dataset Info:")
data.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
print("\nSummary statistics:")
# Leave the frame as the cell's last expression so Jupyter renders it as a
# formatted table rather than the plain-text dump that print() produces.
data.describe()

In [None]:
# Count the null entries in each column (all zeros means nothing to impute).
print("\nMissing values in the dataset:")
print(data.isna().sum())

In [None]:
# Pairwise scatter plots of the measurements, coloured by species.
# 'Id' is excluded: it is not a measurement (it is also dropped from the
# model features), and plotting it would add a meaningless row and column
# to the grid.
sns.pairplot(data.drop(columns='Id'), hue='Species')
plt.show()

In [None]:
# Separate predictors from the target: every column except the 'Id' and
# 'Species' columns is a feature; 'Species' is the label to predict.
X = data.drop(columns=['Id', 'Species'])
y = data['Species']

In [None]:
# Hold out 30% of the rows for testing. The split is stratified on the
# label so each species keeps the same proportion in the train and test
# sets; on a dataset this small an unstratified split can leave the test
# set noticeably unbalanced. random_state fixes the shuffle so the split
# is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

In [None]:
# Standardize the features. The scaler is fitted on the training split only
# and the same fitted scaler is then applied to both splits, so no statistics
# from the test rows influence the transformation.
# NOTE: X_train / X_test are overwritten here with the scaled values.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Fit a 100-tree random forest on the scaled training data; the fixed
# random_state makes the fitted forest reproducible across runs.
classifier = RandomForestClassifier(random_state=42, n_estimators=100)
classifier.fit(X=X_train, y=y_train)

In [None]:
# Predict a species for every row of the (scaled) held-out test set.
y_pred = classifier.predict(X=X_test)

In [None]:
# Evaluate the predictions against the true test labels.
# Each print emits the heading and the metric on separate lines.
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

In [None]:
# Feature importance: pair each importance score from the fitted forest with
# its column name. X is still the unscaled DataFrame, so X.columns carries
# the feature names in the same order the model was trained on.
feature_importances = pd.Series(classifier.feature_importances_, index=X.columns)

# Draw on an explicit Axes and label both axes so the figure can be read
# on its own; bars are ordered from most to least important.
fig, ax = plt.subplots()
feature_importances.sort_values(ascending=False).plot(
    kind='bar', ax=ax, title='Feature Importance'
)
ax.set_xlabel('Feature')
ax.set_ylabel('Importance')
fig.tight_layout()
plt.show()