In [ ]:
# Colab-only step: open the browser upload widget so the CSV read below can be
# placed in the working directory. `files.upload()` returns a dict describing
# the uploaded files. Outside Colab the `google.colab` package is not
# installed, so fall back to an empty mapping and rely on the CSV already
# being present locally instead of aborting the whole script.
try:
    from google.colab import files
except ImportError:
    uploaded = {}
else:
    uploaded = files.upload()

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import warnings
warnings.filterwarnings('ignore')

# Load the dataset from the CSV file in the current working directory.
df = pd.read_csv(filepath_or_buffer="IRIS.csv")

# Bare expression: as the last statement of the cell, the notebook renders it.
df

In [ ]:
print("\nMissing values in each column:")
# isna() is the pandas alias of isnull(); summing the boolean mask gives the
# number of missing entries per column. Left as a bare expression so the
# notebook renders the result.
df.isna().sum()

In [ ]:
# Show the distinct values of every column.
# A notebook only renders the value of the LAST bare expression in a cell, so
# each result is printed explicitly; otherwise the headings for all but the
# final column would appear with no values underneath them.
for column in ("species", "sepal_length", "sepal_width", "petal_length", "petal_width"):
    print(f"\nUnique values in '{column}' column:")
    print(df[column].unique())

In [ ]:
# Frequency table of each column, printed in the same order as before.
for column in ("species", "sepal_length", "sepal_width", "petal_length", "petal_width"):
    print(f"\nCount of unique values in '{column}' column:")
    print(df[column].value_counts())

In [ ]:
# Report how many rows are exact repeats of an earlier row.
print("\nDuplicate rows in the Dataframe:")
print(df.duplicated(keep="first").sum())

# Keep the first occurrence of every row and rebind df to the cleaned frame.
df = df.drop_duplicates(keep="first")

In [ ]:
# Print the column names, non-null counts and dtypes of the frame to stdout.
df.info(buf=None)

In [ ]:
# Summary statistics of the numeric columns; the quartiles listed here are the
# ones pandas uses by default.
df.describe(percentiles=[0.25, 0.5, 0.75])

In [ ]:
# Summary statistics for every column, including the non-numeric ones.
df.describe(include="all")

In [ ]:
# Bar chart of sepal length per species.
# The title promises a per-species view, so aggregate to the mean of each
# species first; plotting the raw column would draw one bar per row, indexed
# by row label, with no relation to species at all.
(
    df.groupby("species")["sepal_length"]
    .mean()
    .plot(kind='bar', figsize=(8, 4), title='sepal_length by species', color='lightgreen')
)

In [ ]:
# One 30-bin histogram per sepal measurement, drawn in separate figures:
# (column to plot, human-readable label, bar colour).
for column, label, bar_color in (
    ("sepal_width", "Sepal Width", "purple"),
    ("sepal_length", "Sepal Length", "green"),
):
    plt.figure(figsize=(6, 4))
    plt.hist(df[column], bins=30, color=bar_color, edgecolor="black")
    plt.title(f"{label} Distribution")
    plt.xlabel(label)
    plt.ylabel("Frequency")
    plt.grid(True)
    plt.show()

In [ ]:
# Line plot of sepal length against petal length.
# A line plot joins points in the order they are supplied; the rows are not
# ordered by petal length, so sort along the x-axis first (sepal length breaks
# ties) to get a readable left-to-right line instead of a back-and-forth
# scribble.
ordered = df.sort_values(["petal_length", "sepal_length"])

plt.figure(figsize=(6,4))
plt.plot(ordered['petal_length'],ordered['sepal_length'],marker='o',linestyle='-',color='b') # Line plot
plt.title("Petal_length over Sepal_length")
plt.xlabel("Petal_length")
plt.ylabel("Sepal_length")
plt.grid(True)
plt.show()

In [ ]:
# Scatter-matrix of the columns, coloured by species with one marker shape
# per hue level.
sns.pairplot(data=df, hue="species", markers=["o", "s", "D"])
plt.show()

In [ ]:
# Scatter of petal length (x) against species (y).
# The title and axis labels were generic placeholders; they now name the
# columns that are actually plotted.
plt.figure(figsize=(6,4))
plt.scatter(df["petal_length"],df["species"],color='red',alpha=0.7)
plt.title("Petal Length by Species")
plt.xlabel("Petal Length")
plt.ylabel("Species")
plt.grid(True)
plt.show()

In [ ]:
# Annotated heatmap of the pairwise correlations between the four
# measurement columns.
plt.figure(figsize=(6, 4))
corr_matrix = df.loc[
    :, ["sepal_length", "sepal_width", "petal_length", "petal_width"]
].corr()
sns.heatmap(data=corr_matrix, annot=True, fmt=".2f", cmap="coolwarm")
plt.title("Correlation Heatmap")
plt.show()

In [ ]:
# Petal-length distribution, split by species.
# Handing histplot the two-column frame positionally treats it as wide-form
# data, so `species` never acts as a grouping variable. Use the long-form
# interface instead: petal length on the x-axis, one coloured histogram (and
# KDE curve) per species. The axis labels now match what is drawn: the
# measured value on x and the bin count on y.
plt.figure(figsize=(6,4))
sns.histplot(data=df, x="petal_length", hue="species", bins=20, kde=True)
plt.title('Distribution of Petal length according to the Species')
plt.xlabel('Petal length')
plt.ylabel('Count')
plt.show()

In [ ]:
# Horizontal box plot: one box of petal widths for each species.
plt.figure(figsize=(6, 4))
sns.boxplot(data=df, x="petal_width", y="species", palette="pastel").set(
    title="Species according to the petal_width",
    xlabel="petal_width",
    ylabel="Species",
)
plt.show()

In [ ]:
# Apply the white-grid seaborn theme before drawing.
sns.set(style="whitegrid")

# Horizontal bars showing the mean sepal length of each species.
plt.figure(figsize=(6, 4))
sns.barplot(
    data=df,
    x="sepal_length",
    y="species",
    estimator="mean",
    palette="muted",
).set(
    title="Species according to the sepal length ",
    xlabel="Sepal length",
    ylabel="Species",
)
plt.xticks(rotation=45)
plt.show()

In [ ]:
# Pie chart of how many rows belong to each species; each wedge is labelled
# with its species value and its share to one decimal place.
species_counts = df["species"].value_counts()
plt.pie(x=species_counts, labels=species_counts.index, autopct="%1.1f%%")
plt.title("Species Distribution")
plt.show()

In [ ]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

# Train a random-forest classifier on the measurement columns and evaluate it
# on a held-out 20% split.

# Replace the species values with integer class ids for the classifier.
le = LabelEncoder()
df['species'] = le.fit_transform(df['species'])

# Class ids in encoder order plus matching display names for the report.
# str() keeps the names valid even if this cell is re-run after the column
# has already been encoded to integers.
class_ids = list(range(len(le.classes_)))
class_names = [str(label) for label in le.classes_]

X = df.drop('species', axis=1)
y = df['species']
# Stratify on the target so the small test split keeps the same class
# proportions as the full dataset instead of depending on the luck of the draw.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
# Passing labels/target_names makes the report rows readable and fixes the
# row/column order of the confusion matrix to the encoder's class order.
print(
    "Classification Report:\n",
    classification_report(y_test, y_pred, labels=class_ids, target_names=class_names),
)
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=class_ids))