# Iris Dataset Analysis
This notebook loads, cleans, analyzes, and visualizes the Iris dataset.

In [None]:
# Task 1: Load and Explore the Dataset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

# Apply seaborn's "whitegrid" theme to every figure drawn in this notebook.
# set_theme() is the documented entry point; sns.set() is only an alias for it.
sns.set_theme(style="whitegrid")


: 

In [None]:
# Load the Iris dataset into `df`: the measurement columns named by
# iris_data.feature_names plus a 'species' column of class names.
try:
    iris_data = load_iris()
    df = pd.DataFrame(iris_data.data, columns=iris_data.feature_names)
    # iris_data.target holds integer class codes; indexing target_names with
    # the whole code array maps every row to its name in one vectorized step.
    df['species'] = iris_data.target_names[iris_data.target]
    print("Dataset loaded successfully.\n")
except Exception as e:
    # Report the failure, then re-raise: every later cell needs `df`, so
    # continuing would only surface as a confusing NameError further down.
    print("Error loading dataset:", e)
    raise

# Display first few rows
df.head()

In [None]:
# Explore data types and check for missing values.
# DataFrame.info() prints its summary itself and returns None, so it is called
# bare; wrapping it in print() would emit a stray "None" line after the summary.
df.info()

# Per-column count of missing values, shown as the cell's output.
df.isnull().sum()

In [None]:
# Cleaning step: discard every row that has at least one missing value,
# then print the frame summary again to confirm the result.
df = df.dropna(axis=0, how='any')
df.info()

In [None]:
# Task 2: Basic Data Analysis
# Descriptive statistics for the frame via DataFrame.describe(). With default
# arguments pandas limits the summary of a mixed-type frame to its numeric
# columns, so the text 'species' column is not part of the output.
df.describe()

In [None]:
# Per-species mean of every measurement column; the resulting frame is
# indexed by species name and is reused by the bar chart further down.
grouped = (
    df
    .groupby(by='species')
    .mean()
)
grouped

### Observations:
- *Setosa* flowers generally have smaller petal lengths and widths compared to *Virginica* and *Versicolor*.
- Sepal length differs less between species than the petal measurements do, but it still increases from *Setosa* to *Versicolor* to *Virginica*.


In [None]:
# Task 3: Data Visualization
# Line chart: both length columns drawn against the row index, which stands
# in for a time axis (the data itself has no time dimension).
fig, ax = plt.subplots(figsize=(10, 5))
for column, series_label in (
    ('sepal length (cm)', 'Sepal Length'),
    ('petal length (cm)', 'Petal Length'),
):
    ax.plot(df.index, df[column], label=series_label)
ax.set_title('Simulated Time-Series of Sepal and Petal Length')
ax.set_xlabel('Index')
ax.set_ylabel('Length (cm)')
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Bar chart: one bar per species showing its mean petal length, read from the
# per-species means computed earlier in `grouped`.
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(x=grouped.index, y=grouped['petal length (cm)'], ax=ax)
ax.set(
    title='Average Petal Length by Species',
    xlabel='Species',
    ylabel='Average Petal Length (cm)',
)
fig.tight_layout()
plt.show()

In [None]:
# Histogram of petal width across all rows: 20 bins with a kernel density
# estimate curve overlaid.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(df['petal width (cm)'], bins=20, kde=True, color='purple', ax=ax)
ax.set(
    title='Distribution of Petal Width',
    xlabel='Petal Width (cm)',
    ylabel='Frequency',
)
fig.tight_layout()
plt.show()

In [None]:
# Scatter plot: sepal length on x, petal length on y, one point per row,
# colored by the 'species' column.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    data=df,
    x='sepal length (cm)',
    y='petal length (cm)',
    hue='species',
    ax=ax,
)
ax.set_title('Sepal Length vs. Petal Length by Species')
ax.set_xlabel('Sepal Length (cm)')
ax.set_ylabel('Petal Length (cm)')
ax.legend(title='Species')
fig.tight_layout()
plt.show()