In [None]:
# Iris Dataset Analysis with Pandas and Matplotlib

# Task 0: Import Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

sns.set(style="whitegrid")
%matplotlib inline

# Task 1: Load and Explore the Dataset
# The loaded object supplies the measurements (.data), the integer class
# codes (.target) and the matching names (.feature_names / .target_names).
iris_data = load_iris()
df = pd.DataFrame(iris_data.data, columns=iris_data.feature_names)
# Translate the integer class codes into a categorical column of species names.
df['species'] = pd.Categorical.from_codes(iris_data.target, iris_data.target_names)

# Display first 5 rows
# A bare `df.head()` is only rendered when it is the last expression of a
# notebook cell; in the middle of a cell its result is discarded, so print it.
print(df.head())

# Check structure, data types, and missing values
# DataFrame.info() writes its report directly and returns None, so wrapping it
# in print() would append a stray "None" line to the output.
df.info()
print(df.isnull().sum())

# Task 2: Basic Data Analysis
# Basic statistics
print(df.describe())

# Group by species and calculate mean
# 'species' is a categorical column. Grouping on a categorical key without an
# explicit `observed` argument relies on a deprecated default (FutureWarning in
# recent pandas). observed=True keeps only the categories that actually occur,
# which here is all of them, so the resulting table is unchanged.
species_group = df.groupby('species', observed=True).mean()
print(species_group)

# Task 3: Data Visualization

# 1. Line chart - one petal-length line per species, drawn against row index
line_fig, line_ax = plt.subplots(figsize=(10, 6))
for species_name in df['species'].unique():
    # Select this species' petal lengths once; the Series keeps the original
    # row labels, which serve as the x coordinates.
    petal_lengths = df.loc[df['species'] == species_name, 'petal length (cm)']
    line_ax.plot(petal_lengths.index, petal_lengths, label=species_name)
line_ax.set_title('Petal Length Trend by Species')
line_ax.set_xlabel('Sample Index')
line_ax.set_ylabel('Petal Length (cm)')
line_ax.legend()
plt.show()

# 2. Bar chart - mean sepal width for each species (one bar per group row)
mean_sepal_width = species_group['sepal width (cm)']
bar_ax = mean_sepal_width.plot.bar(color=['skyblue', 'lightgreen', 'salmon'])
bar_ax.set_title('Average Sepal Width per Species')
bar_ax.set_ylabel('Sepal Width (cm)')
# Replace the axis label pandas derives from the index name.
bar_ax.set_xlabel('Species')
plt.show()

# 3. Histogram - how sepal length is distributed across all samples
hist_fig, hist_ax = plt.subplots(figsize=(8, 5))
sepal_lengths = df['sepal length (cm)']
hist_ax.hist(sepal_lengths, bins=15, color='purple', alpha=0.7)
hist_ax.set_title('Distribution of Sepal Length')
hist_ax.set_xlabel('Sepal Length (cm)')
hist_ax.set_ylabel('Frequency')
plt.show()

# 4. Scatter plot - sepal length against petal length, coloured by species
scatter_fig, scatter_ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x='sepal length (cm)',
    y='petal length (cm)',
    hue='species',
    palette='bright',
    ax=scatter_ax,
)
scatter_ax.set_title('Sepal Length vs Petal Length by Species')
scatter_ax.set_xlabel('Sepal Length (cm)')
scatter_ax.set_ylabel('Petal Length (cm)')
plt.show()

# Observations
# Written summary of the analysis, emitted one line per print call.
observation_lines = (
    "Observations:",
    "- Iris-setosa has smaller petals compared to the other species.",
    "- Iris-virginica has generally the largest sepal and petal measurements.",
    "- Positive correlation between sepal length and petal length.",
    "- Histograms show some overlap in sepal length, suggesting multiple features are needed for classification.",
)
for observation in observation_lines:
    print(observation)
