# In [None]:
# Week 7 Assignment
# Analyzing Data with Pandas and Visualizing Results with Matplotlib

# Importing necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns  # optional, but makes plots prettier

# Task 1: Load and Explore the Dataset
# -------------------------------------

# Load the Iris example dataset through seaborn's dataset loader
iris = sns.load_dataset('iris')

# Display the first few rows
print("First 5 rows of the dataset:")
print(iris.head())

# Check dataset information.
# DataFrame.info() writes its summary to stdout itself and returns None, so
# it must not be wrapped in print() (that would emit a stray "None" line).
print("\nDataset Info:")
iris.info()

# Check for missing values (count of nulls in each column)
print("\nMissing values per column:")
print(iris.isnull().sum())

# The Iris dataset has no missing values, so no cleaning is needed.
# If it had any, we could drop or forward-fill the affected rows:
# iris = iris.dropna()   or   iris = iris.ffill()
# (fillna(method='ffill') is deprecated in recent pandas releases.)

# Task 2: Basic Data Analysis
# ----------------------------

# Compute basic statistics for the numeric columns
print("\nBasic Statistics:")
print(iris.describe())

# Group by species and average every numeric column
species_means = iris.groupby('species').mean(numeric_only=True)
print("\nAverage measurements per species:")
print(species_means)

# Observations are derived from species_means rather than hard-coded, so the
# species labels always match the dataset and every statement reflects the
# numbers printed above (e.g. the widest sepals need not belong to the same
# species as the longest petals).
print("\nObservations:")
largest_per_measurement = species_means.idxmax()  # column -> species label
for measurement, species in largest_per_measurement.items():
    readable = measurement.replace('_', ' ')
    peak = species_means.at[species, measurement]
    print(f"- {species} has the largest average {readable} ({peak:.2f} cm).")

smallest_petal_species = species_means['petal_length'].idxmin()
smallest_petal = species_means.at[smallest_petal_species, 'petal_length']
print(f"- {smallest_petal_species} has the smallest petal length on average "
      f"({smallest_petal:.2f} cm).")

# Task 3: Data Visualization
# ----------------------------

# Set the global plot style. sns.set() is only an alias of set_theme(),
# which is the interface seaborn documents as preferred.
sns.set_theme(style="whitegrid")

# 1. Line Chart – sepal length against row index (the index is not a time
#    axis; this only demonstrates a line plot)
line_fig, line_ax = plt.subplots(figsize=(8, 5))
line_ax.plot(iris.index, iris['sepal_length'], color='blue')
line_ax.set_title("Line Chart: Sepal Length Trend")
line_ax.set_xlabel("Index")
line_ax.set_ylabel("Sepal Length (cm)")
plt.show()

# 2. Bar Chart – Average petal length per species
#    (barplot aggregates the rows of each species with its default estimator)
plt.figure(figsize=(8, 5))
# Passing `palette` without `hue` is deprecated since seaborn 0.13; mapping
# hue to the same variable as x keeps one colour per species, and
# legend=False suppresses the redundant legend.
sns.barplot(
    x='species',
    y='petal_length',
    hue='species',
    data=iris,
    palette='viridis',
    legend=False,
)
plt.title("Bar Chart: Average Petal Length per Species")
plt.xlabel("Species")
plt.ylabel("Average Petal Length (cm)")
plt.show()

# 3. Histogram – how sepal width values are distributed (15 bins)
hist_fig, hist_ax = plt.subplots(figsize=(8, 5))
hist_ax.hist(
    iris['sepal_width'],
    bins=15,
    color='orange',
    edgecolor='black',
)
hist_ax.set_title("Histogram: Distribution of Sepal Width")
hist_ax.set_xlabel("Sepal Width (cm)")
hist_ax.set_ylabel("Frequency")
plt.show()

# 4. Scatter Plot – sepal length against petal length, coloured by species
scatter_fig, scatter_ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    data=iris,
    x='sepal_length',
    y='petal_length',
    hue='species',
    palette='deep',
    ax=scatter_ax,
)
scatter_ax.set_title("Scatter Plot: Sepal Length vs Petal Length")
scatter_ax.set_xlabel("Sepal Length (cm)")
scatter_ax.set_ylabel("Petal Length (cm)")
plt.show()
