In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

# -------------------
# Task 1: Load and Explore the Dataset
# -------------------

# Load iris dataset from sklearn (bundled with the package, so no download is needed)
iris_data = load_iris(as_frame=True)
# Work on a copy so the renaming / relabelling below leaves the Bunch's own frame untouched.
df = iris_data.frame.copy()
# Rename columns to match typical iris dataset style
df.columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']

# sklearn stores the class as integer codes (0, 1, 2) in the last column.
# Translate the codes into the actual species names so that group-bys, axis
# tick labels and plot legends show names instead of bare numbers, and so the
# label is not summarised as if it were a numeric feature.
species_names = {code: str(name) for code, name in enumerate(iris_data.target_names)}
df['species'] = df['species'].map(species_names)

# Display the first few rows
print("First rows:")
print(df.head())

# Explore structure
print("\nData types:")
print(df.dtypes)

print("\nMissing values:")
print(df.isnull().sum())

# Clean the dataset: drop any row containing a missing value.
# (The printed missing-value counts above show whether this removes anything.)
df_cleaned = df.dropna()

# -------------------
# Task 2: Basic Data Analysis
# -------------------
# Summary statistics for the cleaned frame; header and table are emitted by a
# single print call, separated by a newline.
print("\nBasic statistics:", df_cleaned.describe(), sep="\n")

# Per-species averages of the numeric columns, kept in `group_means`.
by_species = df_cleaned.groupby('species')
group_means = by_species.mean(numeric_only=True)
print("\nGroup means by species:", group_means, sep="\n")

# -------------------
# Task 3: Data Visualization
# -------------------

# 1. Line chart: sepal length plotted against the row index.
# Uses an explicit Figure/Axes pair instead of the implicit pyplot state.
line_fig, line_ax = plt.subplots(figsize=(8, 5))
line_ax.plot(df_cleaned.index, df_cleaned['sepal_length'], label='Sepal Length')
line_ax.set_title('Line Chart: Sepal Length Trend over Index')
line_ax.set_xlabel('Index')
line_ax.set_ylabel('Sepal Length')
line_ax.legend()
line_ax.grid()
plt.show()

# 2. Bar chart: mean petal length for each value of the `species` column.
# seaborn draws onto the Axes that is created here and passed in explicitly.
bar_fig, bar_ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    data=df_cleaned,
    x='species',
    y='petal_length',
    estimator='mean',
    ax=bar_ax,
)
bar_ax.set_title('Bar Chart: Average Petal Length per Species')
bar_ax.set_xlabel('Species')
bar_ax.set_ylabel('Average Petal Length')
plt.show()

# 3. Histogram: distribution of sepal width, split into 20 bins with
# black bar outlines.
hist_fig, hist_ax = plt.subplots(figsize=(8, 5))
sepal_widths = df_cleaned['sepal_width']
hist_ax.hist(sepal_widths, bins=20, edgecolor='black')
hist_ax.set_title('Histogram: Sepal Width Distribution')
hist_ax.set_xlabel('Sepal Width')
hist_ax.set_ylabel('Frequency')
plt.show()

# 4. Scatter plot: sepal length (x) against petal length (y), with points
# coloured by the `species` column.
scatter_fig, scatter_ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    data=df_cleaned,
    x='sepal_length',
    y='petal_length',
    hue='species',
    ax=scatter_ax,
)
scatter_ax.set_title('Scatter Plot: Sepal Length vs Petal Length')
scatter_ax.set_xlabel('Sepal Length')
scatter_ax.set_ylabel('Petal Length')
# Rebuild the legend so that it carries an explicit title.
scatter_ax.legend(title='Species')
plt.show()


ModuleNotFoundError: No module named 'matplotlib'