In [None]:
# Importing necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Task 1: Load and Explore the Dataset
# Load the Iris dataset through seaborn. Only the load call sits inside the
# try so that unrelated failures are not reported as load errors.
try:
    df = sns.load_dataset('iris')
except Exception as e:
    # Report the problem, then re-raise: every later step needs `df`, so
    # carrying on would only fail further down with a confusing NameError.
    print(f"Error loading dataset: {e}")
    raise
else:
    print("Dataset successfully loaded!")

# Display the first few rows of the dataset
print("\nFirst 5 rows of the dataset:")
print(df.head())

# Explore the structure of the dataset
print("\nDataset Info:")
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()

# Check for missing values: count the null entries in each column
missing_values = df.isnull().sum()
print("\nMissing Values in Each Column:")
print(missing_values)

# Task 1: Cleaning the Dataset (if required)
# This branch only runs when the check above found at least one missing value.
if missing_values.sum() > 0:
    # Fill gaps with the mean of the corresponding column.
    # numeric_only=True restricts the means to numeric columns; taking the mean
    # of a non-numeric column such as 'species' raises TypeError on recent
    # pandas versions. Columns without a computed mean are left unchanged.
    df = df.fillna(df.mean(numeric_only=True))

# Task 2: Basic Data Analysis
# Summary statistics: header and table are emitted by a single print call,
# separated by a newline.
print("\nBasic Statistics of Numerical Columns:", df.describe(), sep="\n")

# Per-species means: split the rows by species, then average each group.
species_groups = df.groupby('species')
grouped = species_groups.mean()
print("\nMean of Numerical Columns Grouped by Species:", grouped, sep="\n")

# Task 2: Observations
observations = (
    "- The Iris dataset has three species: setosa, versicolor, and virginica.",
    "- Setosa species generally have shorter petals compared to the others.",
)
print("\nObservations:")
for note in observations:
    print(note)

# Task 3: Data Visualization
# 1. Line chart of sepal and petal length plotted against the row index
#    (the x-axis is the DataFrame index, not a time variable).
fig, ax = plt.subplots(figsize=(8, 5))
line_specs = (
    ('sepal_length', 'Sepal Length', 'blue'),
    ('petal_length', 'Petal Length', 'orange'),
)
# One line per (column, legend label, colour) entry, drawn in the listed order.
for column, series_label, line_color in line_specs:
    ax.plot(df.index, df[column], label=series_label, color=line_color)
ax.set_title('Sepal Length and Petal Length Trends')
ax.set_xlabel('Index')
ax.set_ylabel('Length (cm)')
ax.legend()
plt.show()

# 2. Bar chart for average petal length per species
fig, ax = plt.subplots(figsize=(8, 5))
# `grouped` holds the per-species means; take the petal-length column and draw
# it onto the axes created above, one colour per bar.
mean_petal_length = grouped['petal_length']
mean_petal_length.plot.bar(ax=ax, color=['blue', 'orange', 'green'])
ax.set_title('Average Petal Length per Species')
ax.set_xlabel('Species')
ax.set_ylabel('Petal Length (cm)')
plt.show()

# 3. Histogram for Sepal Width distribution
fig, ax = plt.subplots(figsize=(8, 5))
sepal_widths = df['sepal_width']
# Ten bins, purple bars with black outlines.
ax.hist(sepal_widths, bins=10, color='purple', edgecolor='black')
ax.set(
    title='Distribution of Sepal Width',
    xlabel='Sepal Width (cm)',
    ylabel='Frequency',
)
plt.show()

# 4. Scatter plot for Sepal Length vs Petal Length
fig, ax = plt.subplots(figsize=(8, 5))
# Points are coloured by species using the viridis palette.
sns.scatterplot(
    data=df,
    x='sepal_length',
    y='petal_length',
    hue='species',
    palette='viridis',
    ax=ax,
)
ax.set_title('Sepal Length vs Petal Length')
ax.set_xlabel('Sepal Length (cm)')
ax.set_ylabel('Petal Length (cm)')
# Rebuild the legend so that it carries a capitalised title.
ax.legend(title='Species')
plt.show()
