In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris

# Load the Iris dataset from scikit-learn and wrap the feature matrix in a
# DataFrame whose columns are the dataset's feature names.
iris_data = load_iris()
df = pd.DataFrame(data=iris_data.data, columns=iris_data.feature_names)
# Turn the integer class codes into a categorical column labelled with the
# corresponding target names.
df['species'] = pd.Categorical.from_codes(iris_data.target, iris_data.target_names)

# Display first few rows
print(df.head())

# Check data types and missing values.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
df.info()
print(df.isnull().sum())


In [None]:
# Basic statistics
print(df.describe())

# Group by species and compute the mean of each numerical column.
# - `observed` is passed explicitly because 'species' is categorical and the
#   implicit default for categorical groupers is deprecated in recent pandas;
#   False keeps the behaviour this cell has always had.
# - `numeric_only=True` restricts the aggregation to numerical columns, as the
#   description above states, so non-numeric columns added to `df` by other
#   cells do not leak into the summary when this cell is re-run.
grouped_means = df.groupby('species', observed=False).mean(numeric_only=True)
print(grouped_means)

# Interesting finding
print("Interesting pattern: Setosa flowers tend to have shorter petals and sepals compared to Virginica.")


In [None]:
# Attach a synthetic daily timeline (one day per row, starting 2022-01-01).
n_rows = len(df)
df['date'] = pd.date_range(start='2022-01-01', periods=n_rows, freq='D')

# The "trend" is the petal-length column sorted ascending and written back
# positionally, so a value in this column is NOT the petal length of the
# flower in the same row.
sorted_petal_lengths = df['petal length (cm)'].sort_values().to_numpy()
df['petal length trend'] = sorted_petal_lengths

# Line plot of the simulated trend against the synthetic dates.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['date'], df['petal length trend'], label='Petal Length Trend')
ax.set_title('Petal Length Trend Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Petal Length (cm)')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()


In [None]:
# Bar Chart - Average Petal Length per Species
# `observed` is passed explicitly because 'species' is categorical and the
# implicit default for categorical groupers is deprecated in recent pandas.
species_avg = (
    df.groupby('species', observed=False)['petal length (cm)']
    .mean()
    .reset_index()
)

plt.figure(figsize=(8, 5))
# seaborn 0.13 deprecates `palette` without `hue`; assigning the x variable to
# `hue` gives the same one-colour-per-bar result. `dodge=False` keeps the bars
# full-width and centred on their tick in seaborn versions that would
# otherwise dodge by hue.
ax = sns.barplot(
    data=species_avg,
    x='species',
    y='petal length (cm)',
    hue='species',
    palette='Set2',
    dodge=False,
)
# The x axis already names each species, so drop the redundant hue legend if
# this seaborn version drew one.
hue_legend = ax.get_legend()
if hue_legend is not None:
    hue_legend.remove()
plt.title('Average Petal Length per Species')
plt.xlabel('Species')
plt.ylabel('Petal Length (cm)')
plt.tight_layout()
plt.show()


In [None]:
# Histogram of sepal width with a kernel-density curve overlaid.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(data=df, x='sepal width (cm)', kde=True, color='skyblue', ax=ax)
ax.set_title('Distribution of Sepal Width')
ax.set_xlabel('Sepal Width (cm)')
ax.set_ylabel('Frequency')
fig.tight_layout()
plt.show()


In [None]:
# Scatter plot of sepal length against petal length, one colour per species.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    data=df,
    x='sepal length (cm)',
    y='petal length (cm)',
    hue='species',
    palette='Set1',
    ax=ax,
)
ax.set_title('Sepal Length vs Petal Length by Species')
ax.set_xlabel('Sepal Length (cm)')
ax.set_ylabel('Petal Length (cm)')
# Rebuild the legend so its title is capitalised.
ax.legend(title='Species')
fig.tight_layout()
plt.show()


In [None]:
# Error Handling Example for reading CSV
def load_csv_safely(path):
    """Read a CSV file into a DataFrame, reporting failures instead of raising.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame or None
        The parsed frame, or None when the file is missing, empty, malformed,
        or any other error occurs. A one-line message describing the failure
        is printed in that case.
    """
    try:
        return pd.read_csv(path)
    except FileNotFoundError:
        print("Error: File not found.")
    except pd.errors.EmptyDataError:
        print("Error: File is empty.")
    except pd.errors.ParserError as e:
        # Malformed CSV content gets its own message instead of being
        # reported as an "unexpected" failure.
        print(f"Error: File could not be parsed: {e}")
    except Exception as e:
        # Last-resort branch so the example cell never raises; the message
        # still surfaces what went wrong.
        print(f"Unexpected error: {e}")
    return None


loaded_csv = load_csv_safely('your_dataset.csv')
# NOTE(review): on success this rebinds `df`, replacing whatever frame the
# earlier cells were using; on failure `df` is left untouched.
if loaded_csv is not None:
    df = loaded_csv
