In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the Iris dataset
from sklearn.datasets import load_iris
# Build a DataFrame with one column per measurement plus a categorical
# 'species' column decoded from the dataset's class codes.
iris = load_iris()
species_labels = pd.Categorical.from_codes(iris.target, iris.target_names)
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(species=species_labels)

# Preview the top of the frame as a sanity check on the load
print(df.head())


In [None]:
# Divider printed between the report sections below. It is assembled by plain
# concatenation instead of nesting "-" inside a double-quoted f-string, because
# reusing the enclosing quote character inside an f-string is a SyntaxError on
# Python versions before 3.12.
section_break = "\n" + "-" * 80 + "\n"

# Basic information about the dataset. DataFrame.info() writes its report to
# stdout itself and returns None, so wrapping it in print() would emit a stray
# "None" line after the report.
df.info()

print(section_break)

# Statistical summary of the numeric columns
print(df.describe())

print(section_break)

# Number of missing values per column
print(df.isnull().sum())

print(section_break)

# Number of samples in each species class
print(df['species'].value_counts())

In [None]:
# Pairwise relationships between the features, coloured by species.
# pairplot() lays out its own grid and reserves a margin on the right for the
# legend. A global plt.tight_layout() afterwards ignores that reserved margin
# and can stretch the axes underneath the legend, so it is deliberately not
# called here.
sns.pairplot(df, hue='species')
plt.show()

# Box plots: one panel per feature, showing how its values are distributed
# within each species. The 2x2 grid provides one panel for each of the four
# Iris features.
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
for ax, feature in zip(axes.flat, iris.feature_names):
    sns.boxplot(x='species', y=feature, data=df, ax=ax)
    ax.set_title(f'Distribution of {feature} by Species')
fig.tight_layout()
plt.show()

# Heatmap of the pairwise correlations between the features; the 'species'
# column is dropped before the correlations are computed.
feature_corr = df.drop(columns='species').corr()
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(feature_corr, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Feature Correlation Matrix')
fig.tight_layout()
plt.show()

In [None]:
# Per-species mean and standard deviation of every feature.
# The grouping is built once and reused for both aggregations.
by_species = df.groupby('species', observed=False)
print(by_species.mean())
print(by_species.std())

# Standardize each feature (subtract its mean, divide by its standard
# deviation) so that the features can be compared on a common scale.
# 'species' is carried over unchanged.
df_norm = df.assign(**{
    name: (df[name] - df[name].mean()) / df[name].std()
    for name in iris.feature_names
})

# Parallel coordinates plot of the standardized features, one line per sample,
# coloured by species. df_norm is a copy of df and therefore already contains
# the 'species' column, so it is passed directly; dropping that column and
# joining it back from df would rebuild the very same frame.
plt.figure(figsize=(10, 6))
pd.plotting.parallel_coordinates(df_norm, 'species', colormap='viridis')
plt.title('Parallel Coordinates Plot')
plt.tight_layout()
plt.show()