# In [None]:
# Iris Dataset Matplotlib Learning Guide
# Complete code examples for learning matplotlib with the Iris dataset

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the data
# NOTE: 'Iris.csv' is a relative path, so it is resolved against the current
# working directory of the process running this script.
df = pd.read_csv('Iris.csv')

print("=== PART 1: UNDERSTANDING YOUR DATA ===")
print("Dataset shape:", df.shape)
print("\nFirst few rows:")
print(df.head())
print("\nDataset info:")
# DataFrame.info() writes its summary straight to stdout and returns None,
# so wrapping it in print() would add a stray "None" line after the summary.
df.info()
print("\nSpecies counts:")
print(df['Species'].value_counts())

print("\n=== PART 2: BASIC LINE PLOT ===")
# Q3: Can we create a simple line plot with this data?
# Each (column, legend label) pair below becomes one line on a shared figure.
# Passing a single Series to plt.plot uses the Series index as the x-axis.
line_series = [
    ('SepalLengthCm', 'Sepal Length'),
    ('SepalWidthCm', 'Sepal Width'),
    ('PetalLengthCm', 'Petal Length'),
    ('PetalWidthCm', 'Petal Width'),
]

plt.figure(figsize=(10, 6))
for measurement, legend_label in line_series:
    plt.plot(df[measurement], label=legend_label)

plt.xlabel('Sample Index')
plt.ylabel('Measurement (cm)')
plt.title('Iris Measurements - Line Plot')
plt.grid(True, alpha=0.3)
plt.legend()
plt.show()

print("\n=== PART 3: SCATTER PLOTS ===")
# Q4: How do we create scatter plots to explore relationships?


def _decorate_sepal_axes(title):
    """Give the current axes the title, sepal axis labels, and faint grid."""
    plt.title(title)
    plt.xlabel('Sepal Length (cm)')
    plt.ylabel('Sepal Width (cm)')
    plt.grid(True, alpha=0.3)


plt.figure(figsize=(12, 5))

# Left panel: every sample in matplotlib's default colour.
plt.subplot(1, 2, 1)
plt.scatter(df['SepalLengthCm'], df['SepalWidthCm'])
_decorate_sepal_axes('Sepal Length vs Sepal Width')

# Right panel: one scatter call per species so each gets its own colour and
# legend entry. Species are visited in order of first appearance in the data.
plt.subplot(1, 2, 2)
species_colors = {'Iris-setosa': 'red', 'Iris-versicolor': 'blue', 'Iris-virginica': 'green'}
for species_name in df['Species'].unique():
    in_species = df['Species'] == species_name
    plt.scatter(
        df.loc[in_species, 'SepalLengthCm'],
        df.loc[in_species, 'SepalWidthCm'],
        c=species_colors[species_name],
        label=species_name,
        alpha=0.7,
    )
_decorate_sepal_axes('Sepal Length vs Sepal Width (by Species)')
plt.legend()

plt.tight_layout()
plt.show()

print("\n=== PART 4: HISTOGRAMS ===")
# Q5: How do we visualize the distribution of our data?
# The four numeric measurement columns; later sections reuse this list.
columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']

# One histogram per measurement on a 2x2 grid, filled row by row.
hist_fig, hist_axes = plt.subplots(2, 2, figsize=(12, 8))
for hist_ax, column in zip(hist_axes.flat, columns):
    hist_ax.hist(df[column], bins=15, alpha=0.7, color='skyblue', edgecolor='black')
    hist_ax.set_title(f'Distribution of {column}')
    hist_ax.set_xlabel(column)
    hist_ax.set_ylabel('Frequency')
    hist_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("\n=== PART 5: BOX PLOTS ===")
# Q6: How do we compare distributions across species?
plt.figure(figsize=(12, 8))

# The species list does not change between subplots, so build it (and the
# matching tick labels) once instead of on every loop iteration.
species_names = df['Species'].unique()
# Drop the 'Iris-' prefix for display. Taking the last piece of a single split
# leaves names without a hyphen untouched instead of raising IndexError.
species_labels = [name.split('-', 1)[-1] for name in species_names]
# boxplot places its boxes at x = 1..N by default.
tick_positions = list(range(1, len(species_labels) + 1))

for i, column in enumerate(columns, 1):
    plt.subplot(2, 2, i)
    species_data = [df.loc[df['Species'] == name, column] for name in species_names]

    # The `labels=` keyword of boxplot was renamed to `tick_labels` in
    # Matplotlib 3.9 and the old spelling is deprecated. Setting the tick
    # labels explicitly works on both older and newer releases.
    plt.boxplot(species_data)
    plt.xticks(tick_positions, species_labels)
    plt.title(f'{column} by Species')
    plt.ylabel(f'{column}')
    plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("\n=== PART 6: MULTIPLE SUBPLOTS - ADVANCED ===")
# Q7: How do we create a comprehensive visualization?
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
fig.suptitle('Iris Dataset - Comprehensive Analysis', fontsize=16)

# Top row: two scatter panels whose points are coloured by the integer
# category code of each sample's species.
species_codes = pd.Categorical(df['Species']).codes
top_row_panels = [
    # (axes, x column, y column, x display name, y display name)
    (axes[0, 0], 'SepalLengthCm', 'PetalLengthCm', 'Sepal Length', 'Petal Length'),
    (axes[0, 1], 'SepalWidthCm', 'PetalWidthCm', 'Sepal Width', 'Petal Width'),
]
for panel, x_column, y_column, x_name, y_name in top_row_panels:
    panel.scatter(df[x_column], df[y_column],
                  c=species_codes, cmap='viridis', alpha=0.6)
    panel.set_title(f'{x_name} vs {y_name}')
    panel.set_xlabel(f'{x_name} (cm)')
    panel.set_ylabel(f'{y_name} (cm)')
    panel.grid(True, alpha=0.3)

# Bar plot - average measurements by species
# Grouped bars: one group per species, one bar per measurement column.
species_means = df.groupby('Species')[columns].mean()
x_pos = np.arange(len(species_means.index))
# Share 80% of each unit-wide slot among the bars so neighbouring groups never
# touch, however many columns are plotted (0.2 per bar for four columns).
width = 0.8 / len(columns)

for i, column in enumerate(columns):
    # Shift each measurement's bars right by one bar width per column.
    axes[1, 0].bar(x_pos + i*width, species_means[column], width,
                   label=column, alpha=0.8)

axes[1, 0].set_title('Average Measurements by Species')
axes[1, 0].set_xlabel('Species')
axes[1, 0].set_ylabel('Average Measurement (cm)')
# Centre each tick under its group: half-way between the first and last bar.
axes[1, 0].set_xticks(x_pos + width * (len(columns) - 1) / 2)
# Drop the 'Iris-' prefix; names without a hyphen are kept unchanged rather
# than raising IndexError.
axes[1, 0].set_xticklabels([s.split('-', 1)[-1] for s in species_means.index])
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3)

# Correlation heatmap using imshow
correlation_matrix = df[columns].corr()
# Correlations live in [-1, 1]. Pinning the colour limits to that range keeps
# the neutral midpoint of the diverging 'coolwarm' map at zero; without it the
# scale would stretch to the data's own min/max and shift the midpoint.
im = axes[1, 1].imshow(correlation_matrix, cmap='coolwarm', aspect='auto',
                       vmin=-1, vmax=1)
axes[1, 1].set_title('Correlation Matrix')

# Tick labels are the column names with the 'Cm' unit marker removed.
short_names = [col.replace('Cm', '') for col in columns]
axes[1, 1].set_xticks(range(len(columns)))
axes[1, 1].set_yticks(range(len(columns)))
axes[1, 1].set_xticklabels(short_names, rotation=45)
axes[1, 1].set_yticklabels(short_names)

# Add correlation values as text
# imshow draws row i at y=i and column j at x=j, hence text(j, i, ...).
for i, row_values in enumerate(correlation_matrix.to_numpy()):
    for j, value in enumerate(row_values):
        axes[1, 1].text(j, i, f'{value:.2f}',
                        ha="center", va="center", color="black", fontweight='bold')

plt.colorbar(im, ax=axes[1, 1])
plt.tight_layout()
plt.show()

print("\n=== PART 7: CUSTOMIZATION EXAMPLES ===")
# Q8: How do we make our plots look professional?
# Use a nice style. The seaborn style sheets are named 'seaborn-v0_8' from
# Matplotlib 3.6 onwards and 'seaborn' before that; asking for a name the
# installed version does not ship raises an error, so pick whichever is
# available and fall back to the default style as a last resort.
preferred_style = 'seaborn-v0_8'
if preferred_style not in plt.style.available:
    preferred_style = 'seaborn' if 'seaborn' in plt.style.available else 'default'
plt.style.use(preferred_style)
fig, ax = plt.subplots(figsize=(12, 8))

# Create a more sophisticated scatter plot
# One scatter call per species so each gets its own colour and legend entry.
for species in df['Species'].unique():
    species_data = df[df['Species'] == species]
    # Drop the 'Iris-' prefix for the legend; names without a hyphen are kept
    # unchanged rather than raising IndexError.
    ax.scatter(species_data['PetalLengthCm'], species_data['PetalWidthCm'],
               label=species.split('-', 1)[-1], alpha=0.7, s=100)

ax.set_title('Petal Measurements by Species', fontsize=16, fontweight='bold', pad=20)
ax.set_xlabel('Petal Length (cm)', fontsize=12)
ax.set_ylabel('Petal Width (cm)', fontsize=12)
ax.legend(title='Species', title_fontsize=12, fontsize=10)
ax.grid(True, alpha=0.3)

# Add some statistics as text
# transform=ax.transAxes positions the box in axes-fraction coordinates, so
# (0.02, 0.98) is the top-left corner regardless of the data limits.
ax.text(0.02, 0.98, f'Total samples: {len(df)}', transform=ax.transAxes,
        verticalalignment='top', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))

plt.tight_layout()
plt.show()

# Closing suggestions for the reader, printed one line at a time.
closing_lines = (
    "\n=== Your turn to experiment! ===",
    "Try modifying the code above to:",
    "1. Change colors and markers",
    "2. Add more annotations",
    "3. Create different plot combinations",
    "4. Experiment with different matplotlib styles",
)
for closing_line in closing_lines:
    print(closing_line)