# Data Visualization Mini Practice Tasks Solutions

This notebook contains solutions for the mini practice tasks in the Python Data Science Guide visualization section.

## Task 1: Create a figure with two subplots showing quadratic and exponential functions

**Task Description:** Create a figure with two subplots side by side. In the first subplot, show a quadratic function (y = x²). In the second, show an exponential function (y = e^x). Add appropriate titles, labels, and legends to each plot.

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Sample both functions on the same x grid (-2 to 2, 100 points)
x = np.linspace(-2, 2, 100)
y1 = x**2        # quadratic curve
y2 = np.exp(x)   # exponential curve

# One row, two columns: quadratic on the left, exponential on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Each panel is described by (axes, y-values, line format, formula, title);
# the formula doubles as the legend entry and the y-axis label.
panels = (
    (ax1, y1, 'b-', 'y = x²', 'Quadratic Function'),
    (ax2, y2, 'r-', 'y = e^x', 'Exponential Function'),
)
for axis, values, line_format, formula, heading in panels:
    axis.plot(x, values, line_format, label=formula)
    axis.set_title(heading)
    axis.set_xlabel('x')
    axis.set_ylabel(formula)
    axis.grid(True)
    axis.legend()

# Fit the panels first, then place the overall title above them
plt.tight_layout()
fig.suptitle('Comparing Mathematical Functions', fontsize=16, y=1.05)
plt.show()

## Task 2: Create a visualization with histogram and box plot

**Task Description:** Create a visualization that includes a histogram and a box plot of the same dataset (you can use numpy's random functions to create sample data). Compare what insights each visualization provides.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Set a nice style
plt.style.use('seaborn-v0_8-whitegrid')

# Generate sample data - a mixture of two normal distributions
np.random.seed(42)  # For reproducibility
data1 = np.random.normal(0, 1, 300)  # Mean=0, StdDev=1, 300 samples
data2 = np.random.normal(4, 1.5, 200)  # Mean=4, StdDev=1.5, 200 samples
mixed_data = np.concatenate([data1, data2])

# Summary statistics are computed once so that every label and remark below
# is derived from the data rather than typed in by hand.
mean_val = np.mean(mixed_data)
q1, median_val, q3 = np.percentile(mixed_data, [25, 50, 75])
iqr = q3 - q1
lower_fence = q1 - 1.5 * iqr   # points below this are drawn as outliers
upper_fence = q3 + 1.5 * iqr   # points above this are drawn as outliers
within_fences = mixed_data[(mixed_data >= lower_fence) & (mixed_data <= upper_fence)]
whisker_low = within_fences.min()    # end of the lower whisker
whisker_high = within_fences.max()   # end of the upper whisker
n_low_outliers = int(np.sum(mixed_data < lower_fence))
n_high_outliers = int(np.sum(mixed_data > upper_fence))

# Create a figure with histogram and boxplot
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8), gridspec_kw={'height_ratios': [2, 1]})

# Histogram with KDE
sns.histplot(mixed_data, bins=30, kde=True, ax=ax1, color='skyblue')
ax1.set_title('Histogram with Kernel Density Estimate')
ax1.set_xlabel('')  # Hide x-label as it's redundant with the boxplot below
ax1.set_ylabel('Frequency')

# Add vertical lines for mean and median
ax1.axvline(mean_val, color='red', linestyle='--', linewidth=1.5, label=f'Mean: {mean_val:.2f}')
ax1.axvline(median_val, color='green', linestyle='--', linewidth=1.5, label=f'Median: {median_val:.2f}')
ax1.legend()

# Box plot. whis=1.5 matches the fences computed above; the narrower box
# leaves room above and below it for the component labels.
sns.boxplot(x=mixed_data, ax=ax2, color='lightgreen', width=0.5, whis=1.5)
ax2.set_title('Box Plot')
ax2.set_xlabel('Values')

# Label the box plot components at their actual positions: x is given in data
# units, y as a fraction of the axes height so the labels clear the box.
label_transform = ax2.get_xaxis_transform()
labels_above_box = [
    (whisker_low, "Min (non-outlier)", "left"),
    (q1, "Q1 (25%)", "center"),
    (q3, "Q3 (75%)", "center"),
    (whisker_high, "Max (non-outlier)", "right"),
]
for x_pos, label_text, align in labels_above_box:
    ax2.text(x_pos, 0.82, label_text, transform=label_transform, ha=align, va="bottom")
# The median label goes below the box so it cannot collide with the Q1 label
ax2.text(median_val, 0.18, "Median", transform=label_transform, ha="center", va="top")

# Build the data-dependent remarks from the statistics computed above
if n_low_outliers or n_high_outliers:
    outlier_note = (f'- The box plot flags {n_low_outliers} low and {n_high_outliers} high '
                    'outliers beyond the 1.5×IQR whiskers')
else:
    outlier_note = '- The box plot flags no outliers: every point lies within the 1.5×IQR whiskers'

if mean_val > median_val:
    skew_note = '- The mean is greater than the median, indicating right skew'
elif mean_val < median_val:
    skew_note = '- The mean is less than the median, indicating left skew'
else:
    skew_note = '- The mean equals the median, indicating no skew'

observations = '\n'.join([
    'Observations:',
    '- The histogram reveals a bimodal distribution (two peaks)',
    outlier_note,
    skew_note,
    '- Box plot shows the data quartiles but obscures the bimodal nature',
])

# Add explanatory text, anchored by its bottom edge inside the margin
# reserved by subplots_adjust below
fig.text(0.5, 0.02, observations, ha='center', va='bottom', multialignment='left',
         fontsize=10, bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

plt.tight_layout()
plt.subplots_adjust(hspace=0.3, bottom=0.25)  # bottom margin holds the observations box
plt.show()

## Task 3: Pair Plot with Seaborn Sample Dataset

**Task Description:** Load one of Seaborn's sample datasets (like 'iris', 'planets', or 'diamonds') and create a pair plot to explore relationships between variables. Add a custom color palette and adjust the appearance to improve readability.

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# Set the visual style
sns.set_theme(style="ticks")

# Load the iris dataset
iris = sns.load_dataset('iris')

# Display the first few rows of the dataset
print("Iris Dataset Preview:")
print(iris.head())

# Display dataset summary
print("\nDataset Summary:")
print(iris.describe())

# Create a custom color palette
custom_palette = {"setosa": "#FF5733", "versicolor": "#33FF57", "virginica": "#3357FF"}

# Create the pair plot. pairplot builds its own figure (sized through
# `height`), so no plt.figure() call is made first: it would only leave an
# empty extra figure behind.
g = sns.pairplot(
    iris,
    hue="species",
    palette=custom_palette,
    diag_kind="kde",
    plot_kws={"s": 80, "alpha": 0.7, "edgecolor": "white", "linewidth": 0.5},
    diag_kws={"fill": True, "linewidth": 1},  # `fill` replaces the deprecated `shade`
    height=2.5
)

# Grow the figure so the title and the notes get their own bands above and
# below the grid, while the facets keep the size seaborn gave them.
pair_fig = g.figure
fig_width, fig_height = pair_fig.get_size_inches()
title_band = 0.5   # inches added above the grid
notes_band = 1.6   # inches added below the grid
new_height = fig_height + title_band + notes_band
old_bottom = pair_fig.subplotpars.bottom
old_top = pair_fig.subplotpars.top
pair_fig.set_size_inches(fig_width, new_height)
# Only top/bottom are changed; the right margin seaborn reserved for the
# legend is left untouched (a plain plt.tight_layout() would discard it).
pair_fig.subplots_adjust(
    bottom=(old_bottom * fig_height + notes_band) / new_height,
    top=(old_top * fig_height + notes_band) / new_height,
)

# Customize the pair plot
pair_fig.suptitle("Iris Dataset: Exploring Relationships Between Features", y=0.99, fontsize=16)

# Add a descriptive text box
analysis_text = (
    "Key Observations:\n"
    "• Setosa is clearly separated from the other species\n"
    "• Petal length and width are highly correlated\n"
    "• Versicolor and virginica show some overlap\n"
    "• Sepal width is the least distinctive feature"
)

# Centered in the band below the grid, away from the legend on the right
pair_fig.text(0.5, 0.02, analysis_text, fontsize=11,
              ha='center', va='bottom', multialignment='left',
              bbox=dict(facecolor='white', alpha=0.8, boxstyle='round,pad=0.5'))

# Show the plot
plt.show()

## Task 4: Customized Visualization with Key Insights

**Task Description:** Choose a simple dataset and create a highly customized visualization that emphasizes a key insight. Include custom colors, annotations, a styled legend, and remove unnecessary chart elements for clarity.

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from matplotlib.ticker import FuncFormatter

# Set the style
plt.style.use('seaborn-v0_8')
plt.rcParams['font.family'] = 'sans-serif'
# Prefer Arial, but list a second family so text still renders cleanly on
# systems where Arial is not installed
plt.rcParams['font.sans-serif'] = ['Arial', 'DejaVu Sans']

# Create a simple dataset - Monthly smartphone sales by brand
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun']
brand_a = [250, 270, 290, 310, 375, 450]  # Units sold in thousands
brand_b = [300, 310, 325, 340, 360, 370]
brand_c = [280, 275, 270, 260, 240, 220]

# Create a figure and axis
fig, ax = plt.subplots(figsize=(12, 7))

# Plot the data with custom styling
ax.plot(months, brand_a, marker='o', linewidth=3, color='#1f77b4', label='Brand A', markersize=10)
ax.plot(months, brand_b, marker='s', linewidth=3, color='#ff7f0e', label='Brand B', markersize=10)
ax.plot(months, brand_c, marker='^', linewidth=3, color='#2ca02c', label='Brand C', markersize=10)

# Month names are plotted at integer positions 0..len(months)-1
last_x = len(months) - 1

# Highlight the key insight - Brand A overtaking Brand B.
# Find the first month in which Brand A outsells Brand B; the lines cross
# somewhere between the previous month and that one, so that interval is shaded.
overtake_idx = next(
    (i for i, (a_units, b_units) in enumerate(zip(brand_a, brand_b)) if a_units > b_units),
    None,
)
if overtake_idx:  # None: A never overtakes; 0: A led from the start (no crossing to show)
    ax.axvspan(overtake_idx - 1, overtake_idx, alpha=0.2, color='yellow',
               label='Brand A Overtakes Brand B')
    ax.annotate('Brand A overtakes Brand B',
                xy=(overtake_idx, brand_a[overtake_idx]),
                xytext=(overtake_idx - 1.5, brand_a[overtake_idx] + 25),
                arrowprops=dict(facecolor='black', shrink=0.05, width=2, headwidth=10),
                fontsize=12, fontweight='bold')

# Highlight the significant growth of Brand A.
# The data is in thousands of units, hence the K suffix on the absolute gain.
growth_a = brand_a[-1] - brand_a[0]
ax.annotate(f'+{growth_a:,d}K units\n(+{growth_a/brand_a[0]*100:.1f}%)',
            xy=(last_x, brand_a[-1]), xytext=(last_x + 0.1, brand_a[-1]),
            fontsize=11, fontweight='bold', color='#1f77b4')

# Highlight the decline of Brand C
ax.annotate(f'{(brand_c[-1]-brand_c[0])/brand_c[0]*100:.1f}%',
            xy=(last_x, brand_c[-1]), xytext=(last_x + 0.1, brand_c[-1]-15),
            fontsize=11, fontweight='bold', color='#2ca02c')

# Widen the x-range on the right so the end-of-line annotations sit inside the axes
ax.set_xlim(-0.3, last_x + 0.9)

# Add gridlines but only on the y-axis for cleaner look
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Remove the top and right spines for a cleaner look
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Add descriptive labels and title.
# The tick labels already carry a K suffix, so the axis label does not
# repeat the scale (avoids reading "250K thousands").
ax.set_xlabel('Month (2025)', fontsize=14)
ax.set_ylabel('Units Sold', fontsize=14)
ax.set_title('Smartphone Sales Performance by Brand', fontsize=18, pad=20, fontweight='bold')

# Format the y-axis to show as thousands with a K suffix
def thousands(x, pos):
    """Format a tick value as a whole number followed by ``K``.

    Args:
        x: Tick value to format (the plotted data is already in thousands).
        pos: Tick position passed by Matplotlib's FuncFormatter; unused.

    Returns:
        The value rounded to the nearest integer with a ``K`` suffix,
        e.g. ``'250K'``.
    """
    # Round to nearest instead of truncating: a tick that lands on
    # 299.99999999999994 through float arithmetic must read 300K, not 299K.
    # int() keeps the output free of a trailing ".0" if round() returns a float.
    return f'{int(round(x))}K'

# Show y tick labels through the K-suffix formatter defined above
ax.yaxis.set_major_formatter(FuncFormatter(thousands))

# Add a styled legend, centered below the axes
legend = ax.legend(fontsize=12, loc='upper center', bbox_to_anchor=(0.5, -0.15),
          fancybox=True, shadow=True, ncol=3)

# Extend the y-range downward so the insight box in the lower-left corner
# gets an empty band of its own; with the autoscaled range it is drawn over
# the first points of the Brand A line.
lowest_sales = min(min(brand_a), min(brand_b), min(brand_c))
highest_sales = max(max(brand_a), max(brand_b), max(brand_c))
sales_span = highest_sales - lowest_sales
ax.set_ylim(lowest_sales - 0.4 * sales_span, highest_sales + 0.05 * sales_span)

# Add explanatory text box with the key insight (position is in axes fractions)
textbox_props = dict(boxstyle='round,pad=0.5', facecolor='white', alpha=0.8, edgecolor='gray')
ax.text(0.02, 0.05, '"Brand A shows exceptional Q2 growth,\n'
         'overtaking the market leader Brand B in May,\n'
         'while Brand C shows consistent decline."',
        transform=ax.transAxes, fontsize=13,
        verticalalignment='bottom', bbox=textbox_props, fontweight='bold')

# Add a subtle subtitle with the data source
fig.text(0.5, 0.01, 'Source: Monthly Smartphone Market Report - May 2025',
         ha='center', fontsize=10, style='italic')

plt.tight_layout()
plt.subplots_adjust(bottom=0.25)  # Make room for the legend
plt.show()

## Task 5: Data Communication and Storytelling

**Task Description:** Design a visualization that tells a clear story about a dataset of your choice. Include an explicit headline that conveys the main insight, visual cues that direct attention to important points, and explanatory text that helps interpret the data.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.ticker import PercentFormatter

# Set the visual style for storytelling
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['font.family'] = 'serif'

# Create dataset - Renewable energy adoption over time by region
years = np.array([2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025])
north_region = np.array([12, 15, 22, 28, 35, 42, 48, 55])
south_region = np.array([8, 9, 11, 14, 21, 32, 45, 62])
east_region = np.array([18, 20, 24, 27, 31, 36, 39, 43])
west_region = np.array([22, 25, 27, 28, 30, 31, 32, 34])
national_target = np.array([15, 20, 25, 30, 35, 40, 45, 50])

# Figures quoted in the annotations and headline are derived from the arrays
# above so the text cannot drift away from the plotted data.
current_year = years[-1]
current_target = national_target[-1]
years_covered = years[-1] - years[0]                       # elapsed years, not number of samples
south_lookback = 4                                         # window for South's recent gain, in years
south_gain = south_region[-1] - south_region[-1 - south_lookback]
west_gain = west_region[-1] - west_region[0]
target_margin = south_region[-1] - current_target          # percentage points above target

# Create figure with two subplots (main chart and small context chart)
fig = plt.figure(figsize=(14, 10))
gs = fig.add_gridspec(3, 3)
ax1 = fig.add_subplot(gs[0:2, :])  # Main chart taking 2/3 of vertical space
ax2 = fig.add_subplot(gs[2, :])    # Small chart at bottom taking 1/3 of vertical space

# Plot data on main chart
ax1.plot(years, north_region, 'o-', linewidth=3, markersize=8, color='#3498db', label='North Region')
ax1.plot(years, south_region, 's-', linewidth=3, markersize=8, color='#e74c3c', label='South Region')
ax1.plot(years, east_region, '^-', linewidth=3, markersize=8, color='#2ecc71', label='East Region')
ax1.plot(years, west_region, 'd-', linewidth=3, markersize=8, color='#f39c12', label='West Region')
ax1.plot(years, national_target, '--', linewidth=2, color='black', alpha=0.7, label='National Target')

# Add vertical line to highlight the current year
ax1.axvline(x=current_year, color='gray', linestyle='--', alpha=0.5)
ax1.text(current_year + 0.1, 10, 'Current\nYear', fontsize=10, color='gray')

# Add a horizontal line showing the current-year target goal
ax1.axhline(y=current_target, xmin=0.85, xmax=1, color='black', linestyle=':', alpha=0.7)
ax1.text(current_year + 0.1, current_target + 1,
         f'{current_year} Target ({current_target}%)', fontsize=10)

# Highlight South Region's dramatic improvement
# Draw an arrow and annotation ("pts" = percentage points)
ax1.annotate(f'South Region:\nDramatic improvement\n(+{south_gain} pts in {south_lookback} years)',
            xy=(current_year, south_region[-1]), xytext=(2022, 70),
            arrowprops=dict(facecolor='#e74c3c', shrink=0.05, width=2),
            fontsize=12, fontweight='bold', color='#e74c3c')

# Shade the area where South surpasses North.
# The crossover is located by linear interpolation between the last year South
# trails and the first year it leads (assumes South starts behind and ends
# ahead, as it does in this dataset).
south_minus_north = south_region - north_region
lead_idx = int(np.argmax(south_minus_north > 0))  # first year with South ahead
gap_before = south_minus_north[lead_idx - 1]
gap_after = south_minus_north[lead_idx]
crossover_fraction = -gap_before / (gap_after - gap_before)
intersection_year = years[lead_idx - 1] + crossover_fraction * (years[lead_idx] - years[lead_idx - 1])
ax1.axvspan(intersection_year, current_year, alpha=0.1, color='#e74c3c',
           label='South Region Takes Lead')

# Highlight West Region's slow progress
ax1.annotate(f'West Region:\nSlowest growth rate\n(+{west_gain} pts over {years_covered} years)',
            xy=(current_year, west_region[-1]), xytext=(2023, 25),
            arrowprops=dict(facecolor='#f39c12', shrink=0.05, width=2),
            fontsize=12, fontweight='bold', color='#f39c12')

# Format main chart
ax1.set_ylabel('Renewable Energy Adoption (%)', fontsize=14)
ax1.set_ylim(0, 90)  # headroom above the data for the legend and the findings box
ax1.grid(True, alpha=0.3)
ax1.yaxis.set_major_formatter(PercentFormatter())

# Keep the year labels on the main chart: the bar chart below uses its own
# category positions, so its tick labels do not line up with this axis.
ax1.set_xticks(years)
# Extra room on the right keeps the "Current Year" and target labels inside the axes
ax1.set_xlim(years[0] - 0.4, current_year + 1.3)

# Create a legend with custom styling; two columns keep it shallow so the
# findings box fits underneath it in the same corner
legend = ax1.legend(loc='upper left', fontsize=12, frameon=True, ncol=2,
                   fancybox=True, framealpha=0.8, title='Regions', title_fontsize=13)

# --- Small context chart (annual growth rates) ---
# Calculate year-over-year growth rates
north_growth = np.diff(north_region) / north_region[:-1] * 100
south_growth = np.diff(south_region) / south_region[:-1] * 100
east_growth = np.diff(east_region) / east_region[:-1] * 100
west_growth = np.diff(west_region) / west_region[:-1] * 100

# Years for the difference data (one fewer than original)
growth_years = years[1:]

# Bar chart of growth rates: four bars per year, offset around each group centre
bar_width = 0.2
x = np.arange(len(growth_years))
south_bar_x = x - bar_width/2  # centres of the South bars, reused for the peak label
ax2.bar(x - bar_width*1.5, north_growth, bar_width, color='#3498db', label='North')
ax2.bar(south_bar_x, south_growth, bar_width, color='#e74c3c', label='South')
ax2.bar(x + bar_width/2, east_growth, bar_width, color='#2ecc71', label='East')
ax2.bar(x + bar_width*1.5, west_growth, bar_width, color='#f39c12', label='West')

# Formatting for the smaller chart
ax2.set_ylabel('Annual Growth Rate (%)', fontsize=12)
ax2.set_xlabel('Year', fontsize=14)
ax2.set_xticks(x)
ax2.set_xticklabels([f'{year}' for year in growth_years])
ax2.grid(axis='y', alpha=0.3)
ax2.margins(y=0.2)  # headroom so the peak label stays inside the axes

# Label South's peak year-over-year growth directly above its own bar
max_growth_idx = np.argmax(south_growth)
ax2.annotate(f'{south_growth[max_growth_idx]:.1f}%',
            xy=(south_bar_x[max_growth_idx], south_growth[max_growth_idx]),
            xytext=(0, 3), textcoords='offset points',
            fontsize=10, fontweight='bold', color='#e74c3c',
            ha='center', va='bottom')

# Titles and headlines
headline = f"South Region Surpasses National Renewable Energy Target in {current_year}"
subtitle = (
    "After years of lagging, the South Region has achieved the most dramatic improvement,\n"
    f"surpassing all other regions and exceeding the {current_year} national target "
    f"by {target_margin} percentage points"
)

fig.suptitle(headline, fontsize=18, fontweight='bold', y=0.98)
# Anchored by its top edge so the two-line subtitle hangs below the headline
fig.text(0.5, 0.945, subtitle, ha='center', va='top', fontsize=14, fontstyle='italic')

# Add explanatory text box with key insights
insights = (
    "Key Findings:\n"
    "• South Region shows highest growth rate, especially after 2021\n"
    "• North Region maintains steady, consistent growth\n"
    "• East Region slips gradually behind national targets\n"
    "• West Region falls increasingly behind target despite early lead"
)
props = dict(boxstyle='round', facecolor='white', alpha=0.7)
# Placed below the legend and above the lines on the left side of the chart
ax1.text(years[0] + 0.2, 64, insights, fontsize=12, verticalalignment='top', bbox=props)

# Add source and methodology notes
fig.text(0.5, 0.02, "Source: National Renewable Energy Database (2018-2025) | Analysis as of May 2025",
        ha='center', fontsize=10, fontstyle='italic')

# Layout adjustments: the top margin holds headline + subtitle, the bottom
# margin keeps the source note clear of the "Year" axis label
plt.tight_layout()
plt.subplots_adjust(top=0.87, bottom=0.1, hspace=0.3)
plt.show()