In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as ticker
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.gridspec import GridSpec

# Configure visualization settings
%matplotlib inline
plt.style.use('seaborn-whitegrid')
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['axes.labelsize'] = 12
plt.rcParams['axes.titlesize'] = 14
plt.rcParams['xtick.labelsize'] = 10
plt.rcParams['ytick.labelsize'] = 10
plt.rcParams['legend.fontsize'] = 10

print("Libraries imported and visualization settings configured!")


In [None]:
# Build a reproducible synthetic daily sales table (120 days from 2023-01-01).
# NOTE: every np.random call below consumes the seeded global stream, so the
# order of the draws must not change or the generated data will differ.
np.random.seed(42)
n_days = 120
dates = pd.date_range('2023-01-01', periods=n_days, freq='D')

# Deterministic shapes layered on top of the noise
two_cycle_wave = np.sin(np.linspace(0, 4*np.pi, n_days))
one_cycle_wave = np.sin(np.linspace(0, 2*np.pi, n_days))

# Noise draws, in the order: sales, marketing spend, temperature
sales_noise = np.random.normal(loc=100, scale=15, size=n_days)
marketing_noise = np.random.normal(loc=20, scale=5, size=n_days)
temperature_noise = np.random.normal(loc=70, scale=10, size=n_days)

sales_data = pd.DataFrame({
    'date': dates,
    # noise + seasonal wave + upward trend
    'sales': sales_noise + two_cycle_wave * 20 + np.linspace(0, 40, n_days),
    'marketing_spend': marketing_noise + two_cycle_wave * 5 + np.linspace(0, 10, n_days),
    # noise + a single slow wave, no trend
    'temperature': temperature_noise + one_cycle_wave * 15,
})

# Calendar month and the season it belongs to (any month not listed is Winter)
season_by_month = {
    3: 'Spring', 4: 'Spring', 5: 'Spring',
    6: 'Summer', 7: 'Summer', 8: 'Summer',
    9: 'Fall', 10: 'Fall', 11: 'Fall',
}
sales_data['month'] = sales_data['date'].dt.month
sales_data['season'] = [season_by_month.get(int(m), 'Winter') for m in sales_data['month']]

# Random categorical attributes; keyword order fixes both the draw order and
# the resulting column order.
sales_data = sales_data.assign(
    # promotion flag, True on roughly 20% of days
    promotion=np.random.choice([True, False], size=n_days, p=[0.2, 0.8]),
    # sales region, uniform over four regions
    region=np.random.choice(['North', 'South', 'East', 'West'], size=n_days),
    # 1-5 satisfaction rating, skewed towards 4
    customer_satisfaction=np.random.choice([1, 2, 3, 4, 5], size=n_days, p=[0.05, 0.1, 0.2, 0.4, 0.25]),
    # product category, uniform over five categories
    product_category=np.random.choice(['Electronics', 'Clothing', 'Home', 'Books', 'Sports'], size=n_days),
)

# Preview the first rows
print("Sample sales data:")
print(sales_data.head())

# Summary statistics of the numeric columns
print("\nBasic statistics:")
print(sales_data.describe())


In [None]:
# Line plot: the three daily series on one shared axis. Marketing spend is
# multiplied by 5 so it is readable next to the sales curve.
line_fig, line_ax = plt.subplots(figsize=(12, 6))

line_specs = [
    (sales_data['sales'], 'b-', 'Sales'),
    (sales_data['marketing_spend'] * 5, 'r--', 'Marketing Spend (x5)'),
    (sales_data['temperature'], 'g-.', 'Temperature'),
]
for series_values, line_format, series_label in line_specs:
    line_ax.plot(sales_data['date'], series_values, line_format,
                 linewidth=2, label=series_label)

line_ax.set_title('Sales, Marketing Spend, and Temperature Over Time', fontsize=16)
line_ax.set_xlabel('Date', fontsize=12)
line_ax.set_ylabel('Value', fontsize=12)
line_ax.grid(True, linestyle='--', alpha=0.7)
line_ax.legend(loc='upper left')
line_fig.tight_layout()
plt.show()

# Scatter plot: sales against marketing spend, each point coloured by that
# day's temperature.
scatter_fig, scatter_ax = plt.subplots(figsize=(10, 6))
scatter_points = scatter_ax.scatter(
    sales_data['marketing_spend'],
    sales_data['sales'],
    c=sales_data['temperature'],
    cmap='viridis',
    alpha=0.7,
    s=80,
    edgecolors='k',
    linewidths=0.5,
)

scatter_ax.set_title('Relationship Between Marketing Spend and Sales', fontsize=16)
scatter_ax.set_xlabel('Marketing Spend ($)', fontsize=12)
scatter_ax.set_ylabel('Sales ($)', fontsize=12)
# Colour scale for the temperature encoding
scatter_fig.colorbar(scatter_points, ax=scatter_ax, label='Temperature (°F)')
scatter_ax.grid(True, linestyle='--', alpha=0.7)
scatter_fig.tight_layout()
plt.show()

# Bar chart: mean daily sales per calendar month.
# The grouping key keeps the name 'date', so after reset_index the 'date'
# column holds month numbers rather than timestamps.
month_numbers = sales_data['date'].dt.month
monthly_sales = sales_data['sales'].groupby(month_numbers).mean().reset_index()
# Abbreviated month names ('Jan', 'Feb', ...) for the x axis
monthly_sales['month_name'] = [
    pd.Timestamp(2023, int(month), 1).strftime('%b') for month in monthly_sales['date']
]

bar_fig, bar_ax = plt.subplots(figsize=(12, 6))
bars = bar_ax.bar(monthly_sales['month_name'], monthly_sales['sales'],
                  color='skyblue', edgecolor='navy')

# Print each bar's value just above its top edge
for rect in bars:
    bar_top = rect.get_height()
    label_x = rect.get_x() + rect.get_width()/2.
    bar_ax.text(label_x, bar_top + 5, f'{bar_top:.1f}',
                ha='center', va='bottom', fontsize=10)

bar_ax.set_title('Average Monthly Sales', fontsize=16)
bar_ax.set_xlabel('Month', fontsize=12)
bar_ax.set_ylabel('Average Sales ($)', fontsize=12)
bar_ax.grid(True, linestyle='--', alpha=0.7, axis='y')
bar_fig.tight_layout()
plt.show()

# Histogram: distribution of daily sales with mean and median marked.
daily_sales = sales_data['sales']
mean_sales = daily_sales.mean()
median_sales = daily_sales.median()

hist_fig, hist_ax = plt.subplots(figsize=(10, 6))
hist_ax.hist(daily_sales, bins=15, color='skyblue', edgecolor='black', alpha=0.7)
# Dashed reference lines; the legend entries carry the numeric values
hist_ax.axvline(mean_sales, color='red', linestyle='dashed', linewidth=2,
                label=f'Mean: {mean_sales:.2f}')
hist_ax.axvline(median_sales, color='green', linestyle='dashed', linewidth=2,
                label=f'Median: {median_sales:.2f}')

hist_ax.set_title('Distribution of Sales', fontsize=16)
hist_ax.set_xlabel('Sales ($)', fontsize=12)
hist_ax.set_ylabel('Frequency', fontsize=12)
hist_ax.grid(True, linestyle='--', alpha=0.7, axis='y')
hist_ax.legend()
hist_fig.tight_layout()
plt.show()


In [None]:
# Pie chart: share of total sales contributed by each product category.
category_sales = sales_data.groupby('product_category')['sales'].sum()

# Pull the first wedge out slightly. The explode list is derived from the
# number of categories actually present, because plt.pie raises ValueError
# when its length does not match the data.
explode = [0.1 if position == 0 else 0 for position in range(len(category_sales))]

plt.figure(figsize=(10, 8))
plt.pie(category_sales, labels=category_sales.index, autopct='%1.1f%%',
        startangle=90, shadow=True, explode=explode,
        colors=sns.color_palette('pastel'))
plt.title('Sales by Product Category', fontsize=16)
plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle
plt.tight_layout()
plt.show()

# Box plot: spread of the three key metrics side by side. Marketing spend is
# multiplied by 5 so its box is on a scale comparable to the others.
box_metrics = {
    'Sales': sales_data['sales'],
    'Marketing Spend (x5)': sales_data['marketing_spend'] * 5,
    'Temperature': sales_data['temperature'],
}

plt.figure(figsize=(12, 6))
plt.boxplot(list(box_metrics.values()),
            patch_artist=True,
            boxprops=dict(facecolor='skyblue'),
            medianprops=dict(color='navy'),
            flierprops=dict(marker='o', markerfacecolor='red', markersize=8))
# Name the boxes through the tick labels rather than boxplot's `labels=`
# keyword, which newer Matplotlib deprecates in favour of `tick_labels`.
# Boxes sit at x = 1..N by default.
plt.xticks(list(range(1, len(box_metrics) + 1)), list(box_metrics.keys()))

plt.title('Box Plot of Key Metrics', fontsize=16)
plt.ylabel('Value', fontsize=12)
plt.grid(True, linestyle='--', alpha=0.7, axis='y')
plt.tight_layout()
plt.show()

# Area plot: sales and (x5-scaled) marketing spend as translucent filled
# regions from zero up to each day's value.
area_fig, area_ax = plt.subplots(figsize=(12, 6))

area_specs = [
    (sales_data['sales'], 'skyblue', 'Sales'),
    (sales_data['marketing_spend'] * 5, 'salmon', 'Marketing Spend (x5)'),
]
for area_values, fill_color, area_label in area_specs:
    area_ax.fill_between(sales_data['date'], area_values,
                         color=fill_color, alpha=0.4, label=area_label)

area_ax.set_title('Sales and Marketing Spend Over Time', fontsize=16)
area_ax.set_xlabel('Date', fontsize=12)
area_ax.set_ylabel('Value', fontsize=12)
area_ax.grid(True, linestyle='--', alpha=0.7)
area_ax.legend()
area_fig.tight_layout()
plt.show()


In [None]:
# Violin plot: distribution of sales within each season, with quartile lines
# drawn inside every violin.
violin_fig, violin_ax = plt.subplots(figsize=(12, 6))
sns.violinplot(data=sales_data, x='season', y='sales',
               palette='pastel', inner='quartile', ax=violin_ax)
violin_ax.set_title('Sales Distribution by Season', fontsize=16)
violin_ax.set_xlabel('Season', fontsize=12)
violin_ax.set_ylabel('Sales ($)', fontsize=12)
violin_ax.grid(True, linestyle='--', alpha=0.7, axis='y')
violin_fig.tight_layout()
plt.show()

# Seaborn box plot: sales per region, split by whether a promotion was running.
region_box_fig, region_box_ax = plt.subplots(figsize=(12, 6))
sns.boxplot(data=sales_data, x='region', y='sales', hue='promotion',
            palette='Set2', ax=region_box_ax)
region_box_ax.set_title('Sales by Region and Promotion Status', fontsize=16)
region_box_ax.set_xlabel('Region', fontsize=12)
region_box_ax.set_ylabel('Sales ($)', fontsize=12)
region_box_ax.grid(True, linestyle='--', alpha=0.7, axis='y')
# Rebuild the legend so it carries a title
region_box_ax.legend(title='Promotion')
region_box_fig.tight_layout()
plt.show()

# Swarm plot: every day's sales as one point, grouped by satisfaction rating.
swarm_fig, swarm_ax = plt.subplots(figsize=(12, 6))
sns.swarmplot(data=sales_data, x='customer_satisfaction', y='sales',
              palette='viridis', size=8, ax=swarm_ax)
swarm_ax.set_title('Sales by Customer Satisfaction Rating', fontsize=16)
swarm_ax.set_xlabel('Customer Satisfaction (1-5)', fontsize=12)
swarm_ax.set_ylabel('Sales ($)', fontsize=12)
swarm_ax.grid(True, linestyle='--', alpha=0.7, axis='y')
swarm_fig.tight_layout()
plt.show()

# Pair plot: pairwise scatter plots of the numeric metrics, with a KDE of each
# metric on the diagonal. pairplot creates its own figure, which then becomes
# the current pyplot figure for the title/layout calls below.
pair_columns = ['sales', 'marketing_spend', 'temperature', 'customer_satisfaction']
pair_frame = sales_data[pair_columns]
sns.pairplot(pair_frame, height=2.5, diag_kind='kde', plot_kws={'alpha': 0.6})
# y slightly above 1 places the title over the top row of panels
plt.suptitle('Pair Plot of Key Metrics', y=1.02, fontsize=16)
plt.tight_layout()
plt.show()


In [None]:
# Heatmap of mean sales for every region x product-category combination.
# Rows are regions, columns are product categories.
region_category_sales = sales_data.pivot_table(
    values='sales',
    index='region',
    columns='product_category',
    aggfunc='mean',
)

heatmap_fig, heatmap_ax = plt.subplots(figsize=(12, 8))
sns.heatmap(region_category_sales, annot=True, cmap='YlGnBu', fmt='.1f',
            linewidths=0.5, ax=heatmap_ax)
heatmap_ax.set_title('Average Sales by Region and Product Category', fontsize=16)
heatmap_ax.set_xlabel('Product Category', fontsize=12)
heatmap_ax.set_ylabel('Region', fontsize=12)
heatmap_fig.tight_layout()
plt.show()

# Correlation heatmap: pairwise correlations of the numeric metrics, with the
# colour scale pinned to the full [-1, 1] range.
correlation_columns = ['sales', 'marketing_spend', 'temperature', 'customer_satisfaction']
correlation = sales_data.loc[:, correlation_columns].corr()

corr_fig, corr_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(correlation, annot=True, cmap='coolwarm', linewidths=0.5,
            vmin=-1, vmax=1, ax=corr_ax)
corr_ax.set_title('Correlation Matrix of Key Metrics', fontsize=16)
corr_fig.tight_layout()
plt.show()

# Joint plot: regression scatter of sales against marketing spend with the
# marginal distribution of each variable on the sides.
# sns.jointplot is a figure-level function that creates its own figure (sized
# by `height`), so no plt.figure() call precedes it; one would only leave an
# empty extra figure in the output.
sns.jointplot(x='marketing_spend', y='sales', data=sales_data, kind='reg',
              scatter_kws={'alpha': 0.5}, height=8)
# The joint grid's figure is now the current figure, so pyplot calls target it.
plt.suptitle('Joint Distribution of Marketing Spend and Sales', y=1.02, fontsize=16)
plt.tight_layout()
plt.show()

# Categorical plot: mean sales per region, one bar per season.
# sns.catplot is a figure-level function that creates its own figure (sized
# by `height` and `aspect`), so no plt.figure() call precedes it; one would
# only leave an empty extra figure in the output.
sns.catplot(x='region', y='sales', hue='season', data=sales_data, kind='bar', height=6, aspect=2, palette='viridis')
# With a single facet, the current axes is the catplot's only panel.
plt.title('Sales by Region and Season', fontsize=16)
plt.xlabel('Region', fontsize=12)
plt.ylabel('Average Sales ($)', fontsize=12)
plt.tight_layout()
plt.show()


In [None]:
# Dashboard: four different views of the sales data on a 2x2 grid.
fig = plt.figure(figsize=(15, 10))
gs = GridSpec(2, 2, figure=fig)
dashed_grid = dict(linestyle='--', alpha=0.7)  # grid styling shared by all panels

# Top-left: sales over time
trend_ax = fig.add_subplot(gs[0, 0])
trend_ax.plot(sales_data['date'], sales_data['sales'], 'b-', linewidth=2)
trend_ax.set_title('Sales Over Time', fontsize=14)
trend_ax.set_xlabel('Date', fontsize=10)
trend_ax.set_ylabel('Sales ($)', fontsize=10)
trend_ax.grid(True, **dashed_grid)

# Top-right: mean sales per region, highest first
region_ax = fig.add_subplot(gs[0, 1])
region_sales = (
    sales_data.groupby('region')['sales']
    .mean()
    .sort_values(ascending=False)
)
sns.barplot(x=region_sales.index, y=region_sales.values, ax=region_ax, palette='Blues_d')
region_ax.set_title('Average Sales by Region', fontsize=14)
region_ax.set_xlabel('Region', fontsize=10)
region_ax.set_ylabel('Average Sales ($)', fontsize=10)
region_ax.grid(True, axis='y', **dashed_grid)

# Bottom-left: marketing spend vs sales, coloured by temperature
spend_ax = fig.add_subplot(gs[1, 0])
scatter = spend_ax.scatter(
    sales_data['marketing_spend'],
    sales_data['sales'],
    c=sales_data['temperature'],
    cmap='viridis',
    alpha=0.7,
    s=60,
    edgecolors='k',
    linewidths=0.5,
)
spend_ax.set_title('Marketing Spend vs Sales', fontsize=14)
spend_ax.set_xlabel('Marketing Spend ($)', fontsize=10)
spend_ax.set_ylabel('Sales ($)', fontsize=10)
spend_ax.grid(True, **dashed_grid)
fig.colorbar(scatter, ax=spend_ax, label='Temperature (°F)')

# Bottom-right: sales histogram with KDE overlay and the mean marked
dist_ax = fig.add_subplot(gs[1, 1])
sales_mean = sales_data['sales'].mean()
sns.histplot(sales_data['sales'], kde=True, ax=dist_ax, color='skyblue', edgecolor='black')
dist_ax.axvline(sales_mean, color='red', linestyle='dashed', linewidth=2,
                label=f'Mean: {sales_mean:.2f}')
dist_ax.set_title('Sales Distribution', fontsize=14)
dist_ax.set_xlabel('Sales ($)', fontsize=10)
dist_ax.set_ylabel('Frequency', fontsize=10)
dist_ax.grid(True, axis='y', **dashed_grid)
dist_ax.legend()

fig.suptitle('Sales Data Analysis Dashboard', fontsize=18)
fig.tight_layout()
# Re-open head-room for the figure title after tight_layout
fig.subplots_adjust(top=0.92)
plt.show()


In [None]:
# Custom styled visualization: 100%-stacked bars showing how each product
# category's sales split across regions.

# Segment colours, plus a colormap built from the same colours (the colormap
# itself is not used by the plot below).
colors = ["#ff9999", "#66b3ff", "#99ff99", "#ffcc99", "#c2c2f0"]
custom_cmap = LinearSegmentedColormap.from_list("custom_cmap", colors)

# Prepare data: rows = product categories, columns = regions.
# fill_value=0 keeps a category/region pair with no rows at 0 instead of NaN;
# a NaN would propagate into the running `bottom` offsets and break every
# segment and label stacked above it.
category_region_sales = (
    sales_data.groupby(['product_category', 'region'])['sales']
    .sum()
    .unstack(fill_value=0)
)
category_region_sales = category_region_sales.div(category_region_sales.sum(axis=1), axis=0) * 100  # Convert to percentage

# Create the plot
fig, ax = plt.subplots(figsize=(14, 8))

# Plot stacked bars region by region; `bottom` is the running height of each
# category's stack and doubles as the base for that segment's label.
bottom = np.zeros(len(category_region_sales))
for i, region in enumerate(category_region_sales.columns):
    values = category_region_sales[region].to_numpy()
    ax.bar(category_region_sales.index, values, bottom=bottom, label=region,
           color=colors[i % len(colors)], edgecolor='white', width=0.6)
    # Percentage label centred in each segment
    for x_pos, (base, value) in enumerate(zip(bottom, values)):
        if value > 5:  # Only show labels for segments > 5%
            ax.text(x_pos, base + value/2, f'{value:.1f}%', ha='center', va='center', fontsize=10, fontweight='bold')
    bottom = bottom + values

# Customize the plot
ax.set_title('Sales Distribution by Product Category and Region (%)', fontsize=18, pad=20)
ax.set_xlabel('Product Category', fontsize=14, labelpad=10)
ax.set_ylabel('Percentage of Sales (%)', fontsize=14, labelpad=10)
ax.set_ylim(0, 100)
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Customize ticks and legend
ax.tick_params(axis='both', which='major', labelsize=12)
ax.legend(title='Region', title_fontsize=12, fontsize=10, loc='upper right')

# Add a subtle background color
fig.patch.set_facecolor('#f8f9fa')
ax.set_facecolor('#f8f9fa')

# Footnote in the bottom-left corner of the figure
plt.annotate('Data Source: Sales Records 2023', xy=(0.02, 0.02), xycoords='figure fraction',
             fontsize=10, color='gray', style='italic')

plt.tight_layout()
plt.show()


In [None]:
# Time series decomposition: split daily sales into trend, seasonal and
# residual parts and plot each one under the original series.
from statsmodels.tsa.seasonal import seasonal_decompose

# Daily sales indexed by date
ts_data = sales_data.set_index('date').loc[:, 'sales']

# Additive decomposition with a 30-observation seasonal period
decomposition = seasonal_decompose(ts_data, model='additive', period=30)

# One panel per component, all sharing the date axis
fig, axes = plt.subplots(4, 1, figsize=(14, 12), sharex=True)

# (attribute on the decomposition result, line format, panel title)
component_panels = [
    ('observed', 'k-', 'Original Time Series'),
    ('trend', 'b-', 'Trend Component'),
    ('seasonal', 'g-', 'Seasonal Component'),
    ('resid', 'r-', 'Residual Component'),
]
for panel_ax, (component_name, line_format, panel_title) in zip(axes, component_panels):
    panel_ax.plot(getattr(decomposition, component_name), line_format, linewidth=2)
    panel_ax.set_title(panel_title, fontsize=14)
    panel_ax.grid(True, linestyle='--', alpha=0.7)

# Only the bottom panel needs an x label because the axis is shared
axes[-1].set_xlabel('Date', fontsize=12)

fig.suptitle('Time Series Decomposition of Sales Data', fontsize=18)
fig.tight_layout()
# Re-open head-room for the figure title after tight_layout
fig.subplots_adjust(top=0.95)
plt.show()


In [None]:
# Interactive plot with hover annotations
# NOTE: the slider and hover callbacks only fire under an interactive
# Matplotlib backend (e.g. `%matplotlib widget`); under the inline backend
# selected in this notebook's setup cell the figure renders as a static image.
from matplotlib.widgets import Slider

# Create a figure and axis
fig, ax = plt.subplots(figsize=(12, 8))
plt.subplots_adjust(bottom=0.25)  # Make room for the slider

# Initial plot with all data
scatter = ax.scatter(sales_data['marketing_spend'], sales_data['sales'],
                    c=sales_data['temperature'], cmap='viridis',
                    alpha=0.7, s=80, edgecolors='k', linewidths=0.5)

# Add a colorbar
cbar = fig.colorbar(scatter, ax=ax)
cbar.set_label('Temperature (°F)', fontsize=12)

# Add title and labels
ax.set_title('Interactive Relationship Between Marketing Spend and Sales', fontsize=16)
ax.set_xlabel('Marketing Spend ($)', fontsize=12)
ax.set_ylabel('Sales ($)', fontsize=12)
ax.grid(True, linestyle='--', alpha=0.7)

# Add a slider for temperature filtering
ax_slider = plt.axes([0.2, 0.1, 0.65, 0.03], facecolor='lightgoldenrodyellow')
temp_min, temp_max = sales_data['temperature'].min(), sales_data['temperature'].max()
slider = Slider(ax_slider, 'Min Temperature (°F)', temp_min, temp_max, valinit=temp_min)

# Positional row numbers (into sales_data) of the points currently drawn, in
# the same order as the scatter's offsets. The hover callback needs this to
# translate "n-th drawn point" back to the right DataFrame row once the slider
# has filtered some points out.
visible_rows = {'positions': np.arange(len(sales_data))}


def update(val):
    """Redraw the scatter with only the days at or above the slider temperature.

    Args:
        val: New slider value passed by Matplotlib; the current value is read
            from ``slider.val``.
    """
    min_temp = slider.val
    keep = (sales_data['temperature'] >= min_temp).to_numpy()
    visible_rows['positions'] = np.flatnonzero(keep)
    shown = sales_data.iloc[visible_rows['positions']]
    scatter.set_offsets(np.c_[shown['marketing_spend'], shown['sales']])
    scatter.set_array(shown['temperature'].to_numpy())
    # Any tooltip on screen may point at a point that no longer exists
    annot.set_visible(False)
    fig.canvas.draw_idle()


slider.on_changed(update)

# Add annotation for hover
annot = ax.annotate("", xy=(0,0), xytext=(20,20), textcoords="offset points",
                    bbox=dict(boxstyle="round", fc="white", ec="black", alpha=0.8),
                    arrowprops=dict(arrowstyle="->"), fontsize=10)
annot.set_visible(False)


def update_annot(ind):
    """Move the tooltip to the hovered point and fill in that day's details.

    Args:
        ind: Details dict returned by ``scatter.contains(event)``; ``ind["ind"]``
            lists the indices of the drawn points under the cursor.
    """
    drawn_index = ind["ind"][0]
    annot.xy = scatter.get_offsets()[drawn_index]
    # Map the index among the drawn points back to the DataFrame row; using
    # drawn_index directly would show a different day once points are filtered.
    row = sales_data.iloc[visible_rows['positions'][drawn_index]]
    text = (
        f"Date: {row['date'].strftime('%Y-%m-%d')}\n"
        f"Sales: ${row['sales']:.2f}\n"
        f"Marketing: ${row['marketing_spend']:.2f}\n"
        f"Temp: {row['temperature']:.1f}°F\n"
        f"Region: {row['region']}"
    )
    annot.set_text(text)


def hover(event):
    """Show the tooltip while the cursor is over a point, hide it otherwise.

    Args:
        event: Matplotlib mouse-motion event.
    """
    was_visible = annot.get_visible()
    if event.inaxes == ax:
        cont, ind = scatter.contains(event)
        if cont:
            update_annot(ind)
            annot.set_visible(True)
            fig.canvas.draw_idle()
            return
    # Cursor is off every point (or outside the axes entirely): hide a stale tooltip
    if was_visible:
        annot.set_visible(False)
        fig.canvas.draw_idle()


fig.canvas.mpl_connect("motion_notify_event", hover)

plt.show()
