In [56]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [66]:
# Locations of the three input CSV files
customers_path, products_path, transactions_path = (
    "/content/Customers.csv",
    "/content/Products.csv",
    "/content/Transactions.csv",
)

# Read each CSV into its own DataFrame (same order as the paths above)
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_path)
    for csv_path in (customers_path, products_path, transactions_path)
)



In [58]:
# Parse the date columns into datetime values so the `.dt` accessor
# (year / monthly period extraction) can be used on them in later cells.
customers_df["SignupDate"] = customers_df["SignupDate"].pipe(pd.to_datetime)
transactions_df["TransactionDate"] = transactions_df["TransactionDate"].pipe(pd.to_datetime)


**Graph - 1**

**Distribution of Customers by Region**  This interactive bar chart displays the number of customers distributed across different regions. By hovering over the bars, you can see the exact number of customers for each region. The chart includes text labels outside the bars for clarity and allows interactivity such as zooming, panning, and tooltips, making it easy to explore the data dynamically.

In [59]:
import plotly.express as px

# Count customers per region as a two-column frame: Region / Number of Customers
region_counts = (
    customers_df['Region']
    .value_counts()
    .rename_axis('Region')
    .reset_index(name='Number of Customers')
)

# Build the interactive bar chart and style it in one chain:
# count labels are drawn above each bar and the figure gets a fixed size.
fig = (
    px.bar(
        region_counts,
        x='Region',
        y='Number of Customers',
        color='Region',
        text='Number of Customers',
        title='Customer Distribution by Region',
        labels={'Number of Customers': 'Number of Customers', 'Region': 'Region'},
        template='plotly',
    )
    .update_traces(textposition='outside')
    .update_layout(
        title_font_size=18,
        xaxis_title='Region',
        yaxis_title='Number of Customers',
        xaxis_tickangle=0,
        bargap=0.5,
        width=1000,
        height=500,
    )
)

fig.show()


**Graph - 2**

**Customer Signup Trends Over the Years** This interactive line chart visualizes the number of customer signups over the years. Each data point represents the total signups for a particular year, with the exact values displayed above the points. The chart allows for interactivity, such as zooming, panning, and tooltips, making it easy to analyze yearly trends dynamically.

In [60]:
import plotly.express as px

# Number of signups per calendar year, ordered chronologically
signup_trends = (
    customers_df['SignupDate']
    .dt.year
    .value_counts()
    .sort_index()
)

# Same counts as a two-column frame: Year / Number of Signups
signup_trends_df = (
    signup_trends
    .rename_axis('Year')
    .reset_index(name='Number of Signups')
)

# Line chart with a marker and a count label on every yearly data point
fig = (
    px.line(
        signup_trends_df,
        x='Year',
        y='Number of Signups',
        text='Number of Signups',
        title='Customer Signup Trends Over the Years',
        markers=True,
        template='plotly',
    )
    .update_traces(textposition='top center', line_color='blue', line_width=3)
    .update_layout(
        title_font_size=18,
        xaxis_title='Year',
        yaxis_title='Number of Signups',
        # linear tick mode so each year gets its own distinct tick
        xaxis_tickmode='linear',
        xaxis_tickangle=-45,
        yaxis_showgrid=True,
        hovermode="x",
    )
)

# Render the interactive chart
fig.show()


**Graph - 3**

**Price Distribution of Products** This interactive histogram visualizes the distribution of product prices. It includes vertical dashed and dotted lines to highlight the mean and median prices, respectively. Annotations provide exact values for the mean and median, helping to contextualize the data. The chart is interactive, allowing users to explore the price distribution dynamically with hover tooltips and zoom/pan functionality.



In [61]:
import plotly.graph_objects as go
import numpy as np

# Summary statistics of the product prices; they are shown in the chart
# title and (mean / median) marked on the histogram itself.
mean_price = products_df['Price'].mean()
median_price = products_df['Price'].median()
std_price = products_df['Price'].std()

# Histogram of product prices
fig = go.Figure()

fig.add_trace(go.Histogram(
    x=products_df['Price'],
    nbinsx=20,
    marker=dict(color='skyblue', line=dict(color='black', width=1)),
    opacity=0.75,
    name='Price Distribution'
))

# Vertical reference lines: dashed red = mean, dotted green = median
fig.add_vline(x=mean_price, line_width=2, line_dash="dash", line_color="red", annotation_text="Mean", annotation_position="top left")
fig.add_vline(x=median_price, line_width=2, line_dash="dot", line_color="green", annotation_text="Median", annotation_position="top right")

# Arrow annotations anchored on the x-axis (y=0) that spell out the exact values;
# ax/ay offset the labels to the left (mean) and right (median) of their lines.
fig.add_annotation(
    x=mean_price,
    y=0,
    text=f"Mean: ${mean_price:.2f}",
    showarrow=True,
    arrowhead=2,
    arrowcolor="red",
    arrowsize=1.5,
    ax=-50,
    ay=30,
    font=dict(color="red", size=12)
)

fig.add_annotation(
    x=median_price,
    y=0,
    text=f"Median: ${median_price:.2f}",
    showarrow=True,
    arrowhead=2,
    arrowcolor="green",
    arrowsize=1.5,
    ax=50,
    ay=30,
    font=dict(color="green", size=12)
)

# Layout.
# The title holds several dollar amounts. Plotly treats text containing a pair of
# "$" characters as LaTeX when MathJax is loaded by the notebook front end, which
# would garble the title. The HTML entity "&#36;" renders as a literal "$" and
# never triggers math mode, so the title looks the same with or without MathJax.
fig.update_layout(
    title=(
        "Price Distribution of Products<br>"
        f"(Mean: &#36;{mean_price:.2f}, Median: &#36;{median_price:.2f}, "
        f"Std Dev: &#36;{std_price:.2f})"
    ),
    title_font_size=18,
    xaxis_title="Price (USD)",
    yaxis_title="Frequency",
    bargap=0.1,
    xaxis=dict(showgrid=True),
    yaxis=dict(showgrid=True),
    template="plotly_white"
)

# Render the interactive chart
fig.show()


**Graph - 4**

**Product Distribution by Category** This interactive bar chart displays the number of products in each category. Each bar is color-coded, and the exact count of products is displayed above the bars. The chart also supports interactivity, allowing users to hover over bars to see detailed information, as well as zoom and pan for deeper exploration.




In [62]:
import plotly.express as px

# Count products per category as a two-column frame: Category / Number of Products
category_counts = (
    products_df['Category']
    .value_counts()
    .rename_axis('Category')
    .reset_index(name='Number of Products')
)

# Colour-coded bar chart; counts are printed above the outlined bars and the
# legend is hidden because the x-axis already names every category.
fig = (
    px.bar(
        category_counts,
        x='Category',
        y='Number of Products',
        color='Category',
        text='Number of Products',
        title='Product Distribution by Category',
        labels={'Number of Products': 'Number of Products', 'Category': 'Product Category'},
        template='plotly',
    )
    .update_traces(
        textposition='outside',
        marker_line_color='black',
        marker_line_width=1,
    )
    .update_layout(
        title_font_size=18,
        xaxis_title='Product Category',
        yaxis_title='Number of Products',
        xaxis_tickangle=0,
        showlegend=False,
        bargap=0.4,
        width=1000,
        height=500,
    )
)
fig.show()


**Graph - 5**

**Monthly Revenue Trends** This interactive line chart visualizes total revenue generated each month. Each point on the line represents the total revenue for a particular month, with markers highlighting the data points. Hovering over the chart provides exact monthly revenue values, and users can zoom and pan to explore trends over specific periods.



In [63]:
import plotly.express as px

# Sum TotalValue per calendar month. The monthly period ends up in the
# 'TransactionDate' column; 'TransactionMonth' is its string form for the x-axis.
monthly_revenue = (
    transactions_df
    .groupby(transactions_df['TransactionDate'].dt.to_period('M'))['TotalValue']
    .sum()
    .reset_index()
    .assign(TransactionMonth=lambda frame: frame['TransactionDate'].astype(str))
)

# Line chart of revenue per month with a marker on every data point
fig = (
    px.line(
        monthly_revenue,
        x='TransactionMonth',
        y='TotalValue',
        title='Monthly Revenue Trends',
        labels={'TransactionMonth': 'Month', 'TotalValue': 'Total Revenue (USD)'},
        markers=True,
        template='plotly',
    )
    .update_traces(
        line_color='green',
        line_width=3,
        marker_size=8,
        marker_color='darkgreen',
    )
    .update_layout(
        title_font_size=18,
        xaxis_title='Month',
        yaxis_title='Total Revenue (USD)',
        xaxis_tickangle=-45,
        yaxis_showgrid=True,
        hovermode="x unified",
    )
)

fig.show()


**Graph - 6**

**Top 10 Products by Revenue** This interactive vertical bar chart displays the top 10 products ranked by total revenue. Each bar represents a product, with the total revenue values displayed above the bars. Hovering over the bars provides additional details, and the chart allows for zooming and panning for detailed exploration.

In [64]:
import plotly.express as px

# Total revenue per ProductID, highest first, limited to the ten largest
product_revenue = (
    transactions_df
    .groupby('ProductID')['TotalValue']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

# Catalogue rows for those products, used to look up their names
top_products = products_df.loc[products_df['ProductID'].isin(product_revenue.index)]

# Attach ProductName to each revenue row (inner join on ProductID)
top_products_revenue = product_revenue.reset_index().merge(
    top_products[['ProductID', 'ProductName']],
    on='ProductID',
)

# NOTE(review): bars are keyed by ProductName, so two top ProductIDs sharing a
# name would land on the same x position — confirm names are unique in the data.
fig = (
    px.bar(
        top_products_revenue,
        x='ProductName',
        y='TotalValue',
        color='ProductName',
        text='TotalValue',
        title='Top 10 Products by Revenue',
        labels={'TotalValue': 'Total Revenue (USD)', 'ProductName': 'Product Name'},
        template='plotly',
    )
    .update_traces(
        marker_line_color='black',
        marker_line_width=1,
        texttemplate='%{text:.2f}',  # revenue labels with two decimals
        textposition='outside',
    )
    .update_layout(
        title_font_size=18,
        xaxis_title='Product Name',
        yaxis_title='Total Revenue (USD)',
        xaxis_tickangle=-45,
        yaxis_showgrid=True,
        bargap=0.3,
        showlegend=False,
        width=1000,   # fixed figure width
        height=600,   # fixed figure height
        margin=dict(l=50, r=50, t=80, b=100),  # margins chosen to avoid clipping
    )
)
fig.show()
