In [3]:
import pandas as pd
import plotly.express as px

# Read the sales CSV into the working frame; the file is decoded as latin-1.
df = pd.read_csv(
    '/content/sales_data_sample.csv',
    encoding='latin-1',
)

# Swap the ORDERDATE column for its parsed datetime values so the charts
# below can group and plot on real dates.
df = df.assign(ORDERDATE=pd.to_datetime(df['ORDERDATE']))

# 1. Total Revenue Over Time
# Sum SALES per distinct ORDERDATE, then draw the totals as a line.
revenue_by_date = df.groupby('ORDERDATE')['SALES'].sum().reset_index()
fig1 = px.line(
    data_frame=revenue_by_date,
    x='ORDERDATE',
    y='SALES',
    title='Total Revenue Over Time',
)
fig1.show()

# 2. Total Number of Orders Over Time
# Count distinct ORDERNUMBER values per ORDERDATE.
orders_by_date = df.groupby('ORDERDATE')['ORDERNUMBER'].nunique().reset_index()
fig2 = px.line(
    data_frame=orders_by_date,
    x='ORDERDATE',
    y='ORDERNUMBER',
    title='Total Number of Orders Over Time',
)
fig2.show()

# 3. Average Order Value (AOV) Over Time
# AOV for a date = summed SALES / number of distinct orders on that date.
aov_df = (
    df.groupby('ORDERDATE')
    .agg(TotalSales=('SALES', 'sum'), TotalOrders=('ORDERNUMBER', 'nunique'))
    .reset_index()
)
aov_df['AOV'] = aov_df['TotalSales'].div(aov_df['TotalOrders'])
fig3 = px.line(
    data_frame=aov_df,
    x='ORDERDATE',
    y='AOV',
    title='Average Order Value Over Time',
)
fig3.show()

def _largest_first(per_group):
    """Return a per-group Series as a frame sorted descending by its values.

    *per_group* is a named Series indexed by the grouping key (the result of
    a ``groupby(...)[col].sum()`` / ``.nunique()``). The key becomes a regular
    column and the rows are ordered by the Series' own column, largest first.
    """
    return per_group.reset_index().sort_values(by=per_group.name, ascending=False)


# 4. Revenue by Product Line
revenue_by_product = _largest_first(df.groupby('PRODUCTLINE')['SALES'].sum())
fig4 = px.bar(data_frame=revenue_by_product, x='PRODUCTLINE', y='SALES', title='Revenue by Product Line')
fig4.show()

# 5. Number of Orders by Product Line (distinct ORDERNUMBER values)
orders_by_product = _largest_first(df.groupby('PRODUCTLINE')['ORDERNUMBER'].nunique())
fig5 = px.bar(data_frame=orders_by_product, x='PRODUCTLINE', y='ORDERNUMBER', title='Number of Orders by Product Line')
fig5.show()

# 6. Revenue by Country
revenue_by_country = _largest_first(df.groupby('COUNTRY')['SALES'].sum())
fig6 = px.bar(data_frame=revenue_by_country, x='COUNTRY', y='SALES', title='Revenue by Country')
fig6.show()

# 7. Number of Orders by Country (distinct ORDERNUMBER values)
orders_by_country = _largest_first(df.groupby('COUNTRY')['ORDERNUMBER'].nunique())
fig7 = px.bar(data_frame=orders_by_country, x='COUNTRY', y='ORDERNUMBER', title='Number of Orders by Country')
fig7.show()

# 8. Revenue by Customer (Top 10)
# Only the ten highest-revenue customers are kept for the chart.
revenue_by_customer = _largest_first(df.groupby('CUSTOMERNAME')['SALES'].sum()).head(10)
fig8 = px.bar(data_frame=revenue_by_customer, x='CUSTOMERNAME', y='SALES', title='Top 10 Customers by Revenue')
fig8.show()

# 9. Sales Distribution by Deal Size
sales_by_deal_size = _largest_first(df.groupby('DEALSIZE')['SALES'].sum())
fig9 = px.bar(data_frame=sales_by_deal_size, x='DEALSIZE', y='SALES', title='Sales Distribution by Deal Size')
fig9.show()

# 10. Quantity Ordered by Product Line
quantity_by_product = _largest_first(df.groupby('PRODUCTLINE')['QUANTITYORDERED'].sum())
fig10 = px.bar(data_frame=quantity_by_product, x='PRODUCTLINE', y='QUANTITYORDERED', title='Quantity Ordered by Product Line')
fig10.show()


# Install necessary libraries if not already installed.
# NOTE: the leading "!" is IPython/Jupyter shell-escape syntax, so this line
# only runs inside a notebook or IPython session, not under plain Python.
# NOTE(review): openpyxl is not referenced by the code visible here — confirm
# it is still needed.
!pip install pandas plotly openpyxl

import pandas as pd
import plotly.express as px

# Load the dataset (decoded as latin-1). A missing file is reported with a
# short hint and then re-raised so execution stops here.
try:
    df = pd.read_csv('/content/sales_data_sample.csv', encoding='latin-1')
except FileNotFoundError:
    print("Error: 'sales_data_sample.csv' not found. Please upload the file.")
    # Re-raise rather than calling exit(): exit() is a `site` helper meant for
    # the interactive shell, and in a notebook it can shut the kernel down
    # instead of just stopping this cell. Re-raising halts the cell and keeps
    # the original traceback.
    raise


# Convert ORDERDATE to datetime so the time-series charts group on real dates
df['ORDERDATE'] = pd.to_datetime(df['ORDERDATE'])

# --- Metrics and Analysis ---

# 1. Total Revenue Over Time
# SALES summed per distinct ORDERDATE, drawn as a line.
print("--- 1. Total Revenue Over Time ---")
revenue_over_time = (
    df.groupby('ORDERDATE')['SALES']
    .sum()
    .reset_index()
)
fig1 = px.line(
    data_frame=revenue_over_time,
    x='ORDERDATE',
    y='SALES',
    title='Total Revenue Over Time',
)
fig1.show()

# 2. Total Number of Orders Over Time
# Distinct ORDERNUMBER values counted per ORDERDATE, drawn as a line.
print("--- 2. Total Number of Orders Over Time ---")
orders_over_time = (
    df.groupby('ORDERDATE')['ORDERNUMBER']
    .nunique()
    .reset_index()
)
fig2 = px.line(
    data_frame=orders_over_time,
    x='ORDERDATE',
    y='ORDERNUMBER',
    title='Total Number of Orders Over Time',
)
fig2.show()

# 3. Average Order Value (AOV) Over Time
# For each ORDERDATE: AOV = summed SALES / number of distinct ORDERNUMBER values.
print("--- 3. Average Order Value (AOV) Over Time ---")
aov_df = df.groupby('ORDERDATE').agg(TotalSales=('SALES', 'sum'), TotalOrders=('ORDERNUMBER', 'nunique')).reset_index()
# Vectorized division instead of a row-wise apply(): the whole column is
# computed in one pass. Dates whose distinct-order count is 0 are still
# reported as 0 rather than inf/NaN.
# NOTE(review): a 0 count presumably only arises when every ORDERNUMBER on
# that date is missing — confirm against the data.
aov_df['AOV'] = (aov_df['TotalSales'] / aov_df['TotalOrders']).where(aov_df['TotalOrders'] > 0, 0)
fig3 = px.line(aov_df, x='ORDERDATE', y='AOV', title='Average Order Value Over Time')
fig3.show()

# 4. Revenue by Product Line
# SALES summed per PRODUCTLINE, largest first.
print("--- 4. Revenue by Product Line ---")
revenue_by_product = (
    df.groupby('PRODUCTLINE')['SALES']
    .sum()
    .reset_index()
    .sort_values(by='SALES', ascending=False)
)
fig4 = px.bar(
    data_frame=revenue_by_product,
    x='PRODUCTLINE',
    y='SALES',
    title='Revenue by Product Line',
)
fig4.show()

# 5. Number of Orders by Product Line
# Distinct ORDERNUMBER values per PRODUCTLINE, largest first.
print("--- 5. Number of Orders by Product Line ---")
orders_by_product = (
    df.groupby('PRODUCTLINE')['ORDERNUMBER']
    .nunique()
    .reset_index()
    .sort_values(by='ORDERNUMBER', ascending=False)
)
fig5 = px.bar(
    data_frame=orders_by_product,
    x='PRODUCTLINE',
    y='ORDERNUMBER',
    title='Number of Orders by Product Line',
)
fig5.show()

# 6. Revenue by Country
# SALES summed per COUNTRY, largest first.
print("--- 6. Revenue by Country ---")
revenue_by_country = (
    df.groupby('COUNTRY')['SALES']
    .sum()
    .reset_index()
    .sort_values(by='SALES', ascending=False)
)
fig6 = px.bar(
    data_frame=revenue_by_country,
    x='COUNTRY',
    y='SALES',
    title='Revenue by Country',
)
fig6.show()

# 7. Number of Orders by Country
# Distinct ORDERNUMBER values per COUNTRY, largest first.
print("--- 7. Number of Orders by Country ---")
orders_by_country = (
    df.groupby('COUNTRY')['ORDERNUMBER']
    .nunique()
    .reset_index()
    .sort_values(by='ORDERNUMBER', ascending=False)
)
fig7 = px.bar(
    data_frame=orders_by_country,
    x='COUNTRY',
    y='ORDERNUMBER',
    title='Number of Orders by Country',
)
fig7.show()

# 8. Revenue by Customer (Top 10)
# SALES summed per CUSTOMERNAME; only the ten largest totals are charted.
print("--- 8. Revenue by Customer (Top 10) ---")
revenue_by_customer = (
    df.groupby('CUSTOMERNAME')['SALES']
    .sum()
    .reset_index()
    .sort_values(by='SALES', ascending=False)
    .head(10)
)
fig8 = px.bar(
    data_frame=revenue_by_customer,
    x='CUSTOMERNAME',
    y='SALES',
    title='Top 10 Customers by Revenue',
)
fig8.show()

# 9. Sales Distribution by Deal Size
# SALES summed per DEALSIZE, largest first.
print("--- 9. Sales Distribution by Deal Size ---")
sales_by_deal_size = (
    df.groupby('DEALSIZE')['SALES']
    .sum()
    .reset_index()
    .sort_values(by='SALES', ascending=False)
)
fig9 = px.bar(
    data_frame=sales_by_deal_size,
    x='DEALSIZE',
    y='SALES',
    title='Sales Distribution by Deal Size',
)
fig9.show()

# 10. Quantity Ordered by Product Line
# QUANTITYORDERED summed per PRODUCTLINE, largest first.
print("--- 10. Quantity Ordered by Product Line ---")
quantity_by_product = (
    df.groupby('PRODUCTLINE')['QUANTITYORDERED']
    .sum()
    .reset_index()
    .sort_values(by='QUANTITYORDERED', ascending=False)
)
fig10 = px.bar(
    data_frame=quantity_by_product,
    x='PRODUCTLINE',
    y='QUANTITYORDERED',
    title='Quantity Ordered by Product Line',
)
fig10.show()


--- 1. Total Revenue Over Time ---


--- 2. Total Number of Orders Over Time ---


--- 3. Average Order Value (AOV) Over Time ---


--- 4. Revenue by Product Line ---


--- 5. Number of Orders by Product Line ---


--- 6. Revenue by Country ---


--- 7. Number of Orders by Country ---


--- 8. Revenue by Customer (Top 10) ---


--- 9. Sales Distribution by Deal Size ---


--- 10. Quantity Ordered by Product Line ---
