In [None]:
# Import libraries
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Fetch the dataset through the Google Sheets CSV export endpoint.
url = "https://docs.google.com/spreadsheets/d/1tdF_beuexr4n46cuZY8P-b8JCCYN-SNZ/export?format=csv"
df = pd.read_csv(url)

# ------------------------------
# Data Cleaning & Preprocessing
# ------------------------------
# The two identifier columns are treated as irrelevant to this analysis.
identifier_columns = ['Item Identifier', 'Outlet Identifier']
df = df.drop(identifier_columns, axis="columns")

# Collapse the alternative spellings of the fat-content labels onto the two
# canonical names; values that are not listed here pass through unchanged.
fat_content_aliases = {
    'LF': 'Low Fat',
    'low fat': 'Low Fat',
    'reg': 'Regular',
}
df['Item Fat Content'] = df['Item Fat Content'].replace(fat_content_aliases)

# Impute missing numeric values: weight with the column mean, visibility with
# the column median. Each statistic is taken from the column before it is filled.
mean_weight = df['Item Weight'].mean()
df['Item Weight'] = df['Item Weight'].fillna(mean_weight)
median_visibility = df['Item Visibility'].median()
df['Item Visibility'] = df['Item Visibility'].fillna(median_visibility)

# ------------------------------
# Statistical Moments Calculation
# ------------------------------
# Columns summarised below (this list is also what the correlation heatmap uses).
numerical_cols = ['Item Visibility', 'Item Weight', 'Sales', 'Rating']

# Section banner: blank line, rule, title, rule.
print("\n" + "=" * 50, "STATISTICAL MOMENTS ANALYSIS", "=" * 50, sep="\n")

# Report mean, variance, skewness and kurtosis for each column, each rounded
# to two decimals, one statistic per line.
for col in numerical_cols:
    column = df[col]
    print(f"\nColumn: {col}")
    print(f"Mean: {column.mean():.2f}")
    print(f"Variance: {column.var():.2f}")
    print(f"Skewness: {column.skew():.2f}")
    print(f"Kurtosis: {column.kurtosis():.2f}")

# ------------------------------
# Interactive Visualization Section with Reset Buttons
# ------------------------------
def add_reset_button(fig):
    """Attach a "Reset All" button to ``fig`` and configure legend clicks.

    The figure is modified in place through ``fig.update_layout`` and the same
    object is returned so calls can be chained.

    Args:
        fig: A figure exposing ``data`` (its traces) and ``update_layout``.

    Returns:
        The ``fig`` object that was passed in.
    """
    # One visibility flag per trace present at call time.
    trace_count = len(fig.data)

    # Button using Plotly's "update" method: the first args entry carries trace
    # attributes (every trace visible again), the second carries layout
    # attributes (both axes back to autorange).
    reset_button = {
        "label": "Reset All",
        "method": "update",
        "args": [
            {"visible": [True] * trace_count},
            {"xaxis.autorange": True, "yaxis.autorange": True},
        ],
    }
    button_menu = {
        "type": "buttons",
        "buttons": [reset_button],
        "x": 0.85,
        "y": 1.15,
        "showactive": False,
    }

    # Legend interaction, per the Plotly legend reference:
    #   single click -> "toggleothers": the clicked item becomes the only one shown
    #   double click -> "toggle": flips visibility of the clicked item
    legend_behaviour = {
        "itemclick": "toggleothers",
        "itemdoubleclick": "toggle",
    }

    fig.update_layout(updatemenus=[button_menu], legend=legend_behaviour)
    return fig

# 1. Scatter plot: item visibility against sales, coloured by fat content
print("\n" + "=" * 50, "VISUALIZATION 1: Item Visibility vs Sales", "=" * 50, sep="\n")
fig1 = px.scatter(
    df,
    x='Item Visibility',
    y='Sales',
    color='Item Fat Content',
    title='Item Visibility vs Sales',
    hover_data=['Item Weight', 'Outlet Type'],
    opacity=0.6,
)
# add_reset_button modifies the figure in place, so the remaining steps can
# be applied to fig1 directly.
add_reset_button(fig1)
fig1.update_layout(width=1000, height=600)
fig1.show()

# 2. Bar chart: summed sales for each outlet type
print("\n" + "=" * 50, "VISUALIZATION 2: Total Sales by Outlet Type", "=" * 50, sep="\n")
# Sum sales per outlet type, then turn the group labels back into a column.
outlet_sales_totals = df.groupby('Outlet Type')['Sales'].sum()
sales_by_outlet = outlet_sales_totals.reset_index()
fig2 = px.bar(
    sales_by_outlet,
    x='Outlet Type',
    y='Sales',
    color='Outlet Type',
    title='Total Sales by Outlet Type',
    text_auto='.2s',
)
# add_reset_button modifies the figure in place.
add_reset_button(fig2)
fig2.update_layout(width=1000, height=600)
fig2.show()

# 3. Box plot: distribution of sales within each outlet size
print("\n" + "=" * 50, "VISUALIZATION 3: Sales Distribution by Outlet Size", "=" * 50, sep="\n")
fig3 = px.box(
    df,
    x='Outlet Size',
    y='Sales',
    color='Outlet Size',
    title='Sales Distribution by Outlet Size',
)
# add_reset_button modifies the figure in place.
add_reset_button(fig3)
fig3.update_layout(width=1000, height=600)
fig3.show()

# 4. Heatmap of pairwise correlations between the numerical columns
print("\n" + "=" * 50, "VISUALIZATION 4: Correlation Heatmap", "=" * 50, sep="\n")
corr_matrix = df[numerical_cols].corr()
# Cell colours come from the raw coefficients on a fixed -1..1 scale; the
# text drawn in each cell is the coefficient rounded to two decimals.
corr_heatmap = go.Heatmap(
    z=corr_matrix.values,
    x=corr_matrix.columns,
    y=corr_matrix.columns,
    colorscale='RdBu',
    zmin=-1,
    zmax=1,
    text=corr_matrix.round(2).values,
    texttemplate="%{text}",
)
fig4 = go.Figure(corr_heatmap)
# add_reset_button modifies the figure in place.
add_reset_button(fig4)
fig4.update_layout(title='Correlation Heatmap', width=800, height=600)
fig4.show()

# 5. Donut-style pie chart of sales share per outlet type.
#    Unlike the other figures, this one is shown without the reset button.
print("\n" + "=" * 50, "VISUALIZATION 5: Sales Contribution by Outlet Type", "=" * 50, sep="\n")
fig5 = px.pie(
    sales_by_outlet,  # per-outlet totals computed for visualization 2
    values='Sales',
    names='Outlet Type',
    title='Sales Contribution by Outlet Type',
    hole=0.3,
)
fig5.update_layout(width=800, height=600)
fig5.show()

# 6. Line plot: mean sales as a function of outlet age
print("\n" + "=" * 50, "VISUALIZATION 6: Sales Trend vs Outlet Age", "=" * 50, sep="\n")
# NOTE(review): the age is measured against a hard-coded year, not the
# current date -- confirm 2023 is the intended reference point.
reference_year = 2023
df['Outlet Age'] = reference_year - df['Outlet Establishment Year']
# Average sales per distinct age, with the age restored as a regular column.
mean_sales_per_age = df.groupby('Outlet Age')['Sales'].mean()
sales_by_age = mean_sales_per_age.reset_index()
fig6 = px.line(
    sales_by_age,
    x='Outlet Age',
    y='Sales',
    title='Average Sales vs Outlet Age',
    markers=True,
)
# add_reset_button modifies the figure in place.
add_reset_button(fig6)
fig6.update_layout(width=1000, height=600)
fig6.show()

# 7. Stacked bar chart: sales per outlet type, split by fat content
print("\n" + "=" * 50, "VISUALIZATION 7: Sales by Outlet Type & Fat Content", "=" * 50, sep="\n")
# Sum sales for every (outlet type, fat content) pair, then pivot the fat
# content level into columns so each category becomes its own bar segment.
sales_by_type_and_fat = df.groupby(['Outlet Type', 'Item Fat Content'])['Sales'].sum()
stacked_data = sales_by_type_and_fat.unstack().reset_index()
fig7 = px.bar(
    stacked_data,
    x='Outlet Type',
    y=['Low Fat', 'Regular'],
    title='Sales by Outlet Type and Fat Content',
    labels={'value': 'Sales'},
    barmode='stack',
)
# add_reset_button modifies the figure in place.
add_reset_button(fig7)
fig7.update_layout(width=1000, height=600)
fig7.show()
