In [1]:
!pip install dash dash-bootstrap-components

Collecting dash
  Downloading dash-3.0.4-py3-none-any.whl.metadata (10 kB)
Collecting dash-bootstrap-components
  Downloading dash_bootstrap_components-2.0.2-py3-none-any.whl.metadata (18 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Downloading dash-3.0.4-py3-none-any.whl (7.9 MB)
   ---------------------------------------- 0.0/7.9 MB ? eta -:--:--
   - -------------------------------------- 0.3/7.9 MB ? eta -:--:--
   - -------------------------------------- 0.3/7.9 MB ? eta -:--:--
   -- ------------------------------------- 0.5/7.9 MB 1.1 MB/s eta 0:00:07
   --- ------------------------------------ 0.8/7.9 MB 932.9 kB/s eta 0:00:08
   --- ------------------------------------ 0.8/7.9 MB 932.9 kB/s eta 0:00:08
   ----- ---------------------------------- 1.0/7.9 MB 811.6 kB/s eta 0:00:09
   ----- ---------------------------------- 1.0/7.9 MB 811.6 kB/s eta 0:00:09
   ----- ---------------------------------- 1.0/7.9 MB 811.6 kB/s et

In [2]:
!pip install pyngrok
from pyngrok import ngrok
# public_url = ngrok.connect(8051)
# print(f'Dashboard URL: {public_url}')

Collecting pyngrok
  Downloading pyngrok-7.2.7-py3-none-any.whl.metadata (9.4 kB)
Downloading pyngrok-7.2.7-py3-none-any.whl (23 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.7


In [3]:
!ngrok authtoken 2wrgQJh3qrdU3vg5sD2qcvRphe9_67ktESae6sfmNJfNK7vcP # Replace <your_authtoken> with your actual authtoken

The system cannot find the file specified.


In [4]:
!killall ngrok

'killall' is not recognized as an internal or external command,
operable program or batch file.


In [5]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from dash import Dash, html, dcc
import dash_bootstrap_components as dbc
from scipy.stats import f_oneway, ttest_ind, ttest_rel, pearsonr
from scipy.stats.mstats import winsorize
import seaborn as sns
from pyngrok import ngrok

# Initialize the Dash app with CYBORG theme and external styles
app = Dash(
    __name__,
    external_stylesheets=[
        dbc.themes.CYBORG,
        "https://use.fontawesome.com/releases/v5.15.4/css/all.css",
        "https://fonts.googleapis.com/css2?family=Poppins:wght@600&family=Roboto:wght@400;500&display=swap"
    ]
)

# Custom CSS for enhanced styling
app.index_string = '''
<!DOCTYPE html>
<html>
    <head>
        {%metas%}
        <title>Furniture Sales Dashboard</title>
        {%favicon%}
        {%css%}
        <style>
            body {
                font-family: 'Roboto', sans-serif;
                background-color: #1a1a1a;
            }
            h1, h2, h3 {
                font-family: 'Poppins', sans-serif;
                color: #ffffff;
            }
            .card {
                background-color: #2a2a2a;
                border-radius: 10px;
                box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
                padding: 15px;
                margin-bottom: 20px;
                transition: transform 0.2s;
            }
            .card:hover {
                transform: translateY(-5px);
            }
            .nav-tabs .nav-link {
                font-size: 1.1rem;
                color: #adb5bd;
                transition: color 0.3s;
            }
            .nav-tabs .nav-link:hover {
                color: #ffffff;
            }
            .nav-tabs .nav-link.active {
                background-color: #007bff;
                color: #ffffff;
                border-color: #007bff;
            }
            .text-info {
                color: #17a2b8 !important;
            }
        </style>
    </head>
    <body>
        {%app_entry%}
        <footer>
            {%config%}
            {%scripts%}
            {%renderer%}
        </footer>
    </body>
</html>
'''

# --- Data Loading and Preparation ---
try:
    df = pd.read_csv('/content/cleaned_data.csv')
except FileNotFoundError:
    print("Error: cleaned_data.csv not found. Please update the file path.")
    data = {
        'Order Date': pd.to_datetime(['2023-01-15', '2023-01-20', '2023-02-10', '2023-02-15', '2023-03-05']),
        'City': ['New York', 'Los Angeles', 'New York', 'Chicago', 'Los Angeles'],
        'Sales': [200, 150, 300, 50, 400],
        'Quantity': [2, 1, 3, 1, 4],
        'Discount': [0.0, 0.1, 0.0, 0.2, 0.0],
        'Profit': [20, 15, 45, -10, 80],
        'Ship Mode': ['Second Class', 'Standard Class', 'Second Class', 'Standard Class', 'First Class'],
        'Segment': ['Consumer', 'Corporate', 'Consumer', 'Consumer', 'Home Office'],
        'Region': ['East', 'West', 'East', 'Central', 'West'],
        'Sub-Category': ['Chairs', 'Tables', 'Binders', 'Paper', 'Appliances'],
        'Product Name': ['Product A', 'Product B', 'Product C', 'Product D', 'Product E']
    }
    df = pd.DataFrame(data)

# Data preparation
df['Order Date'] = pd.to_datetime(df['Order Date'])
df['Weekday'] = df['Order Date'].dt.day_name()
df['Weekend'] = df['Order Date'].dt.weekday >= 5

# Winsorize numerical features
numerical_features_for_winsorize = ['Sales', 'Quantity', 'Discount', 'Profit']
for feature in numerical_features_for_winsorize:
    if feature in df.columns and pd.api.types.is_numeric_dtype(df[feature]):
        if df[feature].notna().sum() > 0:
            df[feature] = winsorize(df[feature].astype(float), limits=[0.05, 0.05])
        else:
            print(f"Warning: Column '{feature}' contains only NA values. Skipping winsorization.")
    elif feature not in df.columns:
        print(f"Warning: Column '{feature}' not found in DataFrame. Skipping winsorization.")
    else:
        print(f"Warning: Column '{feature}' is not numeric. Skipping winsorization.")

# Define numerical features
num_features = ['Sales', 'Quantity', 'Discount', 'Profit']

# --- Visualizations ---

# Visualization 1: Top Cities Pie Chart
top_cities = df['City'].value_counts().head(7)
fig1 = px.pie(
    values=top_cities.values,
    names=top_cities.index,
    title='Top 7 Cities by Sales Count',
    color_discrete_sequence=px.colors.qualitative.Bold
)
fig1.update_traces(textposition='inside', textinfo='percent+label')

# Visualization 2: Monthly Sales Line Chart
df_sorted_by_date = df.sort_values('Order Date')
monthly_sales = df_sorted_by_date.groupby(pd.Grouper(key='Order Date', freq='ME'))['Sales'].sum()
fig2 = px.line(
    x=monthly_sales.index,
    y=monthly_sales.values,
    title='Monthly Sales Trend',
    labels={'x': 'Month', 'y': 'Total Sales'},
    markers=True,
    color_discrete_sequence=['#17a2b8']  # Strong blue color
)
fig2.update_layout(xaxis_title='Month', yaxis_title='Total Sales')

# Visualization 3: Histograms for Numerical Features
fig3 = make_subplots(rows=2, cols=2, subplot_titles=[f'Distribution of {f}' for f in num_features])
for i, feature in enumerate(num_features):
    hist = go.Histogram(x=df[feature], nbinsx=30, name=feature, marker_color='#17a2b8')
    fig3.add_trace(hist, row=(i // 2) + 1, col=(i % 2) + 1)
fig3.update_layout(height=600, showlegend=False, title_text='Distributions of Numerical Features (Post-Winsorization)')

# Visualization 4: Correlation Heatmap
valid_num_features = [f for f in num_features if f in df.columns and pd.api.types.is_numeric_dtype(df[f])]
if valid_num_features:
    corr_matrix = df[valid_num_features].corr()
    fig4 = go.Figure(data=go.Heatmap(
        z=corr_matrix.values,
        x=corr_matrix.columns,
        y=corr_matrix.index,
        colorscale='Viridis',
        zmin=-1,
        zmax=1,
        text=corr_matrix.values.round(2),
        texttemplate='%{text}',
        textfont={'size': 12}
    ))
    fig4.update_layout(title='Correlation Heatmap of Numerical Features', height=500)
else:
    fig4 = go.Figure().update_layout(title='Correlation Heatmap (No valid numerical features found)')

# Visualization 5: Box Plots
fig5 = make_subplots(rows=2, cols=2, subplot_titles=[f'Box Plot of {f}' for f in num_features])
for i, feature in enumerate(num_features):
    box = go.Box(y=df[feature], name=feature, marker_color='#17a2b8')
    fig5.add_trace(box, row=(i // 2) + 1, col=(i % 2) + 1)
fig5.update_layout(title='Box Plots of Numerical Features (After Capping)', height=600, showlegend=False)

# Visualization 6: Categorical Feature Count Plots
categorical_features = ['Ship Mode', 'Segment', 'Region', 'Sub-Category']
fig6 = make_subplots(rows=2, cols=2, subplot_titles=[f'{f} Count' for f in categorical_features])
for i, feature in enumerate(categorical_features):
    if feature in df.columns:
        counts = df[feature].value_counts()
        bar = go.Bar(x=counts.index, y=counts.values, name=feature, marker_color=px.colors.qualitative.Bold)
        fig6.add_trace(bar, row=(i // 2) + 1, col=(i % 2) + 1)
    else:
        print(f"Warning: Categorical feature '{feature}' not found.")
fig6.update_layout(height=700, showlegend=False, title_text='Counts of Key Categorical Features')
fig6.update_xaxes(tickangle=-30)

# Visualization 7: Region Count Bar Chart
if 'Region' in df.columns:
    region_counts = df['Region'].value_counts()
    fig7 = px.bar(
        x=region_counts.index,
        y=region_counts.values,
        title='Order Count by Region',
        labels={'x': 'Region', 'y': 'Number of Orders'},
        color=region_counts.index,
        color_discrete_sequence=px.colors.qualitative.Bold,
        text=region_counts.values
    )
    fig7.update_traces(textposition='outside')
else:
    fig7 = go.Figure().update_layout(title='Region Count (Region column not found)')

# Visualization 8: Categorical Feature Pie Charts
fig8 = make_subplots(rows=2, cols=2, subplot_titles=[f'{f} Distribution' for f in categorical_features], specs=[[{'type': 'pie'}]*2]*2)
for i, feature in enumerate(categorical_features):
    if feature in df.columns:
        counts = df[feature].value_counts()
        pie = go.Pie(labels=counts.index, values=counts.values, name=feature, marker_colors=px.colors.qualitative.Bold)
        fig8.add_trace(pie, row=(i // 2) + 1, col=(i % 2) + 1)
    else:
        print(f"Warning: Categorical feature '{feature}' not found for pie chart.")
fig8.update_traces(textposition='inside', textinfo='percent+label')
fig8.update_layout(height=700, showlegend=False, title_text='Distribution within Key Categorical Features')

# Visualization 9: Sales Histogram
fig9 = px.histogram(
    df,
    x='Sales',
    nbins=30,
    title='Distribution of Sales (After Capping)',
    labels={'x': 'Sales', 'y': 'Frequency'},
    color_discrete_sequence=['#17a2b8']
)

# Visualization 10: Region vs. Segment Count Plot
if 'Region' in df.columns and 'Segment' in df.columns:
    fig10 = px.histogram(
        df,
        x='Region',
        color='Segment',
        barmode='group',
        title='Order Count by Region and Segment',
        labels={'x': 'Region', 'y': 'Number of Orders'},
        color_discrete_sequence=px.colors.qualitative.Bold
    )
else:
    fig10 = go.Figure().update_layout(title='Region vs. Segment Count (Columns not found)')

# Visualization 11: Violin Plot for Profit by Segment
if 'Segment' in df.columns and 'Profit' in df.columns:
    fig11 = px.violin(
        df,
        x='Segment',
        y='Profit',
        title='Profit Distribution Across Segments',
        color='Segment',
        box=True,
        points='all',
        color_discrete_sequence=px.colors.qualitative.Bold
    )
else:
    fig11 = go.Figure().update_layout(title='Profit by Segment (Columns not found)')

# Visualization 12: Top 10 Products by Sales
if 'Product Name' in df.columns and 'Sales' in df.columns:
    top_products = df.groupby('Product Name')['Sales'].sum().sort_values(ascending=True).tail(10)
    fig12 = px.bar(
        x=top_products.values,
        y=top_products.index,
        orientation='h',
        title='Top 10 Products by Total Sales',
        labels={'y': '', 'x': 'Total Sales'},
        color=top_products.values,
        color_continuous_scale=px.colors.sequential.Teal,
        text=top_products.values.round(0)
    )
    fig12.update_traces(textposition='outside')
    fig12.update_layout(
        height=500,
        yaxis=dict(tickfont=dict(size=12), automargin=True),
        xaxis_title='Total Sales',
        margin=dict(l=250, r=50, t=50, b=50)
    )
else:
    fig12 = go.Figure().update_layout(title='Top 10 Products (Columns not found)')

# Visualization 13: Total Sales by Sub-Category
if 'Sub-Category' in df.columns and 'Sales' in df.columns:
    subcat_sales = df.groupby('Sub-Category')['Sales'].sum().sort_values(ascending=False)
    fig13 = px.bar(
        x=subcat_sales.index,
        y=subcat_sales.values,
        title='Total Sales by Sub-Category',
        labels={'x': 'Sub-Category', 'y': 'Total Sales'},
        color=subcat_sales.index,
        color_discrete_sequence=px.colors.qualitative.Bold,
        text=subcat_sales.values.round(0)
    )
    fig13.update_traces(textposition='outside')
    fig13.update_layout(xaxis_tickangle=-45)
else:
    fig13 = go.Figure().update_layout(title='Sales by Sub-Category (Columns not found)')

# Visualization 14: Monthly Sales Trend by Sub-Category
if 'Sub-Category' in df.columns and 'Sales' in df.columns:
    sales_trend = df.groupby([pd.Grouper(key='Order Date', freq='ME'), 'Sub-Category'])['Sales'].sum().reset_index()
    fig14 = px.line(
        sales_trend,
        x='Order Date',
        y='Sales',
        color='Sub-Category',
        title='Monthly Sales Trend by Sub-Category',
        markers=True,
        color_discrete_sequence=px.colors.qualitative.Bold
    )
    fig14.update_layout(height=600)
else:
    fig14 = go.Figure().update_layout(title='Monthly Sales Trend by Sub-Category (Columns not found)')

# Visualization 15: Sales Heatmap by Sub-Category and Month
if 'Sub-Category' in df.columns and 'Sales' in df.columns:
    df['Order Date'] = pd.to_datetime(df['Order Date'])
    df['OrderMonth'] = df['Order Date'].dt.to_period('M')
    sales_pivot = pd.pivot_table(data=df, values='Sales', index='Sub-Category', columns='OrderMonth', aggfunc='sum').fillna(0)
    sales_pivot.columns = sales_pivot.columns.astype(str)
    fig15 = go.Figure(data=go.Heatmap(
        z=sales_pivot.values,
        x=sales_pivot.columns,
        y=sales_pivot.index,
        colorscale='Teal',
        text=sales_pivot.values.round(0),
        texttemplate='%{text}',
        textfont={'size': 9},
        hoverongaps=False
    ))
    fig15.update_layout(
        title='Sales Heatmap by Sub-Category and Month',
        height=800,
        width=1200,
        xaxis_tickangle=-45,
        yaxis=dict(tickfont=dict(size=11), automargin=True),
        xaxis=dict(tickfont=dict(size=10), automargin=True)
    )
else:
    fig15 = go.Figure().update_layout(title='Sales Heatmap (Columns not found or date conversion issue)')

# Visualization 16: Sales vs. Profit Scatter Plot
if 'Sales' in df.columns and 'Profit' in df.columns and 'Sub-Category' in df.columns:
    fig16 = px.scatter(
        df,
        x='Sales',
        y='Profit',
        color='Sub-Category',
        title='Sales vs Profit by Sub-Category (After Capping)',
        hover_data=['Product Name'],
        color_discrete_sequence=px.colors.qualitative.Bold
    )
    fig16.update_layout(height=600)
else:
    fig16 = go.Figure().update_layout(title='Sales vs Profit Scatter Plot (Columns not found)')

# --- Hypothesis Testing Section ---
hypothesis_results = []
interesting_fact = "No interesting facts generated yet."

if all(c in df.columns for c in ['Sales', 'Region', 'Discount', 'Profit', 'Weekend']):
    regions = df['Region'].unique()
    sales_by_region = [df[df['Region'] == region]['Sales'].dropna() for region in regions]
    if len(sales_by_region) > 1 and all(len(group) >= 2 for group in sales_by_region):
        f_stat, p_value = f_oneway(*sales_by_region)
        conclusion = 'Reject' if p_value < 0.05 else 'Fail to Reject'
        interpretation = 'Sales vary significantly across regions.' if p_value < 0.05 else 'No significant difference in sales across regions.'
        hypothesis_results.append(f"Hypothesis 1: Regional Sales Difference (ANOVA)\nF-statistic: {f_stat:.4f}, P-value: {p_value:.4f}\n{conclusion} Null Hypothesis: {interpretation}")
    else:
        hypothesis_results.append("Hypothesis 1: Regional Sales Difference (ANOVA)\nCould not perform test: requires at least two regions with data.")

    median_discount = df['Discount'].median()
    high_discount_profit = df[df['Discount'] > median_discount]['Profit'].dropna()
    low_discount_profit = df[df['Discount'] <= median_discount]['Profit'].dropna()
    if len(high_discount_profit) >= 2 and len(low_discount_profit) >= 2:
        t_stat, p_value = ttest_ind(high_discount_profit, low_discount_profit, equal_var=False)
        conclusion = 'Reject' if p_value < 0.05 else 'Fail to Reject'
        interpretation = 'Profit differs significantly between high and low discount groups.' if p_value < 0.05 else 'No significant difference in profit based on median discount.'
        hypothesis_results.append(f"Hypothesis 2: Discount vs. Profit (T-Test)\nT-statistic: {t_stat:.4f}, P-value: {p_value:.4f}\n{conclusion} Null Hypothesis: {interpretation}")
    else:
        hypothesis_results.append("Hypothesis 2: Discount vs. Profit (T-Test)\nCould not perform test: requires data in both high and low discount groups.")

    weekend_sales = df[df['Weekend'] == True]['Sales'].dropna()
    weekday_sales = df[df['Weekend'] == False]['Sales'].dropna()
    if len(weekend_sales) >= 2 and len(weekday_sales) >= 2:
        t_stat, p_value = ttest_ind(weekend_sales, weekday_sales, equal_var=False)
        conclusion = 'Reject' if p_value < 0.05 else 'Fail to Reject'
        interpretation = 'Sales differ significantly between weekends and weekdays.' if p_value < 0.05 else 'No significant difference in sales between weekends and weekdays.'
        hypothesis_results.append(f"Hypothesis 3: Weekend vs. Weekday Sales (T-Test)\nT-statistic: {t_stat:.4f}, P-value: {p_value:.4f}\n{conclusion} Null Hypothesis: {interpretation}")
    else:
        hypothesis_results.append("Hypothesis 3: Weekend vs. Weekday Sales (T-Test)\nCould not perform test: requires sales data for both weekends and weekdays.")

    df_corr = df[['Discount', 'Sales']].dropna()
    if len(df_corr) >= 2:
        corr, p_value = pearsonr(df_corr['Discount'], df_corr['Sales'])
        conclusion = 'Reject' if p_value < 0.05 else 'Fail to Reject'
        interpretation = f"Significant {'positive' if corr > 0 else 'negative'} correlation between Discount and Sales." if p_value < 0.05 else 'No significant linear correlation between Discount and Sales.'
        hypothesis_results.append(f"Hypothesis 4: Discount vs. Sales (Correlation)\nPearson Correlation: {corr:.4f}, P-value: {p_value:.4f}\n{conclusion} Null Hypothesis: {interpretation}")
    else:
        hypothesis_results.append("Hypothesis 4: Discount vs. Sales (Correlation)\nCould not perform test: requires at least 2 pairs of non-missing Discount and Sales values.")

    fig17 = make_subplots(rows=1, cols=3, subplot_titles=['Discount vs Profit Groups', 'Sales by Region', 'Weekend vs Weekday Sales'])
    df['Discount Group'] = df['Discount'].apply(lambda x: 'High Discount' if x > median_discount else 'Low Discount')
    fig17.add_trace(go.Box(x=df['Discount Group'], y=df['Profit'], marker_color='#17a2b8', name='Profit by Discount'), row=1, col=1)
    fig17.add_trace(go.Box(x=df['Region'], y=df['Sales'], marker_color='#17a2b8', name='Sales by Region'), row=1, col=2)
    fig17.add_trace(go.Box(x=df['Weekend'].map({True: 'Weekend', False: 'Weekday'}), y=df['Sales'], marker_color='#17a2b8', name='Sales by Day Type'), row=1, col=3)
    fig17.update_layout(height=450, showlegend=False, title="Visual Comparison for Hypothesis Tests")

    if 'Sub-Category' in df.columns:
        profit_margin_data = df.groupby('Sub-Category').agg(
            TotalSales=('Sales', 'sum'),
            TotalProfit=('Profit', 'sum')
        ).reset_index()
        profit_margin_data['ProfitMargin'] = np.where(
            profit_margin_data['TotalSales'] != 0,
            (profit_margin_data['TotalProfit'] / profit_margin_data['TotalSales']) * 100,
            0
        )
        profit_margin_data = profit_margin_data.sort_values('ProfitMargin', ascending=False)
        fig18 = px.bar(
            profit_margin_data,
            x='Sub-Category',
            y='ProfitMargin',
            title='Profit Margin (%) by Sub-Category',
            labels={'ProfitMargin': 'Profit Margin (%)', 'Sub-Category': 'Sub-Category'},
            color='Sub-Category',
            color_discrete_sequence=px.colors.qualitative.Bold,
            text=profit_margin_data['ProfitMargin'].round(1)
        )
        fig18.update_traces(texttemplate='%{text}%', textposition='outside')
        fig18.update_layout(xaxis_tickangle=-45, height=500)
    else:
        fig18 = go.Figure().update_layout(title='Profit Margin by Sub-Category (Columns not found)')

    discount_profit_data = pd.DataFrame({
        'Discount Level': ['High Discount (>Median)', 'Low Discount (<=Median)'],
        'Average Profit': [high_discount_profit.mean() if len(high_discount_profit)>0 else 0,
                           low_discount_profit.mean() if len(low_discount_profit)>0 else 0]
    })
    fig19 = px.bar(
        discount_profit_data,
        x='Discount Level',
        y='Average Profit',
        title='Average Profit by Discount Level',
        labels={'Average Profit': 'Average Profit ($)'},
        color='Discount Level',
        color_discrete_sequence=px.colors.qualitative.Bold,
        text=discount_profit_data['Average Profit'].round(2)
    )
    fig19.update_traces(textposition='outside')
    fig19.update_layout(height=400)

    high_discount_loss = df[(df['Discount'] > 0.5) & (df['Profit'] < 0)]
    if not high_discount_loss.empty:
        total_loss = high_discount_loss['Profit'].abs().sum()
        interesting_fact = f"Discounts over 50% resulted in a total loss of ${total_loss:,.2f} across {len(high_discount_loss)} transactions."
    else:
        interesting_fact = "No transactions found with discounts over 50% resulting in a loss."
else:
    print("Warning: One or more columns required for Hypothesis Testing ('Sales', 'Region', 'Discount', 'Profit', 'Weekend') not found. Skipping tests and related plots.")
    fig17 = go.Figure().update_layout(title='Hypothesis Testing Plots (Data Missing)')
    fig18 = go.Figure().update_layout(title='Profit Margin Plot (Data Missing)')
    fig19 = go.Figure().update_layout(title='Discount vs Profit Plot (Data Missing)')
    hypothesis_results = ["Hypothesis testing requires 'Sales', 'Region', 'Discount', 'Profit', and 'Weekend' columns."]
    interesting_fact = "Data missing for analysis."

# --- Dashboard Layout ---
app.layout = dbc.Container([
    # Header
    dbc.Row([
        dbc.Col([
            html.H1('Furniture Sales Dashboard', className='text-center mb-2', style={'color': '#ffffff'}),
            html.P('Interactive Analysis of Sales, Profits, and Trends', className='text-center text-muted mb-4')
        ], width=12)
    ]),
    # Tabs with Icons
    dbc.Tabs([
        dbc.Tab(
            label='Overview',
            tab_id='tab-overview',
            label_style={'padding': '10px'},
            children=[
                html.I(className='fas fa-tachometer-alt mr-2'),
                html.Span('Overview'),
                dbc.Row([
                    dbc.Col(dbc.Card(dcc.Graph(figure=fig1), className='card'), lg=6, md=12, className='mb-4'),
                    dbc.Col(dbc.Card(dcc.Graph(figure=fig2), className='card'), lg=6, md=12, className='mb-4')
                ], className='mt-4'),
                dbc.Row([
                    dbc.Col(dbc.Card(dcc.Graph(figure=fig9), className='card'), lg=6, md=12, className='mb-4'),
                    dbc.Col(dbc.Card(dcc.Graph(figure=fig7), className='card'), lg=6, md=12, className='mb-4')
                ])
            ]
        ),
        dbc.Tab(
            label='Numerical Features',
            tab_id='tab-numerical',
            label_style={'padding': '10px'},
            children=[
                html.I(className='fas fa-chart-bar mr-2'),
                html.Span('Numerical Features'),
                dbc.Row([
                    dbc.Col(dbc.Card(dcc.Graph(figure=fig3), className='card'), width=12, className='mb-4')
                ], className='mt-4'),
                dbc.Row([
                    dbc.Col(dbc.Card(dcc.Graph(figure=fig5), className='card'), width=12, className='mb-4')
                ]),
                dbc.Row([
                    dbc.Col(dbc.Card(dcc.Graph(figure=fig4), className='card'), width=12, className='mb-4')
                ])
            ]
        ),
        dbc.Tab(
            label='Categorical Features',
            tab_id='tab-categorical',
            label_style={'padding': '10px'},
            children=[
                html.I(className='fas fa-tags mr-2'),
                html.Span('Categorical Features'),
                dbc.Row([
                    dbc.Col(dbc.Card(dcc.Graph(figure=fig6), className='card'), width=12, className='mb-4')
                ], className='mt-4'),
                dbc.Row([
                    dbc.Col(dbc.Card(dcc.Graph(figure=fig8), className='card'), width=12, className='mb-4')
                ]),
                dbc.Row([
                    dbc.Col(dbc.Card(dcc.Graph(figure=fig10), className='card'), lg=6, md=12, className='mb-4'),
                    dbc.Col(dbc.Card(dcc.Graph(figure=fig11), className='card'), lg=6, md=12, className='mb-4')
                ])
            ]
        ),
        dbc.Tab(
            label='Product & Sub-Category Analysis',
            tab_id='tab-product',
            label_style={'padding': '10px'},
            children=[
                html.I(className='fas fa-box-open mr-2'),
                html.Span('Product & Sub-Category Analysis'),
                dbc.Row([
                    dbc.Col(dbc.Card(dcc.Graph(figure=fig12), className='card'), width=12, className='mb-4')
                ], className='mt-4'),
                dbc.Row([
                    dbc.Col(dbc.Card(dcc.Graph(figure=fig13), className='card'), width=12, className='mb-4')
                ]),
                dbc.Row([
                    dbc.Col(dbc.Card(dcc.Graph(figure=fig18), className='card'), width=12, className='mb-4')
                ]),
                dbc.Row([
                    dbc.Col(dbc.Card(dcc.Graph(figure=fig16), className='card'), width=12, className='mb-4')
                ])
            ]
        ),
        dbc.Tab(
            label='Time Series Analysis',
            tab_id='tab-time-series',
            label_style={'padding': '10px'},
            children=[
                html.I(className='fas fa-clock mr-2'),
                html.Span('Time Series Analysis'),
                dbc.Row([
                    dbc.Col(dbc.Card(dcc.Graph(figure=fig14), className='card'), width=12, className='mb-4')
                ], className='mt-4'),
                dbc.Row([
                    dbc.Col(dbc.Card(dcc.Graph(figure=fig15), className='card'), width=12, className='mb-4')
                ])
            ]
        ),
        dbc.Tab(
            label='Insights & Hypothesis Testing',
            tab_id='tab-insights',
            label_style={'padding': '10px'},
            children=[
                html.I(className='fas fa-lightbulb mr-2'),
                html.Span('Insights & Hypothesis Testing'),
                dbc.Row([
                    dbc.Col(dbc.Card(dcc.Graph(figure=fig17), className='card'), width=12, className='mb-4')
                ], className='mt-4'),
                dbc.Row([
                    dbc.Col(dbc.Card(dcc.Graph(figure=fig19), className='card'), width=12, className='mb-4')
                ]),
                dbc.Row([
                    dbc.Col(dbc.Card([
                        html.H3('Key Fact', className='mt-4'),
                        html.P(interesting_fact, className='text-info', style={'fontSize': '1.1em'})
                    ], className='card'), width=12, className='mb-4')
                ]),
                dbc.Row([
                    dbc.Col(dbc.Card([
                        html.H3('Hypothesis Testing Results', className='mt-4'),
                        dcc.Markdown('\n\n---\n\n'.join(hypothesis_results), style={'whiteSpace': 'pre-wrap', 'color': '#adb5bd'})
                    ], className='card'), width=12, className='mb-4')
                ])
            ]
        )
    ])
], fluid=True)

# --- Run the App ---
if __name__ == '__main__':
    # Set debug=False for production deployment
    app.run(debug=True, port=8051)

# # --- Run the App ---
# if __name__ == '__main__':
#     # Terminate any existing ngrok tunnels
#     ngrok.kill()
#     # Start a new ngrok tunnel
#     public_url = ngrok.connect(8051)
#     print(f"Dash app running at: {public_url}")
#     # Run the app
#     app.run(debug=True, port=8051)

Error: cleaned_data.csv not found. Please update the file path.


In [6]:
public_url = ngrok.connect(8051)
print(f'Dashboard URL: {public_url}')

t=2025-05-09T18:39:50+0300 lvl=eror msg="failed to reconnect session" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
t=2025-05-09T18:39:50+0300 lvl=eror msg="session closing" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
t=2025-05-09T18:39:50+0300 lvl=eror msg="terminating with error" obj=app err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018

PyngrokNgrokError: The ngrok process errored on start: authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n.