In [35]:
#Importing Required libraries
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def analyze_sales_and_patterns(sales_df, customer_df):
    """Show a 3x2 grid of bar charts for category, customer, and weather sales.

    Args:
        sales_df: Transaction-level DataFrame. The ``category``,
            ``weather_condition``, ``total_amount`` and ``transactionID``
            columns are read.
        customer_df: Customer-level DataFrame. The ``customer_id``,
            ``num_items`` and ``avg_price`` columns are read. The frame is
            not modified.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    fig = make_subplots(
        rows=3, cols=2,
        subplot_titles=(
            'Top Categories by Total Sales ($)',
            'Sales Distribution by Weather',
            'Top 5 Customers by Total Spend',
            'Average Transaction Value by Weather',
            'Transaction Count by Category',
            'Transaction Count by Weather'),
        vertical_spacing=0.12,
        horizontal_spacing=0.1
    )

    # Revenue and transaction count per category; only the three categories
    # with the highest revenue are charted (in both category panels).
    category_totals = (
        sales_df.groupby('category')
        .agg({'total_amount': 'sum', 'transactionID': 'count'})
        .reset_index()
    )
    top_categories = category_totals.nlargest(3, 'total_amount')

    # Named aggregation labels each output column explicitly instead of
    # relying on the positional order of a flattened MultiIndex.
    weather_sales = (
        sales_df.groupby('weather_condition')
        .agg(
            total_sales=('total_amount', 'sum'),
            avg_sale=('total_amount', 'mean'),
            transactions=('transactionID', 'count'),
        )
        .reset_index()
        .sort_values('total_sales', ascending=True)
    )

    # Spend is estimated as items * average price. assign() returns a new
    # frame, so the caller's customer_df does not gain a 'total_spend' column.
    top_customers = customer_df.assign(
        total_spend=customer_df['num_items'] * customer_df['avg_price']
    ).nlargest(5, 'total_spend')

    # (row, col, x, y, colour, legend name) for each panel, in legend order.
    # NOTE: the two count traces share the legend label 'Transaction Count'.
    panels = [
        (1, 1, top_categories['category'], top_categories['total_amount'],
         '#1f77b4', 'Total Sales'),
        (1, 2, weather_sales['weather_condition'], weather_sales['total_sales'],
         '#2ca02c', 'Total Sales by Weather'),
        (2, 1, top_customers['customer_id'].astype(str), top_customers['total_spend'],
         '#ff7f0e', 'Top 5 Customers'),
        (2, 2, weather_sales['weather_condition'], weather_sales['avg_sale'],
         '#d62728', 'Avg Transaction'),
        (3, 1, top_categories['category'], top_categories['transactionID'],
         '#9467bd', 'Transaction Count'),
        (3, 2, weather_sales['weather_condition'], weather_sales['transactions'],
         '#8c564b', 'Transaction Count'),
    ]
    for row, col, x_values, y_values, color, trace_name in panels:
        fig.add_trace(
            go.Bar(x=x_values, y=y_values, marker_color=color, name=trace_name),
            row=row, col=col
        )

    fig.update_layout(
        height=1200,
        width=1200,
        showlegend=True,
        title_text="Sales Analysis: Categories, Customers, and Weather Impact",
        title_x=0.5,
        title_font_size=20
    )

    # Slant the tick labels on every subplot's x axis.
    fig.update_xaxes(tickangle=45)

    fig.show()
    
    
    

# Run the analysis.
# NOTE(review): `sales_df` and `customer_df` are not created in this cell —
# presumably they were loaded by a cell executed earlier; confirm before
# running this cell on its own.
analyze_sales_and_patterns(sales_df, customer_df)

╔═══════════════════════════════════════════════════════════════╗
║                 TOP 3 CATEGORIES BY SALES                     ║
╠═══════════════════╦═════════════════════╦═════════════════════╣
║     Category      ║    Total Sales      ║   Transactions      ║
╠═══════════════════╬═════════════════════╬═════════════════════╣
║ Snow Equipment    ║ $      52,703.13    ║             481     ║
║ Outdoor Furniture ║ $      50,780.70    ║             499     ║
║ Winter Clothing   ║ $      43,152.09    ║             507     ║
╚═══════════════════╩═════════════════════╩═════════════════════╝
                 TOP 3 CUSTOMERS PROFILE                       
╠═══════════════════╬═════════════════════╬═══════════════════╣
 Customer ID:           54049                                  
 Gender:                F                                      
 Age:                   33.0                                   
 Total Spend:           $608.40                                
 Number of Items:     

In [23]:
import pandas as pd
import plotly.express as px

def create_enhanced_state_map(store_df):
    """Show a US choropleth of sales per state with store markers and labels.

    Args:
        store_df: Store-level DataFrame. The ``state``, ``monthly_sales``,
            ``longitude``, ``latitude``, ``category`` and ``size_sqft``
            columns are read. ``state`` is passed to plotly with
            ``locationmode='USA-states'``.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    # Total monthly sales per state drives the choropleth colour.
    state_sales = store_df.groupby('state')['monthly_sales'].sum().reset_index()

    # Approximate state centre points as (longitude, latitude), used only to
    # position the abbreviation labels. There is no entry for AK or HI, so
    # those states (and any other code missing here) get no label.
    state_centroids = {
        'AL': (-86.79113, 32.806671), 'AZ': (-111.431221, 34.048928),
        'AR': (-92.373123, 34.969704), 'CA': (-119.681564, 36.116203),
        'CO': (-105.311104, 39.059811), 'CT': (-72.755371, 41.597782),
        'DE': (-75.507141, 39.318523), 'FL': (-81.686783, 27.664827),
        'GA': (-83.643074, 32.165622), 'ID': (-114.478828, 44.240459),
        'IL': (-88.986137, 40.349457), 'IN': (-86.258278, 39.849426),
        'IA': (-93.210526, 42.011539), 'KS': (-96.726486, 38.526600),
        'KY': (-84.670067, 37.668140), 'LA': (-91.867805, 31.169546),
        'ME': (-69.381927, 44.693947), 'MD': (-76.802101, 39.063946),
        'MA': (-71.530106, 42.230171), 'MI': (-84.536095, 43.326618),
        'MN': (-93.900192, 45.694454), 'MS': (-89.678696, 32.741646),
        'MO': (-92.288368, 38.456085), 'MT': (-110.454353, 46.921925),
        'NE': (-98.268082, 41.125370), 'NV': (-117.055374, 38.313515),
        'NH': (-71.563896, 43.452492), 'NJ': (-74.521011, 40.298904),
        'NM': (-106.248482, 34.840515), 'NY': (-74.948051, 42.165726),
        'NC': (-79.806419, 35.630066), 'ND': (-99.784012, 47.528912),
        'OH': (-82.764915, 40.388783), 'OK': (-96.928917, 35.565342),
        'OR': (-122.070938, 44.572021), 'PA': (-77.209755, 40.590752),
        'RI': (-71.511780, 41.680893), 'SC': (-80.945007, 33.856892),
        'SD': (-99.438828, 44.299782), 'TN': (-86.692345, 35.747845),
        'TX': (-97.563461, 31.054487), 'UT': (-111.862434, 40.150032),
        'VT': (-72.710686, 44.045876), 'VA': (-78.169968, 37.769337),
        'WA': (-121.490494, 47.400902), 'WV': (-80.954453, 38.491226),
        'WI': (-89.616508, 44.268543), 'WY': (-107.290284, 42.755966)
    }

    # Base layer: states shaded yellow -> dark red by total monthly sales.
    fig = px.choropleth(
        state_sales,
        locations='state',
        locationmode='USA-states',
        color='monthly_sales',
        scope='usa',
        color_continuous_scale=[
            [0, '#FFEB3B'],     # Yellow
            [0.25, '#FFA726'],  # Orange
            [0.5, '#FF7043'],   # Light Red
            [0.75, '#E53935'],  # Red
            [1, '#B71C1C']      # Dark Red
        ],
        labels={'monthly_sales': 'Monthly Sales ($)'},
        title='Retail Store Performance by State'
    )

    # One marker per store; `text` carries the store's sales and `customdata`
    # its category and floor area so the hover template can format them.
    fig.add_scattergeo(
        lon=store_df['longitude'],
        lat=store_df['latitude'],
        mode='markers',
        marker=dict(
            size=8,
            color='blue',
            opacity=0.7,
            line=dict(width=1, color='white')
        ),
        hovertemplate=(
            '<b>Store Details</b><br>'
            'Monthly Sales: $%{text:,.2f}<br>'
            'Category: %{customdata[0]}<br>'
            'Size: %{customdata[1]:,} sq.ft<br>'
            '<extra></extra>'
        ),
        text=store_df['monthly_sales'],
        customdata=store_df[['category', 'size_sqft']].values,
        name='Store Locations'
    )

    # Draw every state abbreviation in a single text trace rather than adding
    # one trace per state, which keeps the figure small.
    labelled_states = [
        state for state in state_sales['state'] if state in state_centroids
    ]
    if labelled_states:
        fig.add_scattergeo(
            lon=[state_centroids[state][0] for state in labelled_states],
            lat=[state_centroids[state][1] for state in labelled_states],
            mode='text',
            text=labelled_states,
            textfont=dict(
                size=10,
                color='black',
                family='Arial Black'
            ),
            showlegend=False,
            hoverinfo='skip'
        )

    fig.update_layout(
        title_font_size=24,
        geo=dict(
            scope='usa',
            showsubunits=True,
            subunitcolor='black',
            subunitwidth=1,
            showland=True,
            landcolor='rgb(250, 250, 250)',
            showlakes=True,
            lakecolor='rgb(255, 255, 255)',
            showcoastlines=True,
            coastlinecolor='black',
            coastlinewidth=1,
            projection_type='albers usa',
            showframe=True,
            framecolor='black',
            framewidth=2,
            resolution=50
        ),
        width=1300,
        height=800,
        paper_bgcolor='white',
        plot_bgcolor='white',
        margin=dict(l=0, r=0, t=50, b=0)
    )

    fig.update_coloraxes(
        colorbar_title='Monthly Sales ($)',
        colorbar_tickprefix='$',
        colorbar_tickformat=',.0f',
        colorbar_len=0.75,
        colorbar_thickness=20,
        colorbar_x=0.95
    )

    # The selector restricts this hover template to the choropleth layer, so
    # the store-marker template set above is left alone.
    fig.update_traces(
        hovertemplate='<b>%{location}</b><br>' +
                      'State Total Sales: $%{z:,.2f}<extra></extra>',
        selector=dict(type='choropleth')
    )

    fig.show()

# Read the store table from 'store_data.csv' (a relative path) and render the
# state map from it.
store_df = pd.read_csv('store_data.csv')
create_enhanced_state_map(store_df)

In [16]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import warnings
# Silence all warnings from this point on. This also hides deprecation and
# future-change notices raised by the libraries used below.
warnings.filterwarnings('ignore')

def analyze_time_trends(sales_df, store_df):
    """Print store-level sales statistics and plot monthly sales trends.

    Args:
        sales_df: Transaction-level DataFrame. The ``date``, ``total_amount``
            and ``transactionID`` columns are read; ``date`` is parsed with
            the ``%m/%d/%y`` format (e.g. "1/1/23"). The frame is not
            modified.
        store_df: Store-level DataFrame. Only ``monthly_sales`` is read.

    Returns:
        None. Statistics are printed and the figure is displayed via
        ``fig.show()``.
    """
    # Parse the dates onto a new frame so the caller's sales_df keeps its
    # original 'date' column, then drop everything from 2024-01-01 onwards.
    sales = sales_df.assign(
        date=pd.to_datetime(sales_df['date'], format='%m/%d/%y')
    )
    sales = sales[sales['date'] < '2024-01-01']

    # Aggregate directly by calendar month across all stores: revenue is
    # summed and transactions are counted.
    total_monthly_sales = (
        sales.groupby(sales['date'].dt.to_period('M'))
        .agg({'total_amount': 'sum', 'transactionID': 'count'})
        .reset_index()
    )

    # Store-level figures come from store_df, not from the transactions above.
    print("Store-Level Monthly Sales Statistics:")
    store_monthly_sales = store_df['monthly_sales']
    print(f"Highest Store Monthly Sales: ${store_monthly_sales.max():,.2f}")
    print(f"Average Store Monthly Sales: ${store_monthly_sales.mean():,.2f}")
    print(f"Lowest Store Monthly Sales: ${store_monthly_sales.min():,.2f}")

    # Period values are converted to strings such as "2023-01" for the x axis.
    month_labels = total_monthly_sales['date'].astype(str)

    fig = go.Figure()

    fig.add_trace(
        go.Scatter(
            x=month_labels,
            y=total_monthly_sales['total_amount'],
            name='Total Sales',
            line=dict(color='blue'),
            yaxis='y'
        )
    )

    # Transaction counts go on a secondary axis (y2) because their scale
    # differs from the dollar totals.
    fig.add_trace(
        go.Scatter(
            x=month_labels,
            y=total_monthly_sales['transactionID'],
            name='Number of Transactions',
            line=dict(color='red'),
            yaxis='y2'
        )
    )

    fig.update_layout(
        title='Monthly Sales and Transaction Trends (All Stores Combined)',
        xaxis_title='Month',
        yaxis_title='Total Sales ($)',
        yaxis2=dict(
            title='Number of Transactions',
            overlaying='y',
            side='right'
        ),
        width=1200,
        height=600,
        showlegend=True,
        hovermode='x unified'
    )

    fig.show()

# Load the sales and store tables from their CSV files (relative paths), then
# run the time-trend analysis on them.
sales_df = pd.read_csv('sales_data.csv')
store_df = pd.read_csv('store_data.csv')
analyze_time_trends(sales_df, store_df)

Store-Level Monthly Sales Statistics:
Highest Store Monthly Sales: $1,975,276.00
Average Store Monthly Sales: $1,235,322.25
Lowest Store Monthly Sales: $404,617.00


## Advanced Analytics

In [38]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def perform_advanced_analysis(store_df, sales_df, customer_df):
    """
    Run store, customer, and sales-pattern analyses and print a findings report.

    Two figures are shown (a store-metric correlation heatmap and a customer
    segmentation scatter), followed by printed insights and recommendations.
    None of the input frames is modified.

    Args:
        store_df: Store-level DataFrame. Reads ``monthly_sales``,
            ``employee_count``, ``size_sqft``, ``customer_satisfaction`` and
            ``inventory_turnover``.
        sales_df: Transaction-level DataFrame. Reads ``weather_condition``,
            ``total_amount`` and ``date``.
        customer_df: Customer-level DataFrame. Reads ``num_items``,
            ``avg_price`` and ``age``.

    Returns:
        Tuple ``(store_metrics, customer_features, weather_impact)``:
        ``store_metrics`` is a copy of ``store_df`` plus ``sales_per_employee``
        and ``sales_efficiency``; ``customer_features`` is a copy of
        ``customer_df`` plus ``total_spend`` and ``spending_percentile``;
        ``weather_impact`` holds the average sale, total sales and transaction
        count per weather condition.
    """

    ###########################################
    # 1. STORE PERFORMANCE ANALYSIS
    ###########################################

    # Derived ratios: revenue per employee and revenue per square foot.
    store_metrics = store_df.copy()
    store_metrics['sales_per_employee'] = store_metrics['monthly_sales'] / store_metrics['employee_count']
    store_metrics['sales_efficiency'] = store_metrics['monthly_sales'] / store_metrics['size_sqft']

    # Pairwise correlations between the store metrics.
    store_corr = store_metrics[[
        'monthly_sales', 'sales_per_employee', 'sales_efficiency',
        'customer_satisfaction', 'inventory_turnover'
    ]].corr()

    fig_store = go.Figure(data=go.Heatmap(
        z=store_corr,
        x=store_corr.columns,
        y=store_corr.columns,
        colorscale='RdBu'
    ))
    fig_store.update_layout(title='Store Metrics Correlation Analysis')
    fig_store.show()

    ###########################################
    # 2. CUSTOMER SEGMENTATION
    ###########################################

    # Spend is estimated as items bought * average price paid.
    customer_features = customer_df.copy()
    customer_features['total_spend'] = customer_features['num_items'] * customer_features['avg_price']

    # Split customers into three equal-sized groups (terciles) by spend.
    customer_features['spending_percentile'] = pd.qcut(
        customer_features['total_spend'],
        q=3,
        labels=['Low Spenders', 'Medium Spenders', 'High Spenders']
    )

    # observed=False is stated explicitly so every label gets a row and the
    # result does not depend on the library's default for categorical keys.
    segment_analysis = customer_features.groupby(
        'spending_percentile', observed=False
    ).agg({
        'total_spend': ['mean', 'count'],
        'num_items': 'mean',
        'avg_price': 'mean',
        'age': 'mean'
    }).round(2)

    fig_customers = px.scatter(
        customer_features,
        x='total_spend',
        y='num_items',
        color='spending_percentile',
        size='avg_price',
        title='Customer Segmentation Analysis'
    )
    fig_customers.show()

    ###########################################
    # 3. SALES PATTERN ANALYSIS
    ###########################################

    # Sales by weather condition, ranked once by total sales for reuse below.
    weather_impact = sales_df.groupby('weather_condition').agg({
        'total_amount': ['mean', 'sum', 'count']
    }).round(2)
    weather_impact.columns = ['Avg Sale', 'Total Sales', 'Transaction Count']
    weather_ranked = weather_impact.sort_values('Total Sales', ascending=False)

    # Sales by day of week. The parsed date and the weekday name are added to
    # a new frame so the caller's sales_df is left untouched.
    parsed_dates = pd.to_datetime(sales_df['date'])
    sales_by_day = sales_df.assign(
        date=parsed_dates,
        day_of_week=parsed_dates.dt.day_name()
    )

    daily_pattern = sales_by_day.groupby('day_of_week').agg({
        'total_amount': ['mean', 'sum', 'count']
    }).round(2)
    daily_pattern.columns = ['Avg Sale', 'Total Sales', 'Transaction Count']

    ###########################################
    # 4. PRINT INSIGHTS
    ###########################################

    print("\n" + "="*50)
    print("ADVANCED ANALYSIS INSIGHTS")
    print("="*50)

    # Store performance insights
    print("\n1. Store Performance Analysis:")
    print("\nKey Performance Indicators:")
    print(f"- Average Sales per Employee: ${store_metrics['sales_per_employee'].mean():,.2f}")
    print(f"- Average Sales per Sq.Ft: ${store_metrics['sales_efficiency'].mean():,.2f}")
    print(f"- Customer Satisfaction Correlation with Sales: {store_corr.loc['monthly_sales', 'customer_satisfaction']:.2f}")

    # Customer segment insights
    print("\n2. Customer Segmentation Analysis:")
    for segment in segment_analysis.index:
        stats = segment_analysis.loc[segment]
        print(f"\n{segment}:")
        print(f"- Count: {stats[('total_spend', 'count')]:,.0f}")
        print(f"- Average Spend: ${stats[('total_spend', 'mean')]:,.2f}")
        print(f"- Average Items: {stats[('num_items', 'mean')]:,.1f}")
        print(f"- Average Age: {stats[('age', 'mean')]:,.1f}")

    # Sales pattern insights
    print("\n3. Sales Patterns:")
    print("\nTop Weather Conditions by Sales:")
    print(weather_ranked.head().to_string())

    print("\nDaily Sales Patterns:")
    print(daily_pattern.sort_values('Total Sales', ascending=False).to_string())

    ###########################################
    # 5. RECOMMENDATIONS
    ###########################################

    print("\n" + "="*50)
    print("ACTIONABLE RECOMMENDATIONS")
    print("="*50)

    # Store operations
    print("\n1. Store Optimization:")
    # Flag staffing when the spread of sales per employee exceeds 20% of its mean.
    if store_metrics['sales_per_employee'].std() > store_metrics['sales_per_employee'].mean() * 0.2:
        print("- Review staffing levels in low-performing stores")
    # Recommend satisfaction work only when its correlation with sales is above 0.3.
    if store_corr.loc['monthly_sales', 'customer_satisfaction'] > 0.3:
        print("- Focus on customer satisfaction improvement initiatives")
    print("- Optimize inventory based on weather patterns")

    # Customer strategy
    print("\n2. Customer Strategy:")
    for segment in segment_analysis.index:
        print(f"\n{segment} Strategy:")
        if segment == 'High Spenders':
            print("- Implement loyalty rewards program")
            print("- Provide exclusive early access to sales")
        elif segment == 'Medium Spenders':
            print("- Create upgrade paths to premium products")
            print("- Offer bundled discounts")
        else:
            print("- Introduce entry-level product lines")
            print("- Provide first-time buyer incentives")

    # Sales optimization
    print("\n3. Sales Optimization:")
    # With no weather rows there is no top condition to name, so skip that line.
    if not weather_ranked.empty:
        top_weather = weather_ranked.index[0]
        print(f"- Prepare special promotions for {top_weather} weather")
    print("- Adjust staffing based on weather forecasts")
    print("- Create weather-specific marketing campaigns")

    return store_metrics, customer_features, weather_impact

# Run the analysis and keep the three returned frames for later inspection.
# NOTE(review): `customer_df` is not loaded in any cell visible here —
# presumably it comes from a cell executed earlier; confirm before running.
store_metrics, customer_features, weather_impact = perform_advanced_analysis(store_df, sales_df, customer_df)


ADVANCED ANALYSIS INSIGHTS

1. Store Performance Analysis:

Key Performance Indicators:
- Average Sales per Employee: $37,316.62
- Average Sales per Sq.Ft: $11.40
- Customer Satisfaction Correlation with Sales: 0.05

2. Customer Segmentation Analysis:

Low Spenders:
- Count: 3,335
- Average Spend: $42.57
- Average Items: 9.1
- Average Age: 34.7

Medium Spenders:
- Count: 3,335
- Average Spend: $79.11
- Average Items: 11.1
- Average Age: 34.5

High Spenders:
- Count: 3,330
- Average Spend: $154.17
- Average Items: 12.8
- Average Age: 34.8

3. Sales Patterns:

Top Weather Conditions by Sales:
                   Avg Sale  Total Sales  Transaction Count
weather_condition                                          
Partly Cloudy         72.91    141665.89               1943
Clear                 74.24    126067.76               1698
Mild                  72.02     97660.59               1356
Rain                  73.20     77151.08               1054
Cool                  81.87     49287.35 