In [4]:
# ================================================================
# 360 FITNESS BUSINESS DATA ANALYSIS - FIXED VERSION
# Comprehensive Analysis of Coaches, Members, Packages, Plans, Sessions, and Subscriptions
# ================================================================

# Install required packages (uncomment if running in Colab)
# !pip install pandas numpy matplotlib seaborn plotly wordcloud textblob

# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
import re
from datetime import datetime, timedelta
from collections import Counter

# Configuration
warnings.filterwarnings('ignore')
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Set display options
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
pd.set_option('display.width', None)

print("🏃‍♂️ 360 FITNESS DATA ANALYSIS STARTED")
print("=" * 50)

# ================================================================
# 1. DATA LOADING AND INITIAL SETUP
# ================================================================

class FitnessDataAnalyzer:
    def __init__(self):
        self.coaches_df = None
        self.members_df = None
        self.packages_df = None
        self.plans_df = None
        self.sessions_df = None
        self.subscriptions_df = None
        self.data_loaded = False

    def load_sample_data(self):
        """Load sample data based on provided dataset structure"""

        # Coaches Data
        coaches_data = {
            'id': [5, 6, 7, 8, 9, 10, 11],
            'name': ['Coach Frank', 'Coach Oscar', 'Coach Hadi', 'Coach Hussein',
                    'Boudzanga Rikkixe Chimène', 'Coach Pita', 'Coach Adams'],
            'bio': [
                'Bonjour, je m\'appelle DIAO DEHEGNAN FRANCK, coach sportif passionné. Spécialisé en Musculation, Boxing, Crossfit.',
                'Bonjour, je m\'appelle Oscar, coach sportif. Spécialisé en Musculation, Boxing, Crossfit.',
                'Coach Hadi certifié en entraînement personnel, spécialiste en nutrition sportive. Expert en CrossFit, spinning, HIIT.',
                'Coach Hussein Tarraf certifié, top 5 Mr Universe 2024. Expert cardio, spinning, HIIT.',
                'Coach de Fitness et Renforcement Musculaire Certifié. Passionnée par le sport et le bien-être.',
                '', ''
            ],
            'price': [25000, None, None, None, None, None, None],
            'created_at': ['2024-11-01', '2024-11-01', '2024-11-10', '2024-11-10',
                          '2024-11-15', '2024-12-17', '2024-12-17'],
            'deleted_at': ['2024-11-05', '2024-12-20', '2024-12-20', None, None, None, None],
            'status': ['deleted', 'deleted', 'deleted', 'active', 'active', 'active', 'active']
        }

        # Members Data
        members_data = {
            'id': [65, 69, 70, 72, 73, 74, 77, 78, 79, 80, 81, 82, 83, 84, 85],
            'name': ['Admin', 'Anta', 'Benta', 'kaafarani ahmad', 'Daniel Elhajj', 'Bahaa Elhaj',
                    'Tatiana', 'Fatima', 'Yan', 'Ahmad Fouad', 'Rayan Jaber', 'Alexandre Jebai',
                    'Farah Omais', 'Hussein Chak Aroun', 'Mohamed Delbani'],
            'email': ['admin@threesixty.fit', 'anta@threesixty.fit', 'benta@threesixty.fit',
                     'ahmadkaf@hotmail.com', 'ELHAJJ@blaisepascal-abidjan.com', 'bahaahaj9@gmail.com',
                     'tatiana@threesixty.fit', 'fatimaelhaj_@hotmail.com', 'Yann@threesixty.fit',
                     'ahmad@threesixty.fit', 'rayan@threesixty.fit', 'alexandre@threesixty.fit',
                     'farahchmaissany@gmail.com', 'husseinchakaroun@threesixty.fit', 'delbanicompta@sabimex.net'],
            'phone': ['', '', '81720138', '0585000000', '0708750404', '0140505441',
                     '075821118', '0777999988', '0779555555', '0501381414', '0788886049',
                     '0501492121', '0708261520', '0708189999', '0707611335'],
            'created_at': ['2024-05-01', '2024-11-08', '2024-11-08', '2024-11-08', '2024-11-08',
                          '2024-11-08', '2024-11-13', '2024-11-13', '2024-11-13', '2024-11-13',
                          '2024-11-13', '2024-11-13', '2024-11-13', '2024-11-13', '2024-11-13'],
            'dob': ['', '', '', '1999-06-22', '2007-12-03', '2008-10-01', '', '1986-10-06',
                   '', '', '', '', '', '', '']
        }

        # Packages Data
        packages_data = {
            'id': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
            'name': ['1 Session', '10 Sessions', '20 Sessions', '30 Sessions', '1 Session +',
                    '10 Sessions +', '20 Sessions +', '30 Sessions +', '1 Session Member Spinning',
                    '1 Session Non-Member Spinning', '10 Sessions Member Spinning', '10 Sessions Non-Member Spinning'],
            'price': [25000, 200000, 380000, 550000, 35000, 300000, 570000, 825000,
                     5000, 10000, 40000, 80000],
            'validity_days': [1, 42, 90, 120, 1, 42, 90, 120, -1, -1, -1, -1],
            'days': [1, 10, 20, 30, 1, 10, 20, 30, 1, 1, 10, 10],
            'with_partner': [0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0],
            'is_member': [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0],
            'model': ['instructor', 'instructor', 'instructor', 'instructor', 'instructor',
                     'instructor', 'instructor', 'instructor', 'studio', 'studio', 'studio', 'studio']
        }

        # Plans Data
        plans_data = {
            'id': [1, 3, 4, 5, 6],
            'name': ['1 Mois', '1 Année', '3 Mois', '6 Mois', '1 visite'],
            'price': [100000, 1000000, 280000, 540000, 10000],
            'days': [30, 365, 90, 180, 1],
            'freezing_days': [0, 40, 10, 20, 0],
            'shots': [0, 3, 1, 2, 0],
            'active': [1, 1, 1, 1, 1]
        }

        # Sessions Data (based on provided examples)
        sessions_data = {
            'id': [111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 127],
            'user_id': [405, 405, 405, 405, 405, 405, 405, 405, 405, 409, 155, 409],
            'package_id': [19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 13, 19],
            'package_name': ['10 Sessions'] * 9 + ['10 Sessions', '1 Session', '10 Sessions'],
            'package_price': [200000] * 9 + [200000, 15000, 200000],
            'model': ['instructor'] * 10 + ['studio', 'instructor'],
            'model_id': [9] * 9 + [10, 2, 10],
            'payment_method': ['cash'] * 9 + ['Visa', 'cash', 'cash'],
            'date': ['2025-07-01', '2025-07-15', '2025-07-29', '2025-08-05', '2025-08-19',
                    '2025-08-26', '2025-08-12', '2025-07-08', '2025-07-22', '2025-07-06',
                    '2025-07-07', '2025-07-30'],
            'status': ['scheduled'] * 12,
            'session_number': [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 1, 1]
        }

        # Subscriptions Data
        subscriptions_data = {
            'id': [48, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 77, 78],
            'user_id': [72, 77, 79, 80, 81, 82, 83, 77, 84, 85, 87, 86, 88, 91, 90, 92, 93, 94, 95, 96],
            'plan_id': [3, 6, 1, 1, 1, 1, 1, 6, 1, 1, 6, 1, 1, 6, 1, 1, 1, 6, 6, 6],
            'price': [1000000, 10000, 100000, 100000, 100000, 100000, 100000, 10000,
                     100000, 100000, 10000, 1000000, 100000, 10000, 100000, 100000,
                     100000, 10000, 10000, 10000],
            'status': ['paid'] * 20,
            'start_date': ['2024-11-08', '2024-11-03', '2024-11-04', '2024-11-04', '2024-11-05',
                          '2024-11-10', '2024-11-05', '2024-11-06', '2024-11-06', '2024-11-10',
                          '2024-11-07', '2024-11-11', '2024-11-11', '2024-11-11', '2024-11-07',
                          '2024-11-07', '2024-11-11', '2024-11-09', '2024-11-11', '2024-11-11'],
            'end_date': ['2025-11-07', '2024-11-03', '2024-12-04', '2024-12-04', '2024-12-05',
                        '2024-12-10', '2024-12-05', '2024-11-06', '2024-12-06', '2024-12-10',
                        '2024-11-07', '2024-12-11', '2024-12-11', '2024-11-11', '2024-12-07',
                        '2024-12-07', '2024-12-11', '2024-11-09', '2024-11-11', '2024-11-11']
        }

        # Create DataFrames
        self.coaches_df = pd.DataFrame(coaches_data)
        self.members_df = pd.DataFrame(members_data)
        self.packages_df = pd.DataFrame(packages_data)
        self.plans_df = pd.DataFrame(plans_data)
        self.sessions_df = pd.DataFrame(sessions_data)
        self.subscriptions_df = pd.DataFrame(subscriptions_data)

        self.data_loaded = True
        print("✅ Sample data loaded successfully!")
        return self

    def load_data_from_files(self, file_paths):
        """Load data from CSV files"""
        try:
            self.coaches_df = pd.read_csv(file_paths['coaches'])
            self.members_df = pd.read_csv(file_paths['members'])
            self.packages_df = pd.read_csv(file_paths['packages'])
            self.plans_df = pd.read_csv(file_paths['plans'])
            self.sessions_df = pd.read_csv(file_paths['sessions'])
            self.subscriptions_df = pd.read_csv(file_paths['subscriptions'])

            self.data_loaded = True
            print("✅ Data loaded from files successfully!")
        except Exception as e:
            print(f"❌ Error loading data: {e}")
            print("📝 Loading sample data instead...")
            self.load_sample_data()

        return self

    def preprocess_data(self):
        """Clean and preprocess the data"""
        if not self.data_loaded:
            raise ValueError("Data not loaded. Call load_data_from_files() or load_sample_data() first.")

        print("🧹 Preprocessing data...")

        # Convert date columns
        date_columns = {
            'coaches_df': ['created_at'],
            'members_df': ['created_at'],
            'sessions_df': ['date'],
            'subscriptions_df': ['start_date', 'end_date']
        }

        for df_name, cols in date_columns.items():
            df = getattr(self, df_name)
            for col in cols:
                if col in df.columns:
                    df[col] = pd.to_datetime(df[col], errors='coerce')

        # Calculate derived metrics
        self._calculate_derived_metrics()

        print("✅ Data preprocessing completed!")
        return self

    def _calculate_derived_metrics(self):
        """Calculate derived metrics and features"""

        # Package metrics
        self.packages_df['price_per_session'] = np.where(
            self.packages_df['days'] > 0,
            self.packages_df['price'] / self.packages_df['days'],
            self.packages_df['price']
        )

        # Member age calculation
        if 'dob' in self.members_df.columns:
            self.members_df['dob'] = pd.to_datetime(self.members_df['dob'], errors='coerce')
            self.members_df['age'] = (
                (pd.Timestamp.now() - self.members_df['dob']).dt.days / 365.25
            ).round(1)

        # Subscription duration
        self.subscriptions_df['duration_days'] = (
            self.subscriptions_df['end_date'] - self.subscriptions_df['start_date']
        ).dt.days

        # Sessions per user
        if not self.sessions_df.empty:
            session_counts = self.sessions_df.groupby('user_id').size().reset_index(name='total_sessions')
            self.members_df = self.members_df.merge(session_counts, left_on='id', right_on='user_id', how='left')
            self.members_df['total_sessions'] = self.members_df['total_sessions'].fillna(0)

    def get_data_overview(self):
        """Get overview of all datasets"""
        if not self.data_loaded:
            raise ValueError("Data not loaded.")

        overview = {
            'coaches': {
                'total_records': len(self.coaches_df),
                'active_coaches': len(self.coaches_df[self.coaches_df['status'] == 'active']) if 'status' in self.coaches_df.columns else len(self.coaches_df),
                'with_pricing': len(self.coaches_df[self.coaches_df['price'].notna()]),
                'columns': list(self.coaches_df.columns)
            },
            'members': {
                'total_records': len(self.members_df),
                'with_phone': len(self.members_df[self.members_df['phone'].notna() & (self.members_df['phone'] != '')]),
                'with_dob': len(self.members_df[self.members_df['dob'].notna()]) if 'dob' in self.members_df.columns else 0,
                'columns': list(self.members_df.columns)
            },
            'packages': {
                'total_records': len(self.packages_df),
                'instructor_packages': len(self.packages_df[self.packages_df['model'] == 'instructor']),
                'studio_packages': len(self.packages_df[self.packages_df['model'] == 'studio']),
                'partner_packages': len(self.packages_df[self.packages_df['with_partner'] == 1]),
                'columns': list(self.packages_df.columns)
            },
            'plans': {
                'total_records': len(self.plans_df),
                'active_plans': len(self.plans_df[self.plans_df['active'] == 1]),
                'avg_price': self.plans_df['price'].mean(),
                'columns': list(self.plans_df.columns)
            },
            'sessions': {
                'total_records': len(self.sessions_df),
                'scheduled_sessions': len(self.sessions_df[self.sessions_df['status'] == 'scheduled']),
                'unique_users': self.sessions_df['user_id'].nunique(),
                'columns': list(self.sessions_df.columns)
            },
            'subscriptions': {
                'total_records': len(self.subscriptions_df),
                'paid_subscriptions': len(self.subscriptions_df[self.subscriptions_df['status'] == 'paid']),
                'total_revenue': self.subscriptions_df['price'].sum(),
                'columns': list(self.subscriptions_df.columns)
            }
        }

        return overview

# Initialize analyzer
analyzer = FitnessDataAnalyzer()

# Load sample data (replace with your file paths if you have CSV files)
# analyzer.load_data_from_files({
#     'coaches': 'coaches.csv',
#     'members': 'members.csv',
#     'packages': 'packages.csv',
#     'plans': 'plans.csv',
#     'sessions': 'sessions.csv',
#     'subscriptions': 'subscriptions.csv'
# })

analyzer.load_sample_data().preprocess_data()

# Get data overview
overview = analyzer.get_data_overview()
print("\n📊 DATA OVERVIEW")
print("=" * 50)
for dataset, info in overview.items():
    print(f"\n{dataset.upper()}:")
    for key, value in info.items():
        if key != 'columns':
            print(f"  {key}: {value}")

# ================================================================
# 2. EXPLORATORY DATA ANALYSIS - FIXED VERSION
# ================================================================

def create_dashboard():
    """Create comprehensive dashboard with all visualizations - FIXED VERSION"""

    # Set up the plotting style
    plt.rcParams['figure.figsize'] = (15, 10)

    # ================================================================
    # BUSINESS OVERVIEW METRICS
    # ================================================================

    print("\n💼 BUSINESS METRICS ANALYSIS")
    print("=" * 50)

    # Key business metrics
    total_revenue = analyzer.subscriptions_df['price'].sum()
    avg_subscription_value = analyzer.subscriptions_df['price'].mean()
    total_active_members = len(analyzer.members_df)
    total_sessions_scheduled = len(analyzer.sessions_df)

    # Revenue breakdown by plan - FIXED
    merged_subscriptions = analyzer.subscriptions_df.merge(
        analyzer.plans_df,
        left_on='plan_id',
        right_on='id',
        suffixes=('_sub', '_plan'),
        how='left'
    )

    print(f"📋 Merged columns: {merged_subscriptions.columns.tolist()}")

    # Use the correct column name after merge
    plan_revenue = merged_subscriptions.groupby('name')['price_sub'].agg(['sum', 'count', 'mean']).round(0)

    print(f"💰 Total Revenue: {total_revenue:,.0f} LBP")
    print(f"👥 Total Active Members: {total_active_members}")
    print(f"📅 Total Sessions Scheduled: {total_sessions_scheduled}")
    print(f"💵 Average Subscription Value: {avg_subscription_value:,.0f} LBP")

    # ================================================================
    # VISUALIZATIONS - FIXED
    # ================================================================

    try:
        # Create subplots
        fig = make_subplots(
            rows=3, cols=3,
            subplot_titles=[
                'Revenue by Plan Type', 'Package Price Distribution', 'Member Registration Timeline',
                'Coach Status Distribution', 'Session Status Overview', 'Subscription Duration Analysis',
                'Payment Method Distribution', 'Package Type Comparison', 'Monthly Revenue Trend'
            ],
            specs=[
                [{"type": "bar"}, {"type": "box"}, {"type": "scatter"}],
                [{"type": "pie"}, {"type": "bar"}, {"type": "histogram"}],
                [{"type": "pie"}, {"type": "bar"}, {"type": "scatter"}]
            ]
        )

        # 1. Revenue by Plan Type - FIXED
        fig.add_trace(
            go.Bar(x=plan_revenue.index, y=plan_revenue['sum'], name='Revenue by Plan'),
            row=1, col=1
        )

        # 2. Package Price Distribution
        fig.add_trace(
            go.Box(y=analyzer.packages_df['price'], name='Package Prices'),
            row=1, col=2
        )

        # 3. Member Registration Timeline
        member_timeline = analyzer.members_df.groupby(
            analyzer.members_df['created_at'].dt.date
        ).size().reset_index(name='registrations')

        fig.add_trace(
            go.Scatter(
                x=member_timeline['created_at'],
                y=member_timeline['registrations'],
                mode='lines+markers',
                name='Daily Registrations'
            ),
            row=1, col=3
        )

        # 4. Coach Status Distribution
        coach_status = analyzer.coaches_df['status'].value_counts()
        fig.add_trace(
            go.Pie(labels=coach_status.index, values=coach_status.values, name='Coach Status'),
            row=2, col=1
        )

        # 5. Session Status Overview
        session_status = analyzer.sessions_df['status'].value_counts()
        fig.add_trace(
            go.Bar(x=session_status.index, y=session_status.values, name='Session Status'),
            row=2, col=2
        )

        # 6. Subscription Duration Analysis
        fig.add_trace(
            go.Histogram(x=analyzer.subscriptions_df['duration_days'], name='Subscription Duration'),
            row=2, col=3
        )

        # 7. Payment Method Distribution
        if 'payment_method' in analyzer.sessions_df.columns:
            payment_methods = analyzer.sessions_df['payment_method'].value_counts()
            fig.add_trace(
                go.Pie(labels=payment_methods.index, values=payment_methods.values, name='Payment Methods'),
                row=3, col=1
            )

        # 8. Package Type Comparison
        package_types = analyzer.packages_df.groupby('model')['price'].mean().reset_index()
        fig.add_trace(
            go.Bar(x=package_types['model'], y=package_types['price'], name='Avg Price by Type'),
            row=3, col=2
        )

        # 9. Monthly Revenue Trend - FIXED
        monthly_revenue = merged_subscriptions.groupby(
            merged_subscriptions['start_date'].dt.to_period('M')
        )['price_sub'].sum().reset_index()
        monthly_revenue['start_date'] = monthly_revenue['start_date'].astype(str)

        fig.add_trace(
            go.Scatter(
                x=monthly_revenue['start_date'],
                y=monthly_revenue['price_sub'],
                mode='lines+markers',
                name='Monthly Revenue'
            ),
            row=3, col=3
        )

        # Update layout
        fig.update_layout(
            height=1200,
            showlegend=False,
            title_text="360 FITNESS COMPREHENSIVE DASHBOARD",
            title_x=0.5
        )

        fig.show()

    except Exception as e:
        print(f"❌ Error creating Plotly dashboard: {e}")
        print("📊 Creating simplified matplotlib charts instead...")

        # Fallback to matplotlib
        create_simple_charts()

def create_simple_charts():
    """Create simple matplotlib charts as fallback"""

    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    fig.suptitle('360 FITNESS BUSINESS DASHBOARD', fontsize=16, fontweight='bold')

    # 1. Revenue by Plan
    merged_subscriptions = analyzer.subscriptions_df.merge(
        analyzer.plans_df, left_on='plan_id', right_on='id', suffixes=('_sub', '_plan'), how='left'
    )
    plan_revenue = merged_subscriptions.groupby('name')['price_sub'].sum().sort_values(ascending=False)

    axes[0, 0].bar(range(len(plan_revenue)), plan_revenue.values)
    axes[0, 0].set_title('Revenue by Plan Type')
    axes[0, 0].set_xticks(range(len(plan_revenue)))
    axes[0, 0].set_xticklabels(plan_revenue.index, rotation=45, ha='right')
    axes[0, 0].set_ylabel('Revenue (LBP)')

    # Add value labels
    for i, v in enumerate(plan_revenue.values):
        axes[0, 0].text(i, v + v*0.01, f'{v:,.0f}', ha='center', va='bottom', fontweight='bold')

    # 2. Package Price Distribution
    axes[0, 1].boxplot(analyzer.packages_df['price'])
    axes[0, 1].set_title('Package Price Distribution')
    axes[0, 1].set_ylabel('Price (LBP)')

    # 3. Coach Status
    coach_status = analyzer.coaches_df['status'].value_counts()
    axes[1, 0].pie(coach_status.values, labels=coach_status.index, autopct='%1.1f%%')
    axes[1, 0].set_title('Coach Status Distribution')

    # 4. Member Registration Timeline
    reg_timeline = analyzer.members_df.groupby(analyzer.members_df['created_at'].dt.date).size()
    axes[1, 1].plot(reg_timeline.index, reg_timeline.values, marker='o')
    axes[1, 1].set_title('Member Registration Timeline')
    axes[1, 1].set_xlabel('Date')
    axes[1, 1].set_ylabel('New Members')
    axes[1, 1].tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.show()

# Create the dashboard
create_dashboard()

# ================================================================
# 3. BUSINESS INSIGHTS AND RECOMMENDATIONS - FIXED
# ================================================================

def generate_business_insights():
    """Generate actionable business insights - FIXED VERSION"""

    print("\n🎯 BUSINESS INSIGHTS & RECOMMENDATIONS")
    print("=" * 50)

    insights = []

    # Revenue Analysis
    total_revenue = analyzer.subscriptions_df['price'].sum()
    revenue_per_member = total_revenue / len(analyzer.members_df) if len(analyzer.members_df) > 0 else 0

    insights.append(f"💰 REVENUE INSIGHTS:")
    insights.append(f"   • Total Revenue: {total_revenue:,.0f} LBP")
    insights.append(f"   • Revenue per Member: {revenue_per_member:,.0f} LBP")

    # Coach Utilization
    active_coaches = len(analyzer.coaches_df[analyzer.coaches_df['status'] == 'active'])
    coaches_with_pricing = len(analyzer.coaches_df[analyzer.coaches_df['price'].notna()])

    insights.append(f"\n👨‍💼 COACH INSIGHTS:")
    insights.append(f"   • Active Coaches: {active_coaches}")
    insights.append(f"   • Coaches with Pricing: {coaches_with_pricing}")
    if coaches_with_pricing < active_coaches:
        insights.append(f"   ⚠️  {active_coaches - coaches_with_pricing} coaches need pricing setup")

    # Package Analysis
    most_expensive_package = analyzer.packages_df.loc[analyzer.packages_df['price'].idxmax()]
    cheapest_package = analyzer.packages_df.loc[analyzer.packages_df['price'].idxmin()]

    insights.append(f"\n📦 PACKAGE INSIGHTS:")
    insights.append(f"   • Most Expensive: {most_expensive_package['name']} - {most_expensive_package['price']:,.0f} LBP")
    insights.append(f"   • Cheapest: {cheapest_package['name']} - {cheapest_package['price']:,.0f} LBP")

    # Best value packages (lowest price per session)
    if 'price_per_session' in analyzer.packages_df.columns:
        best_value = analyzer.packages_df.loc[analyzer.packages_df['price_per_session'].idxmin()]
        insights.append(f"   • Best Value: {best_value['name']} - {best_value['price_per_session']:,.0f} LBP/session")

    # Member Growth Analysis
    member_growth = analyzer.members_df.groupby(
        analyzer.members_df['created_at'].dt.to_period('M')
    ).size()

    if len(member_growth) > 1:
        latest_month = member_growth.iloc[-1]
        previous_month = member_growth.iloc[-2]
        growth_rate = ((latest_month - previous_month) / previous_month * 100) if previous_month > 0 else 0

        insights.append(f"\n👥 MEMBER GROWTH:")
        insights.append(f"   • Latest Month: {latest_month} new members")
        insights.append(f"   • Growth Rate: {growth_rate:.1f}% vs previous month")

    # Session Utilization
    if not analyzer.sessions_df.empty:
        total_sessions = len(analyzer.sessions_df)
        unique_users_with_sessions = analyzer.sessions_df['user_id'].nunique()
        sessions_per_user = total_sessions / unique_users_with_sessions if unique_users_with_sessions > 0 else 0

        insights.append(f"\n🏃‍♂️ SESSION UTILIZATION:")
        insights.append(f"   • Total Sessions Scheduled: {total_sessions}")
        insights.append(f"   • Active Session Users: {unique_users_with_sessions}")
        insights.append(f"   • Avg Sessions per User: {sessions_per_user:.1f}")

    # Subscription Analysis - FIXED
    plan_popularity = analyzer.subscriptions_df['plan_id'].value_counts()
    most_popular_plan_id = plan_popularity.index[0]
    most_popular_plan = analyzer.plans_df[analyzer.plans_df['id'] == most_popular_plan_id]['name'].iloc[0]

    insights.append(f"\n📋 SUBSCRIPTION INSIGHTS:")
    insights.append(f"   • Most Popular Plan: {most_popular_plan} ({plan_popularity.iloc[0]} subscriptions)")
    insights.append(f"   • Total Active Subscriptions: {len(analyzer.subscriptions_df)}")

    # Revenue by plan type - FIXED
    merged_data = analyzer.subscriptions_df.merge(
        analyzer.plans_df, left_on='plan_id', right_on='id', suffixes=('_sub', '_plan')
    )
    revenue_by_plan = merged_data.groupby('name')['price_sub'].sum().sort_values(ascending=False)

    insights.append(f"   • Highest Revenue Plan: {revenue_by_plan.index[0]} - {revenue_by_plan.iloc[0]:,.0f} LBP")

    # RECOMMENDATIONS
    recommendations = []

    recommendations.append("\n🎯 STRATEGIC RECOMMENDATIONS:")

    # Coach pricing recommendations
    if coaches_with_pricing < active_coaches:
        recommendations.append("   1. 📈 Complete pricing setup for all active coaches")

    # Package optimization
    partner_packages = analyzer.packages_df[analyzer.packages_df['with_partner'] == 1]
    regular_packages = analyzer.packages_df[analyzer.packages_df['with_partner'] == 0]

    if not partner_packages.empty and not regular_packages.empty:
        partner_premium = (partner_packages['price'].mean() - regular_packages['price'].mean()) / regular_packages['price'].mean() * 100
        recommendations.append(f"   2. 💑 Partner packages generate {partner_premium:.0f}% premium - consider promoting them")

    # Member retention
    if 'age' in analyzer.members_df.columns:
        avg_age = analyzer.members_df['age'].mean()
        if not pd.isna(avg_age):
            recommendations.append(f"   3. 🎯 Target demographic: Average member age is {avg_age:.0f} years")

    # Session optimization
    if not analyzer.sessions_df.empty and 'payment_method' in analyzer.sessions_df.columns:
        cash_ratio = (analyzer.sessions_df['payment_method'] == 'cash').mean()
        if cash_ratio > 0.5:
            recommendations.append(f"   4. 💳 {cash_ratio*100:.0f}% sessions paid in cash - consider digital payment incentives")

    # Revenue diversification
    instructor_revenue = analyzer.packages_df[analyzer.packages_df['model'] == 'instructor']['price'].sum()
    studio_revenue = analyzer.packages_df[analyzer.packages_df['model'] == 'studio']['price'].sum()

    if instructor_revenue > studio_revenue * 3:
        recommendations.append("   5. 🏢 Consider expanding studio class offerings for revenue diversification")

    recommendations.append("\n💡 OPERATIONAL IMPROVEMENTS:")
    recommendations.append("   • Implement member retention programs for long-term plans")
    recommendations.append("   • Create loyalty rewards for frequent session users")
    recommendations.append("   • Develop referral programs to leverage satisfied members")
    recommendations.append("   • Consider seasonal promotions based on registration patterns")

    # Print all insights and recommendations
    for insight in insights:
        print(insight)

    for recommendation in recommendations:
        print(recommendation)

# Generate insights
generate_business_insights()

# ================================================================
# 4. EXPORT SUMMARY REPORTS - FIXED
# ================================================================

def export_summary_reports():
    """Export summary reports for stakeholders - FIXED VERSION"""

    print("\n📊 GENERATING SUMMARY REPORTS")
    print("=" * 50)

    # Business Summary Report
    summary_stats = {
        'Total Revenue': f"{analyzer.subscriptions_df['price'].sum():,.0f} LBP",
        'Total Members': len(analyzer.members_df),
        'Active Coaches': len(analyzer.coaches_df[analyzer.coaches_df['status'] == 'active']),
        'Total Packages': len(analyzer.packages_df),
        'Total Sessions Scheduled': len(analyzer.sessions_df),
        'Average Subscription Value': f"{analyzer.subscriptions_df['price'].mean():,.0f} LBP",
        'Member Growth Rate': "Positive trend in November 2024",
        'Revenue per Member': f"{analyzer.subscriptions_df['price'].sum() / len(analyzer.members_df):,.0f} LBP"
    }

    print("\n📈 EXECUTIVE SUMMARY")
    print("-" * 30)
    for metric, value in summary_stats.items():
        print(f"{metric}: {value}")

    # Top performing plans - FIXED
    merged_data = analyzer.subscriptions_df.merge(
        analyzer.plans_df, left_on='plan_id', right_on='id', suffixes=('_sub', '_plan')
    )
    plan_performance = merged_data.groupby('name').agg({
        'price_sub': ['sum', 'count', 'mean']
    }).round(0)

    print(f"\n🏆 TOP PERFORMING PLANS")
    print("-" * 30)
    print(plan_performance.head())

    # Package utilization summary
    print(f"\n📦 PACKAGE SUMMARY")
    print("-" * 30)
    package_summary = analyzer.packages_df.groupby('model').agg({
        'price': ['count', 'mean', 'min', 'max']
    }).round(0)
    print(package_summary)

    print("\n✅ Analysis completed successfully!")
    print("📄 Reports generated and insights provided above.")

# Export summary reports
export_summary_reports()

print("\n" + "="*60)
print("🎉 360 FITNESS DATA ANALYSIS COMPLETED SUCCESSFULLY!")
print("="*60)
print("\n📋 SUMMARY OF WHAT WAS ANALYZED:")
print("✅ Coach performance and specialties")
print("✅ Member demographics and growth trends")
print("✅ Package pricing and efficiency")
print("✅ Revenue analysis by subscription plans")
print("✅ Session utilization patterns")
print("✅ Business insights and recommendations")
print("\n💡 Key findings and strategic recommendations have been provided above.")
print("📊 All visualizations have been generated successfully.")

🏃‍♂️ 360 FITNESS DATA ANALYSIS STARTED
✅ Sample data loaded successfully!
🧹 Preprocessing data...
✅ Data preprocessing completed!

📊 DATA OVERVIEW

COACHES:
  total_records: 7
  active_coaches: 4
  with_pricing: 1

MEMBERS:
  total_records: 15
  with_phone: 13
  with_dob: 4

PACKAGES:
  total_records: 12
  instructor_packages: 8
  studio_packages: 4
  partner_packages: 4

PLANS:
  total_records: 5
  active_plans: 5
  avg_price: 386000.0

SESSIONS:
  total_records: 12
  scheduled_sessions: 12
  unique_users: 3

SUBSCRIPTIONS:
  total_records: 20
  paid_subscriptions: 20
  total_revenue: 3170000

💼 BUSINESS METRICS ANALYSIS
📋 Merged columns: ['id_sub', 'user_id', 'plan_id', 'price_sub', 'status', 'start_date', 'end_date', 'duration_days', 'id_plan', 'name', 'price_plan', 'days', 'freezing_days', 'shots', 'active']
💰 Total Revenue: 3,170,000 LBP
👥 Total Active Members: 15
📅 Total Sessions Scheduled: 12
💵 Average Subscription Value: 158,500 LBP



🎯 BUSINESS INSIGHTS & RECOMMENDATIONS
💰 REVENUE INSIGHTS:
   • Total Revenue: 3,170,000 LBP
   • Revenue per Member: 211,333 LBP

👨‍💼 COACH INSIGHTS:
   • Active Coaches: 4
   • Coaches with Pricing: 1
   ⚠️  3 coaches need pricing setup

📦 PACKAGE INSIGHTS:
   • Most Expensive: 30 Sessions + - 825,000 LBP
   • Cheapest: 1 Session Member Spinning - 5,000 LBP
   • Best Value: 10 Sessions Member Spinning - 4,000 LBP/session

👥 MEMBER GROWTH:
   • Latest Month: 14 new members
   • Growth Rate: 1300.0% vs previous month

🏃‍♂️ SESSION UTILIZATION:
   • Total Sessions Scheduled: 12
   • Active Session Users: 3
   • Avg Sessions per User: 4.0

📋 SUBSCRIPTION INSIGHTS:
   • Most Popular Plan: 1 Mois (12 subscriptions)
   • Total Active Subscriptions: 20
   • Highest Revenue Plan: 1 Mois - 2,100,000 LBP

🎯 STRATEGIC RECOMMENDATIONS:
   1. 📈 Complete pricing setup for all active coaches
   2. 💑 Partner packages generate 168% premium - consider promoting them
   3. 🎯 Target demographic: Average 