In [13]:
# Enhanced Library Management System - Data Analysis Notebook
"""
Comprehensive library data analysis with corrected database connections,
improved visualizations, and actionable insights for library management.
"""

import sys
import os
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from datetime import datetime, timedelta, date
import sqlite3
from sqlalchemy import create_engine, func, and_, text
from sqlalchemy.orm import sessionmaker
import json
import warnings
warnings.filterwarnings('ignore')

project_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))
if project_root not in sys.path:
    sys.path.append(project_root)

# Import your models and services
from utils.database_manager import MyDatabaseManager as DatabaseManager
from services.analytics_service import AnalyticsService
from db.models import Book, BorrowedBook, Patron, Payment, PaymentItem, Category, MembershipStatus, PaymentStatus


In [14]:

# =============================================================================
# SETUP AND CONFIGURATION
# =============================================================================

class LibraryAnalytics:
    """Comprehensive library analytics class with corrected database interactions."""
    
    def __init__(self, db_path):
        self.db_manager = DatabaseManager(db_path)
        self.analytics_service = AnalyticsService(self.db_manager)
        
        # Create direct SQLAlchemy connection for pandas operations
        self.engine = create_engine(f'sqlite:///{db_path}')
        
        print("Database connection established successfully!")
        self._validate_database()
    
    def _validate_database(self):
        """Validate database structure and show basic stats."""
        with self.db_manager.get_session() as session:
            tables_info = {
                'books': session.query(Book).count(),
                'patrons': session.query(Patron).count(),
                'borrowed_books': session.query(BorrowedBook).count(),
                'payments': session.query(Payment).count()
            }
            
        print("\nDatabase Overview:")
        for table, count in tables_info.items():
            print(f"  {table.replace('_', ' ').title()}: {count}")
        
        return tables_info
    
    # =============================================================================
    # DATA LOADING AND PREPARATION
    # =============================================================================
    
    def load_analysis_data(self):
        """Load all relevant data for analysis with proper relationships."""
        
        # Books with category information
        books_query = """
        SELECT 
            b.book_id, b.title, b.author, b.isbn, b.class_name, 
            b.accession_no, b.is_available, b.created_at
        FROM books b
        """
        self.books_df = pd.read_sql_query(books_query, self.engine)
        
        # Patrons with membership details
        patrons_query = """
        SELECT 
            p.user_id, p.patron_id, p.first_name, p.last_name,
            (p.first_name || ' ' || p.last_name) as full_name,
            p.institution, p.grade_level, p.category, p.age, p.gender,
            p.date_of_birth, p.residence, p.phone_number,
            p.membership_status, p.membership_start_date, p.membership_expiry_date,
            p.membership_type
        FROM patrons p
        """
        self.patrons_df = pd.read_sql_query(patrons_query, self.engine)
        
        # Borrowed books with complete relationship data
        borrowings_query = """
        SELECT 
            bb.borrow_id, bb.user_id, bb.book_id, bb.borrow_date, 
            bb.due_date, bb.return_date, bb.returned, bb.fine_amount,
            b.title as book_title, b.author as book_author, b.class_name as book_genre,
            (p.first_name || ' ' || p.last_name) as patron_name,
            p.category as patron_category, p.membership_status,
            CASE 
                WHEN bb.return_date IS NULL AND bb.due_date < date('now') 
                THEN (julianday('now') - julianday(bb.due_date))
                ELSE 0 
            END as days_overdue
        FROM borrowed_books bb
        JOIN books b ON bb.book_id = b.book_id
        JOIN patrons p ON bb.user_id = p.user_id
        """
        self.borrowings_df = pd.read_sql_query(borrowings_query, self.engine)
        
        # Payments with item details
        payments_query = """
        SELECT 
            py.payment_id, py.user_id, py.payment_item_id,
            py.amount_paid, py.total_amount_due, py.payment_date, py.status,
            py.membership_start_date, py.membership_expiry_date, 
            py.is_membership_active, py.notes,
            (p.first_name || ' ' || p.last_name) as patron_name,
            p.category as patron_category,
            pi.name as payment_type, pi.display_name as payment_description,
            pi.is_membership,
            (py.total_amount_due - py.amount_paid) as remaining_amount
        FROM payments py
        JOIN patrons p ON py.user_id = p.user_id
        JOIN payment_items pi ON py.payment_item_id = pi.id
        """
        self.payments_df = pd.read_sql_query(payments_query, self.engine)
        
        # Convert date columns
        date_columns = {
            'borrowings_df': ['borrow_date', 'due_date', 'return_date'],
            'payments_df': ['payment_date', 'membership_start_date', 'membership_expiry_date'],
            'patrons_df': ['membership_start_date', 'membership_expiry_date', 'date_of_birth']
        }
        
        for df_name, columns in date_columns.items():
            df = getattr(self, df_name)
            for col in columns:
                if col in df.columns:
                    df[col] = pd.to_datetime(df[col], errors='coerce')
        
        print(f"\nData loaded successfully:")
        print(f"  Books: {len(self.books_df)}")
        print(f"  Patrons: {len(self.patrons_df)}")
        print(f"  Borrowings: {len(self.borrowings_df)}")
        print(f"  Payments: {len(self.payments_df)}")
        
        return True
    
    # =============================================================================
    # CORE ANALYTICS FUNCTIONS
    # =============================================================================
    
    def analyze_collection_health(self):
        """Analyze the health and composition of the book collection."""
        if self.books_df.empty:
            return None
            
        # Genre distribution
        genre_dist = self.books_df['class_name'].value_counts()
        
        # Availability analysis
        availability_stats = {
            'total_books': len(self.books_df),
            'available_books': len(self.books_df[self.books_df['is_available'] == True]),
            'unavailable_books': len(self.books_df[self.books_df['is_available'] == False])
        }
        availability_stats['availability_rate'] = (availability_stats['available_books'] / availability_stats['total_books']) * 100
        
        # Create visualization
        fig = make_subplots(
            rows=1, cols=2,
            subplot_titles=('Collection by Genre', 'Book Availability'),
            specs=[[{"type": "pie"}, {"type": "bar"}]]
        )
        
        # Genre pie chart
        fig.add_trace(
            go.Pie(
                labels=genre_dist.index,
                values=genre_dist.values,
                name="Genre Distribution",
                hovertemplate="<b>%{label}</b><br>Books: %{value}<br>Percentage: %{percent}<extra></extra>"
            ), row=1, col=1
        )
        
        # Availability bar chart
        fig.add_trace(
            go.Bar(
                x=['Available', 'Checked Out'],
                y=[availability_stats['available_books'], availability_stats['unavailable_books']],
                marker_color=['#2E8B57', '#DC143C'],
                name="Availability"
            ), row=1, col=2
        )
        
        fig.update_layout(
            title="Library Collection Health Analysis",
            template="plotly_white",
            height=500
        )
        
        return fig, availability_stats, genre_dist
    
    def analyze_borrowing_patterns(self):
        """Comprehensive borrowing pattern analysis."""
        if self.borrowings_df.empty:
            return None
            
        # Monthly trends
        self.borrowings_df['borrow_month'] = self.borrowings_df['borrow_date'].dt.to_period('M')
        monthly_trends = self.borrowings_df.groupby('borrow_month').size()
        
        # Day of week patterns
        self.borrowings_df['day_of_week'] = self.borrowings_df['borrow_date'].dt.day_name()
        dow_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
        dow_patterns = self.borrowings_df['day_of_week'].value_counts().reindex(dow_order, fill_value=0)
        
        # Hourly patterns (if time data available)
        self.borrowings_df['borrow_hour'] = self.borrowings_df['borrow_date'].dt.hour
        hourly_patterns = self.borrowings_df['borrow_hour'].value_counts().sort_index()
        
        # Create comprehensive visualization
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=('Monthly Borrowing Trends', 'Borrowings by Day of Week', 
                          'Hourly Distribution', 'Genre Popularity'),
            specs=[[{"colspan": 2}, None], 
                   [{"type": "bar"}, {"type": "bar"}]]
        )
        
        # Monthly trend line
        fig.add_trace(
            go.Scatter(
                x=[str(m) for m in monthly_trends.index],
                y=monthly_trends.values,
                mode='lines+markers',
                name='Monthly Borrowings',
                line=dict(color='#2E86AB', width=3),
                marker=dict(size=8)
            ), row=1, col=1
        )
        
        # Day of week bar chart
        fig.add_trace(
            go.Bar(
                x=dow_patterns.index,
                y=dow_patterns.values,
                marker_color='#A23B72',
                name='Day of Week'
            ), row=2, col=1
        )
        
        # Hourly distribution
        fig.add_trace(
            go.Bar(
                x=hourly_patterns.index,
                y=hourly_patterns.values,
                marker_color='#F18F01',
                name='Hourly Pattern'
            ), row=2, col=2
        )
        
        fig.update_layout(
            title="Borrowing Pattern Analysis",
            template="plotly_white",
            height=700,
            showlegend=False
        )
        
        return fig, {
            'monthly_trends': monthly_trends,
            'peak_day': dow_patterns.idxmax(),
            'peak_hour': hourly_patterns.idxmax() if not hourly_patterns.empty else None,
            'total_borrowings': len(self.borrowings_df)
        }
    
    def analyze_patron_behavior(self):
        """Deep dive into patron behavior and engagement."""
        if self.borrowings_df.empty or self.patrons_df.empty:
            return None
            
        # Patron activity metrics
        patron_activity = self.borrowings_df.groupby('patron_name').agg({
            'borrow_id': 'count',
            'book_title': lambda x: x.nunique(),
            'days_overdue': 'sum',
            'fine_amount': 'sum'
        }).rename(columns={
            'borrow_id': 'total_borrowings',
            'book_title': 'unique_books',
            'days_overdue': 'total_overdue_days',
            'fine_amount': 'total_fines'
        })
        
        # Add payment data
        if not self.payments_df.empty:
            patron_payments = self.payments_df.groupby('patron_name')['amount_paid'].sum()
            patron_activity = patron_activity.join(patron_payments, how='left')
            patron_activity['amount_paid'] = patron_activity['amount_paid'].fillna(0)
        
        # Category-wise analysis
        category_stats = self.borrowings_df.groupby('patron_category').agg({
            'borrow_id': 'count',
            'patron_name': lambda x: x.nunique()
        }).rename(columns={'borrow_id': 'total_borrowings', 'patron_name': 'active_patrons'})
        
        # Create visualization
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=('Patron Activity Scatter', 'Borrowings by Category', 
                          'Top 10 Most Active Patrons', 'Membership Distribution'),
            specs=[[{"type": "scatter"}, {"type": "pie"}],
                   [{"type": "bar"}, {"type": "bar"}]]
        )
        
        # Activity scatter plot
        fig.add_trace(
            go.Scatter(
                x=patron_activity['total_borrowings'],
                y=patron_activity.get('amount_paid', [0] * len(patron_activity)),
                mode='markers',
                text=patron_activity.index,
                marker=dict(
                    size=patron_activity['unique_books'] * 2,
                    color=patron_activity['total_borrowings'],
                    colorscale='Viridis',
                    showscale=True,
                    sizemode='diameter',
                    sizemin=4
                ),
                hovertemplate='<b>%{text}</b><br>Borrowings: %{x}<br>Payments: $%{y}<extra></extra>'
            ), row=1, col=1
        )
        
        # Category pie chart
        fig.add_trace(
            go.Pie(
                labels=category_stats.index,
                values=category_stats['total_borrowings'],
                name="Category Distribution"
            ), row=1, col=2
        )
        
        # Top patrons bar chart
        top_patrons = patron_activity.nlargest(10, 'total_borrowings')
        fig.add_trace(
            go.Bar(
                x=top_patrons['total_borrowings'],
                y=[name[:20] + '...' if len(name) > 20 else name for name in top_patrons.index],
                orientation='h',
                marker_color='#C73E1D'
            ), row=2, col=1
        )
        
        # Membership status distribution
        if not self.patrons_df.empty:
            membership_dist = self.patrons_df['membership_status'].value_counts()
            fig.add_trace(
                go.Bar(
                    x=membership_dist.index,
                    y=membership_dist.values,
                    marker_color='#2E8B57'
                ), row=2, col=2
            )
        
        fig.update_layout(
            title="Patron Behavior Analysis",
            template="plotly_white",
            height=800
        )
        
        return fig, patron_activity
    
    def analyze_overdue_crisis(self):
        """Critical analysis of overdue books and patterns."""
        if self.borrowings_df.empty:
            return None
            
        # Current overdue analysis
        current_overdue = self.borrowings_df[
            (self.borrowings_df['return_date'].isna()) & 
            (self.borrowings_df['days_overdue'] > 0)
        ].copy()
        
        if current_overdue.empty:
            print("No overdue books found!")
            return None
            
        # Overdue severity categorization
        current_overdue['severity'] = pd.cut(
            current_overdue['days_overdue'],
            bins=[0, 7, 30, 90, float('inf')],
            labels=['1-7 days', '8-30 days', '31-90 days', '90+ days'],
            include_lowest=True
        )
        
        severity_dist = current_overdue['severity'].value_counts()
        
        # Top overdue patrons
        overdue_by_patron = current_overdue.groupby('patron_name').agg({
            'days_overdue': ['count', 'sum', 'mean'],
            'fine_amount': 'sum'
        }).round(2)
        overdue_by_patron.columns = ['overdue_books', 'total_days_overdue', 'avg_days_overdue', 'total_fines']
        
        # Create visualization
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=('Overdue Severity Distribution', 'Overdue Days Histogram', 
                          'Top 10 Overdue Patrons', 'Overdue by Genre'),
            specs=[[{"type": "pie"}, {"type": "histogram"}],
                   [{"type": "bar"}, {"type": "bar"}]]
        )
        
        # Severity pie chart
        colors = ['#90EE90', '#FFD700', '#FF8C00', '#FF4500']
        fig.add_trace(
            go.Pie(
                labels=severity_dist.index,
                values=severity_dist.values,
                marker_colors=colors,
                name="Overdue Severity"
            ), row=1, col=1
        )
        
        # Days overdue histogram
        fig.add_trace(
            go.Histogram(
                x=current_overdue['days_overdue'],
                nbinsx=20,
                marker_color='#FF6B6B',
                opacity=0.7,
                name="Days Distribution"
            ), row=1, col=2
        )
        
        # Top overdue patrons
        top_overdue = overdue_by_patron.nlargest(10, 'total_days_overdue')
        fig.add_trace(
            go.Bar(
                x=top_overdue['total_days_overdue'],
                y=[name[:15] + '...' if len(name) > 15 else name for name in top_overdue.index],
                orientation='h',
                marker_color='#DC143C'
            ), row=2, col=1
        )
        
        # Overdue by genre
        genre_overdue = current_overdue.groupby('book_genre')['days_overdue'].count().sort_values(ascending=False)
        fig.add_trace(
            go.Bar(
                x=genre_overdue.values,
                y=genre_overdue.index,
                orientation='h',
                marker_color='#800080'
            ), row=2, col=2
        )
        
        fig.update_layout(
            title="OVERDUE BOOKS CRISIS ANALYSIS",
            template="plotly_white",
            height=800
        )
        
        return fig, {
            'total_overdue': len(current_overdue),
            'avg_days_overdue': current_overdue['days_overdue'].mean(),
            'max_days_overdue': current_overdue['days_overdue'].max(),
            'severity_distribution': severity_dist,
            'total_potential_fines': current_overdue['fine_amount'].sum()
        }
    
    def analyze_financial_performance(self):
        """Comprehensive financial analysis with revenue insights."""
        if self.payments_df.empty:
            return None
            
        # Monthly revenue trends
        self.payments_df['payment_month'] = self.payments_df['payment_date'].dt.to_period('M')
        monthly_revenue = self.payments_df.groupby('payment_month').agg({
            'amount_paid': 'sum',
            'payment_id': 'count'
        }).rename(columns={'payment_id': 'transaction_count'})
        
        # Payment status analysis
        status_analysis = self.payments_df.groupby('status').agg({
            'amount_paid': 'sum',
            'remaining_amount': 'sum',
            'payment_id': 'count'
        })
        
        # Revenue by category
        category_revenue = self.payments_df.groupby('patron_category')['amount_paid'].sum()
        
        # Payment type performance
        payment_type_performance = self.payments_df.groupby('payment_type').agg({
            'amount_paid': 'sum',
            'payment_id': 'count'
        })
        
        # Create comprehensive financial dashboard
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=('Monthly Revenue Trends', 'Payment Status Distribution', 
                          'Revenue by Patron Category', 'Payment Type Performance'),
            specs=[[{"secondary_y": True}, {"type": "pie"}],
                   [{"type": "bar"}, {"type": "bar"}]]
        )
        
        # Monthly revenue with transaction count
        fig.add_trace(
            go.Scatter(
                x=[str(m) for m in monthly_revenue.index],
                y=monthly_revenue['amount_paid'],
                mode='lines+markers',
                name='Revenue',
                line=dict(color='#2E8B57', width=3)
            ), row=1, col=1
        )
        
        fig.add_trace(
            go.Bar(
                x=[str(m) for m in monthly_revenue.index],
                y=monthly_revenue['transaction_count'],
                name='Transactions',
                marker_color='#87CEEB',
                opacity=0.6
            ), row=1, col=1, secondary_y=True
        )
        
        # Payment status pie
        fig.add_trace(
            go.Pie(
                labels=status_analysis.index,
                values=status_analysis['amount_paid'],
                name="Status Distribution"
            ), row=1, col=2
        )
        
        # Category revenue bar
        fig.add_trace(
            go.Bar(
                x=category_revenue.index,
                y=category_revenue.values,
                marker_color='#FF8C00'
            ), row=2, col=1
        )
        
        # Payment type performance
        fig.add_trace(
            go.Bar(
                x=payment_type_performance.index,
                y=payment_type_performance['amount_paid'],
                marker_color='#9370DB'
            ), row=2, col=2
        )
        
        fig.update_layout(
            title="Financial Performance Dashboard",
            template="plotly_white",
            height=800
        )
        
        return fig, {
            'total_revenue': self.payments_df['amount_paid'].sum(),
            'outstanding_amount': self.payments_df['remaining_amount'].sum(),
            'avg_payment': self.payments_df['amount_paid'].mean(),
            'total_transactions': len(self.payments_df),
            'monthly_revenue': monthly_revenue
        }
    
    def generate_predictive_insights(self):
        """Generate actionable insights and predictions."""
        insights = []
        recommendations = []
        
        # Borrowing insights
        if not self.borrowings_df.empty:
            if 'day_of_week' not in self.borrowings_df.columns:
                self.borrowings_df['day_of_week'] = self.borrowings_df['borrow_date'].dt.day
                
            # Peak borrowing analysis
            peak_day = self.borrowings_df['day_of_week'].mode().iloc[0] if not self.borrowings_df.empty else None
            if peak_day:
                insights.append(f"Peak borrowing day: {peak_day}")
                recommendations.append(f"Ensure adequate staffing on {peak_day}s")
            
            # Return rate analysis
            total_borrowings = len(self.borrowings_df)
            returned_books = len(self.borrowings_df[self.borrowings_df['returned'] == True])
            return_rate = (returned_books / total_borrowings) * 100 if total_borrowings > 0 else 0
            insights.append(f"Overall return rate: {return_rate:.1f}%")
            
            if return_rate < 85:
                recommendations.append("Consider implementing stricter return policies or automated reminders")
            
            # Genre popularity
            popular_genre = self.borrowings_df['book_genre'].mode().iloc[0] if not self.borrowings_df['book_genre'].empty else None
            if popular_genre:
                insights.append(f"Most popular genre: {popular_genre}")
                recommendations.append(f"Consider expanding {popular_genre} collection")
        
        # Financial insights
        if not self.payments_df.empty:
            # Revenue trends
            recent_revenue = self.payments_df[
                self.payments_df['payment_date'] >= (datetime.now() - timedelta(days=30))
            ]['amount_paid'].sum()
            insights.append(f"Revenue (last 30 days): ${recent_revenue:.2f}")
            
            # Outstanding payments
            outstanding = self.payments_df['remaining_amount'].sum()
            if outstanding > 0:
                insights.append(f"Outstanding payments: ${outstanding:.2f}")
                recommendations.append("Implement payment reminder system for outstanding balances")
        
        # Membership insights
        if not self.patrons_df.empty:
            active_members = len(self.patrons_df[self.patrons_df['membership_status'] == 'active'])
            total_patrons = len(self.patrons_df)
            membership_rate = (active_members / total_patrons) * 100 if total_patrons > 0 else 0
            insights.append(f"Active membership rate: {membership_rate:.1f}%")
            
            if membership_rate < 60:
                recommendations.append("Launch membership drive campaign to increase active memberships")
        
        return insights, recommendations
    
    def create_executive_summary(self):
        """Generate comprehensive executive summary with key metrics."""
        summary = {
            'generated_at': datetime.now().isoformat(),
            'period': '30 days',
            'key_metrics': {},
            'alerts': [],
            'recommendations': []
        }
        
        # Basic metrics
        with self.db_manager.get_session() as session:
            summary['key_metrics']['total_books'] = session.query(Book).count()
            summary['key_metrics']['total_patrons'] = session.query(Patron).count()
            summary['key_metrics']['active_borrowings'] = session.query(BorrowedBook).filter(
                BorrowedBook.return_date.is_(None)
            ).count()
        
        # Financial metrics
        if not self.payments_df.empty:
            summary['key_metrics']['total_revenue'] = float(self.payments_df['amount_paid'].sum())
            summary['key_metrics']['outstanding_payments'] = float(self.payments_df['remaining_amount'].sum())
        
        # Generate alerts
        if not self.borrowings_df.empty:
            overdue_count = len(self.borrowings_df[self.borrowings_df['days_overdue'] > 0])
            if overdue_count > 0:
                summary['alerts'].append(f"URGENT: {overdue_count} overdue books require immediate attention")
        
        # Get recommendations
        _, recommendations = self.generate_predictive_insights()
        summary['recommendations'] = recommendations
        
        return summary


In [15]:


# =============================================================================
# MAIN ANALYSIS EXECUTION
# =============================================================================

def run_comprehensive_analysis(db_path):
    """Run complete library analysis and generate report."""
    
    print("=" * 60)
    print("COMPREHENSIVE LIBRARY ANALYTICS REPORT")
    print("=" * 60)
    
    # Initialize analytics
    analytics = LibraryAnalytics(db_path)
    analytics.load_analysis_data()
    
    # Store all figures for export
    analysis_results = {}
    
    print("\n1. COLLECTION HEALTH ANALYSIS")
    print("-" * 40)
    collection_fig, availability_stats, genre_dist = analytics.analyze_collection_health()
    analysis_results['collection_health'] = collection_fig
    print(f"Total Books: {availability_stats['total_books']}")
    print(f"Availability Rate: {availability_stats['availability_rate']:.1f}%")
    print(f"Most Popular Genre: {genre_dist.index[0]} ({genre_dist.iloc[0]} books)")
    fig, stats, dist = analytics.analyze_collection_health()
    fig.show()
    stats.show()
    dist.show()
    
    print("\n2. BORROWING PATTERNS")
    print("-" * 40)
    borrowing_fig, borrowing_stats = analytics.analyze_borrowing_patterns()
    analysis_results['borrowing_patterns'] = borrowing_fig
    print(f"Total Borrowings: {borrowing_stats['total_borrowings']}")
    print(f"Peak Borrowing Day: {borrowing_stats['peak_day']}")
    
    print("\n3. PATRON BEHAVIOR")
    print("-" * 40)
    patron_fig, patron_activity = analytics.analyze_patron_behavior()
    analysis_results['patron_behavior'] = patron_fig
    print(f"Most Active Patron: {patron_activity['total_borrowings'].idxmax()}")
    print(f"Average Books per Patron: {patron_activity['total_borrowings'].mean():.1f}")
    
    print("\n4. OVERDUE ANALYSIS")
    print("-" * 40)
    overdue_result = analytics.analyze_overdue_crisis()
    if overdue_result:
        overdue_fig, overdue_stats = overdue_result
        analysis_results['overdue_analysis'] = overdue_fig
        print(f"CRITICAL: {overdue_stats['total_overdue']} books are overdue")
        print(f"Average Days Overdue: {overdue_stats['avg_days_overdue']:.1f}")
        print(f"Longest Overdue: {overdue_stats['max_days_overdue']} days")
    else:
        print("No overdue books - Excellent!")
    
    print("\n5. FINANCIAL PERFORMANCE")
    print("-" * 40)
    financial_fig, financial_stats = analytics.analyze_financial_performance()
    analysis_results['financial_performance'] = financial_fig
    print(f"Total Revenue: ${financial_stats['total_revenue']:.2f}")
    print(f"Outstanding Amount: ${financial_stats['outstanding_amount']:.2f}")
    print(f"Average Payment: ${financial_stats['avg_payment']:.2f}")
    
    print("\n6. EXECUTIVE SUMMARY")
    print("-" * 40)
    summary = analytics.create_executive_summary()
    
    print("KEY INSIGHTS:")
    insights, recommendations = analytics.generate_predictive_insights()
    for insight in insights[:5]:  # Show top 5 insights
        print(f"  • {insight}")
    
    print("\nACTION ITEMS:")
    for rec in recommendations[:5]:  # Show top 5 recommendations
        print(f"  → {rec}")
    
    print("\n" + "=" * 60)
    print("ANALYSIS COMPLETE")
    print("=" * 60)
    
    return analysis_results, summary

In [16]:


# =============================================================================
# PRODUCTION FUNCTIONS FOR PYQT5 INTEGRATION
# =============================================================================

class LibraryDashboardAPI:
    """Production-ready API for PyQt5 dashboard integration."""
    
    def __init__(self, db_path):
        self.analytics = LibraryAnalytics(db_path)
        self.analytics.load_analysis_data()
    
    def get_kpi_metrics(self):
        """Get key performance indicators for dashboard cards."""
        with self.analytics.db_manager.get_session() as session:
            metrics = {
                'total_books': session.query(Book).count(),
                'available_books': session.query(Book).filter(Book.is_available == True).count(),
                'total_patrons': session.query(Patron).count(),
                'active_members': session.query(Patron).filter(
                    Patron.membership_status == 'active'
                ).count(),
                'active_borrowings': session.query(BorrowedBook).filter(
                    BorrowedBook.return_date.is_(None)
                ).count(),
                'overdue_books': session.query(BorrowedBook).filter(
                    and_(
                        BorrowedBook.return_date.is_(None),
                        BorrowedBook.due_date < datetime.now()
                    )
                ).count(),
                'total_revenue': float(self.analytics.payments_df['amount_paid'].sum()) if not self.analytics.payments_df.empty else 0.0,
                'outstanding_payments': float(self.analytics.payments_df['remaining_amount'].sum()) if not self.analytics.payments_df.empty else 0.0
            }
        
        return metrics
    
    def get_chart_data(self, chart_type, **kwargs):
        """Get data for specific chart types."""
        chart_methods = {
            'borrowing_trends': self._get_borrowing_trends_data,
            'popular_books': self._get_popular_books_data,
            'overdue_analysis': self._get_overdue_analysis_data,
            'financial_overview': self._get_financial_overview_data,
            'patron_categories': self._get_patron_categories_data,
            'collection_health': self._get_collection_health_data
        }
        
        if chart_type in chart_methods:
            return chart_methods[chart_type](**kwargs)
        else:
            raise ValueError(f"Unknown chart type: {chart_type}")
    
    def _get_borrowing_trends_data(self, days=30):
        """Get borrowing trends data."""
        if self.analytics.borrowings_df.empty:
            return {'dates': [], 'counts': [], 'total': 0}
        
        end_date = datetime.now()
        start_date = end_date - timedelta(days=days)
        
        filtered_df = self.analytics.borrowings_df[
            self.analytics.borrowings_df['borrow_date'].between(start_date, end_date)
        ]
        
        daily_counts = filtered_df.groupby(filtered_df['borrow_date'].dt.date).size()
        
        return {
            'dates': [str(date) for date in daily_counts.index],
            'counts': daily_counts.values.tolist(),
            'total': int(daily_counts.sum()),
            'average': float(daily_counts.mean()) if not daily_counts.empty else 0.0,
            'peak_date': str(daily_counts.idxmax()) if not daily_counts.empty else None
        }
    
    def _get_popular_books_data(self, limit=10):
        """Get most popular books data."""
        if self.analytics.borrowings_df.empty:
            return {'titles': [], 'authors': [], 'counts': [], 'genres': []}
        
        popularity = self.analytics.borrowings_df.groupby(['book_title', 'book_author', 'book_genre']).size().reset_index(name='count')
        top_books = popularity.nlargest(limit, 'count')
        
        return {
            'titles': [title[:40] + '...' if len(title) > 40 else title for title in top_books['book_title'].tolist()],
            'authors': top_books['book_author'].tolist(),
            'counts': top_books['count'].tolist(),
            'genres': top_books['book_genre'].tolist(),
            'full_titles': top_books['book_title'].tolist()
        }
    
    def _get_overdue_analysis_data(self):
        """Get overdue books analysis data."""
        if self.analytics.borrowings_df.empty:
            return {'severity_labels': [], 'severity_counts': [], 'total_overdue': 0}
        
        current_overdue = self.analytics.borrowings_df[
            (self.analytics.borrowings_df['return_date'].isna()) & 
            (self.analytics.borrowings_df['days_overdue'] > 0)
        ].copy()
        
        if current_overdue.empty:
            return {'severity_labels': [], 'severity_counts': [], 'total_overdue': 0}
        
        current_overdue['severity'] = pd.cut(
            current_overdue['days_overdue'],
            bins=[0, 7, 30, 90, float('inf')],
            labels=['1-7 days', '8-30 days', '31-90 days', '90+ days'],
            include_lowest=True
        )
        
        severity_dist = current_overdue['severity'].value_counts()
        
        return {
            'severity_labels': severity_dist.index.tolist(),
            'severity_counts': severity_dist.values.tolist(),
            'total_overdue': len(current_overdue),
            'avg_days_overdue': float(current_overdue['days_overdue'].mean()),
            'max_days_overdue': int(current_overdue['days_overdue'].max()),
            'overdue_details': current_overdue[['patron_name', 'book_title', 'days_overdue']].to_dict('records')
        }
    
    def _get_financial_overview_data(self, months=6):
        """Get financial overview data."""
        if self.analytics.payments_df.empty:
            return {'months': [], 'revenues': [], 'transaction_counts': []}
        
        monthly_data = self.analytics.payments_df.groupby(
            self.analytics.payments_df['payment_date'].dt.to_period('M')
        ).agg({
            'amount_paid': 'sum',
            'payment_id': 'count'
        }).tail(months)
        
        # Payment status distribution
        status_dist = self.analytics.payments_df['status'].value_counts()
        
        return {
            'months': [str(month) for month in monthly_data.index],
            'revenues': monthly_data['amount_paid'].tolist(),
            'transaction_counts': monthly_data['payment_id'].tolist(),
            'status_labels': status_dist.index.tolist(),
            'status_counts': status_dist.values.tolist(),
            'total_revenue': float(self.analytics.payments_df['amount_paid'].sum()),
            'outstanding_amount': float(self.analytics.payments_df['remaining_amount'].sum())
        }
    
    def _get_patron_categories_data(self):
        """Get patron category distribution."""
        if self.analytics.patrons_df.empty:
            return {'categories': [], 'counts': [], 'membership_stats': {}}
        
        category_dist = self.analytics.patrons_df['category'].value_counts()
        membership_stats = self.analytics.patrons_df.groupby('category')['membership_status'].value_counts().unstack(fill_value=0)
        
        return {
            'categories': category_dist.index.tolist(),
            'counts': category_dist.values.tolist(),
            'membership_stats': membership_stats.to_dict() if not membership_stats.empty else {}
        }
    
    def _get_collection_health_data(self):
        """Get collection health metrics."""
        if self.analytics.books_df.empty:
            return {'genres': [], 'counts': [], 'availability_rate': 0}
        
        genre_dist = self.analytics.books_df['class_name'].value_counts()
        total_books = len(self.analytics.books_df)
        available_books = len(self.analytics.books_df[self.analytics.books_df['is_available'] == True])
        availability_rate = (available_books / total_books) * 100 if total_books > 0 else 0
        
        return {
            'genres': genre_dist.index.tolist(),
            'counts': genre_dist.values.tolist(),
            'total_books': total_books,
            'available_books': available_books,
            'availability_rate': float(availability_rate)
        }
    
    def generate_plotly_figure(self, chart_type, **kwargs):
        """Generate Plotly figure JSON for specific chart type."""
        try:
            if chart_type == 'borrowing_trends':
                return self.analytics.analyze_borrowing_patterns()[0].to_json()
            elif chart_type == 'collection_health':
                return self.analytics.analyze_collection_health()[0].to_json()
            elif chart_type == 'patron_behavior':
                return self.analytics.analyze_patron_behavior()[0].to_json()
            elif chart_type == 'overdue_analysis':
                result = self.analytics.analyze_overdue_crisis()
                return result[0].to_json() if result else '{}'
            elif chart_type == 'financial_performance':
                return self.analytics.analyze_financial_performance()[0].to_json()
            else:
                return '{}'
        except Exception as e:
            print(f"Error generating {chart_type} figure: {e}")
            return '{}'
    
    def get_alerts_and_recommendations(self):
        """Get current alerts and actionable recommendations."""
        insights, recommendations = self.analytics.generate_predictive_insights()
        
        # Generate alerts based on current data
        alerts = []
        kpis = self.get_kpi_metrics()
        
        if kpis['overdue_books'] > 0:
            alerts.append({
                'type': 'critical',
                'message': f"{kpis['overdue_books']} books are overdue and require immediate attention",
                'action': 'Send overdue notices to patrons'
            })
        
        if kpis['available_books'] / kpis['total_books'] < 0.3:  # Less than 30% availability
            alerts.append({
                'type': 'warning',
                'message': f"Low book availability: Only {kpis['available_books']} of {kpis['total_books']} books available",
                'action': 'Review borrowing policies or acquire more popular titles'
            })
        
        if kpis['outstanding_payments'] > 0:
            alerts.append({
                'type': 'info',
                'message': f"Outstanding payments: ${kpis['outstanding_payments']:.2f}",
                'action': 'Follow up on pending payments'
            })
        
        return {
            'alerts': alerts,
            'insights': insights,
            'recommendations': recommendations
        }


In [17]:

# =============================================================================
# USAGE EXAMPLE AND TESTING
# =============================================================================

def main():
    """Example usage of the enhanced analytics system."""
    
    # Replace with your actual database path
    DB_PATH = "/home/tjselevani/Desktop/Apps/vscode/python/library_system/library_system.db"
    
    try:
        print("Initializing Library Analytics System...")
        
        # Run comprehensive analysis
        results, summary = run_comprehensive_analysis(DB_PATH)
        
        # Test production API
        print("\nTesting Production API...")
        dashboard_api = LibraryDashboardAPI(DB_PATH)
        
        # Get KPI metrics
        kpis = dashboard_api.get_kpi_metrics()
        print("KPI Metrics:", kpis)
        
        # Get chart data examples
        trends_data = dashboard_api.get_chart_data('borrowing_trends', days=30)
        print("Borrowing Trends Sample:", trends_data)
        
        # Get alerts and recommendations
        alerts = dashboard_api.get_alerts_and_recommendations()
        print("Alerts and Recommendations:", alerts)
        
        print("\nAnalysis completed successfully!")
        return results, summary, dashboard_api
        
    except Exception as e:
        print(f"Error during analysis: {e}")
        return None, None, None
    
    


In [18]:

# For Jupyter notebook execution
if __name__ == "__main__":
    # Set your database path here
    DB_PATH = "/home/tjselevani/Desktop/Apps/vscode/python/library_system/library_system.db"
    
    # Run the analysis
    results, summary, api = main()
    
    if results:
        print("\n" + "="*50)
        print("ANALYSIS COMPLETE - Ready for PyQt5 Integration!")
        print("="*50)
        
        # Show available functions for PyQt5
        print("\nAvailable functions for PyQt5 integration:")
        print("1. api.get_kpi_metrics() - Get dashboard KPIs")
        print("2. api.get_chart_data(chart_type) - Get data for charts")
        print("3. api.generate_plotly_figure(chart_type) - Get Plotly JSON")
        print("4. api.get_alerts_and_recommendations() - Get alerts/recommendations")
        
        print("\nChart types available:")
        chart_types = [
            'borrowing_trends', 'popular_books', 'overdue_analysis',
            'financial_overview', 'patron_categories', 'collection_health'
        ]
        for i, chart_type in enumerate(chart_types, 1):
            print(f"{i}. {chart_type}")
            
        print("\nExample usage in PyQt5:")
        print("```python")
        print("from your_analytics_module import LibraryDashboardAPI")
        print("api = LibraryDashboardAPI('path/to/db')")
        print("kpis = api.get_kpi_metrics()")
        print("chart_json = api.generate_plotly_figure('borrowing_trends')")
        print("```")
        
    else:
        print("Analysis failed. Please check your database path and connection.")

Initializing Library Analytics System...
COMPREHENSIVE LIBRARY ANALYTICS REPORT
Database connection established successfully!

Database Overview:
  Books: 4
  Patrons: 4
  Borrowed Books: 1
  Payments: 4

Data loaded successfully:
  Books: 4
  Patrons: 4
  Borrowings: 1
  Payments: 4

1. COLLECTION HEALTH ANALYSIS
----------------------------------------
Total Books: 4
Availability Rate: 75.0%
Most Popular Genre: CS101 (1 books)


Error during analysis: 'dict' object has no attribute 'show'
Analysis failed. Please check your database path and connection.
