In [None]:
# notebooks/library_data_analysis.ipynb
"""
Library Management System - Data Analysis Notebook
This notebook explores library data and creates visualizations for insights.
"""

In [None]:

# Cell 1: Setup and Imports
import sys
import os
# Add the project root to the path
# sys.path.append(os.path.join(os.getcwd(), '../..'))
project_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))
if project_root not in sys.path:
    sys.path.append(project_root)

import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
import sqlite3
from sqlalchemy import create_engine, func
from sqlalchemy.orm import sessionmaker

# Import your models and services
from utils.database_manager import MyDatabaseManager as DatabaseManager
from services.analytics_service import AnalyticsService
from db.models import Book, BorrowedBook, Patron, Payment


In [None]:

# Cell 2: Database Connection Setup
# The database location can be overridden with the LIBRARY_DB_PATH environment
# variable; the previous hard-coded location is kept as the default so existing
# runs behave the same.
path = os.environ.get(
    "LIBRARY_DB_PATH",
    "/home/tjselevani/Desktop/Apps/vscode/python/library_system/library_system.db",
)
db_manager = DatabaseManager(path)
analytics_service = AnalyticsService(db_manager)

# Also create pandas-friendly connection.
# Reuse the manager configured above so the pandas queries read the same
# database as the ORM sessions (a second, path-less manager could point elsewhere).
engine = db_manager.get_engine()

print("Database connection established successfully!")

In [None]:
# Display the column metadata SQLite reports for the `books` table.
pd.read_sql_query("PRAGMA table_info(books);", engine)

In [None]:
# Display the column metadata SQLite reports for the `borrowed_books` table.
pd.read_sql_query("PRAGMA table_info(borrowed_books);", engine)


In [None]:
# Display the column metadata SQLite reports for the `patrons` table.
pd.read_sql_query("PRAGMA table_info(patrons);", engine)

In [None]:
# Display the column metadata SQLite reports for the `payments` table.
pd.read_sql_query("PRAGMA table_info(payments);", engine)

In [None]:

# Cell 3: Data Exploration - Basic Statistics
def explore_basic_stats():
    """Print headline row counts for the library database.

    Opens a session through ``db_manager`` and counts ``Book``, ``Patron`` and
    ``BorrowedBook`` rows, plus the borrowings whose ``return_date`` is NULL.
    When at least one borrowing exists, the share of returned borrowings is
    printed as a percentage; otherwise a placeholder message is printed.
    """
    with db_manager.get_session() as db:
        book_count = db.query(Book).count()
        patron_count = db.query(Patron).count()
        borrowing_count = db.query(BorrowedBook).count()
        # A borrowing without a return date is counted as still active.
        open_loans = (
            db.query(BorrowedBook)
            .filter(BorrowedBook.return_date.is_(None))
            .count()
        )

        if borrowing_count > 0:
            returned_share = (borrowing_count - open_loans) / borrowing_count * 100
            return_rate_line = f"   Return Rate: {returned_share:.1f}%"
        else:
            return_rate_line = "No borrowings yet"

        report_lines = (
            "📊 Library Statistics:",
            f"   Total Books: {book_count}",
            f"   Total Patrons: {patron_count}",
            f"   Total Borrowings: {borrowing_count}",
            f"   Active Borrowings: {open_loans}",
            return_rate_line,
        )
        for line in report_lines:
            print(line)

explore_basic_stats()

In [None]:

# Cell 4: Load Data into DataFrames for Analysis
def load_data_for_analysis():
    """Read books, patrons, borrowings and payments into DataFrames.

    Each frame is produced by one SQL query against ``engine``. Borrowings are
    joined to their book and patron; payments are joined to their patron. The
    date columns listed below are parsed with ``pd.to_datetime``.

    Returns:
        tuple: ``(books_df, patrons_df, borrowings_df, payments_df)``.
    """

    def _parse_dates(frame, columns):
        # Convert the named columns of ``frame`` to datetimes in place.
        for column in columns:
            frame[column] = pd.to_datetime(frame[column])

    books_sql = """
        SELECT book_id, title, author, isbn, class_name, accession_no,
               is_available, created_at
        FROM books
    """

    patrons_sql = """
        SELECT user_id, patron_id, first_name, last_name, institution, 
               grade_level, category, age, gender, date_of_birth, residence,
               phone_number, membership_status, membership_start_date,
               membership_expiry_date, membership_type
        FROM patrons
    """

    # Borrowings enriched with the borrowed book and the borrowing patron.
    borrowings_sql = """
        SELECT bb.borrow_id, bb.user_id, bb.book_id, bb.borrow_date, 
               bb.due_date, bb.return_date, bb.returned, bb.fine_amount,
               b.title AS book_title, b.author AS book_author, b.class_name,
               p.first_name || ' ' || p.last_name AS patron_name,
               p.phone_number AS patron_phone
        FROM borrowed_books bb
        JOIN books b ON bb.book_id = b.book_id
        JOIN patrons p ON bb.user_id = p.user_id
    """

    # Payments enriched with the paying patron's display name.
    payments_sql = """
        SELECT py.payment_id, py.user_id, py.payment_item_id,
               py.amount_paid, py.total_amount_due, py.payment_date, py.status,
               py.membership_start_date, py.membership_expiry_date, py.is_membership_active,
               py.notes,
               p.first_name || ' ' || p.last_name AS patron_name
        FROM payments py
        JOIN patrons p ON py.user_id = p.user_id
    """

    books = pd.read_sql_query(books_sql, engine)
    patrons = pd.read_sql_query(patrons_sql, engine)
    borrowings = pd.read_sql_query(borrowings_sql, engine)
    payments = pd.read_sql_query(payments_sql, engine)

    _parse_dates(borrowings, ('borrow_date', 'due_date', 'return_date'))
    _parse_dates(payments, ('payment_date',))
    _parse_dates(
        patrons,
        ('membership_start_date', 'membership_expiry_date', 'date_of_birth'),
    )

    return books, patrons, borrowings, payments


books_df, patrons_df, borrowings_df, payments_df = load_data_for_analysis()
print("✅ Data loaded successfully!")
print(f"Books: {len(books_df)}, Patrons: {len(patrons_df)}, Borrowings: {len(borrowings_df)}, Payments: {len(payments_df)}")

In [None]:

# Cell 5: Genre Analysis
def analyze_book_genres():
    """Show a pie chart of how many books fall into each ``class_name``.

    Returns:
        pandas.Series | None: book counts indexed by ``class_name``, or
        ``None`` (after printing a notice) when ``books_df`` is empty.
    """
    if books_df.empty:
        print("No book data available")
        return None

    counts_by_genre = books_df['class_name'].value_counts()

    pie = px.pie(
        names=counts_by_genre.index,
        values=counts_by_genre.values,
        title="Book Distribution by Genre",
    )
    # Put both the share and the genre label inside each slice.
    pie.update_traces(textinfo='percent+label', textposition='inside')
    pie.show()

    return counts_by_genre

genre_analysis = analyze_book_genres()

In [None]:

# Cell 6: Borrowing Patterns Analysis
def analyze_borrowing_patterns():
    """Plot borrowings per calendar month and per weekday.

    The month and weekday keys are derived into local series, so the shared
    ``borrowings_df`` is not modified and the cell can be re-run safely.

    Returns:
        tuple | None: ``(monthly_borrowings, dow_borrowings)`` where the first
        is a count per monthly period and the second a count per weekday in
        Monday-to-Sunday order (weekdays without borrowings are 0). ``None``
        is returned, after printing a notice, when ``borrowings_df`` is empty.
    """
    if borrowings_df.empty:
        print("No borrowing data available")
        return None

    borrow_dates = borrowings_df['borrow_date']

    # Monthly borrowing trends
    monthly_borrowings = (
        borrow_dates.groupby(borrow_dates.dt.to_period('M'))
        .size()
        .rename_axis('month')
    )

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=[str(m) for m in monthly_borrowings.index],
        y=monthly_borrowings.values,
        mode='lines+markers',
        name='Monthly Borrowings',
        line=dict(color='#2E86AB', width=3),
        marker=dict(size=8)
    ))

    fig.update_layout(
        title='Monthly Borrowing Trends',
        xaxis_title='Month',
        yaxis_title='Number of Books Borrowed',
        template='plotly_white'
    )
    fig.show()

    # Day of week analysis
    day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    # fill_value=0 keeps weekdays with no borrowings as an explicit zero
    # instead of NaN, so the counts stay integers.
    dow_borrowings = (
        borrow_dates.dt.day_name()
        .rename('day_of_week')
        .value_counts()
        .reindex(day_order, fill_value=0)
    )

    fig2 = go.Figure(go.Bar(
        x=dow_borrowings.index,
        y=dow_borrowings.values,
        marker_color='#A23B72'
    ))

    fig2.update_layout(
        title='Borrowings by Day of Week',
        xaxis_title='Day of Week',
        yaxis_title='Number of Borrowings',
        template='plotly_white'
    )
    fig2.show()

    return monthly_borrowings, dow_borrowings

borrowing_patterns = analyze_borrowing_patterns()

In [None]:

# Cell 7: Popular Authors and Books
def analyze_popular_content():
    """Chart the ten most-borrowed authors and the ten most-borrowed titles.

    Returns:
        tuple | None: ``(author_popularity, book_popularity)``, each a Series
        of borrowing counts sorted descending and limited to ten entries.
        ``None`` is returned, after printing a notice, when ``borrowings_df``
        is empty.
    """
    if borrowings_df.empty:
        print("No borrowing data available")
        return None

    def _top_ten(column):
        # Borrowing count per distinct value of ``column``, largest first.
        counts = borrowings_df.groupby(column).size()
        return counts.sort_values(ascending=False).head(10)

    def _show_horizontal_bars(counts, labels, color, title, axis_label):
        # Render one horizontal bar chart with the shared layout settings.
        chart = go.Figure(go.Bar(
            x=counts,
            y=labels,
            orientation='h',
            marker_color=color,
        ))
        chart.update_layout(
            title=title,
            xaxis_title='Number of Borrowings',
            yaxis_title=axis_label,
            template='plotly_white',
        )
        chart.show()

    # Most popular authors
    author_popularity = _top_ten('book_author')
    _show_horizontal_bars(
        author_popularity.values,
        author_popularity.index,
        '#F18F01',
        'Top 10 Most Popular Authors',
        'Author',
    )

    # Most popular books; long titles are cut to 30 characters for the axis.
    book_popularity = _top_ten('book_title')
    short_titles = []
    for title in book_popularity.index:
        if len(title) > 30:
            short_titles.append(title[:30] + '...')
        else:
            short_titles.append(title)
    _show_horizontal_bars(
        book_popularity.values,
        short_titles,
        '#C73E1D',
        'Top 10 Most Popular Books',
        'Book Title',
    )

    return author_popularity, book_popularity

popular_content = analyze_popular_content()

In [None]:

# Cell 8: Overdue Analysis
def analyze_overdue_books():
    """Summarise unreturned borrowings whose due date has passed.

    Borrowings with no ``return_date`` are treated as active. Each active
    borrowing gets an ``is_overdue`` flag (due date before now) and a
    ``days_overdue`` value (whole days between the due date and now). A
    summary is printed and a histogram of ``days_overdue`` is shown.

    Returns:
        pandas.DataFrame | None: the overdue rows, or ``None`` when there is
        no borrowing data or nothing is overdue.
    """
    if borrowings_df.empty:
        print("No borrowing data available")
        return None

    now = datetime.now()

    # Calculate overdue books among those not yet returned.
    not_returned = borrowings_df['return_date'].isna()
    open_loans = borrowings_df[not_returned].assign(
        is_overdue=lambda loans: loans['due_date'] < now,
        days_overdue=lambda loans: (now - loans['due_date']).dt.days,
    )
    late_loans = open_loans[open_loans['is_overdue']]

    if late_loans.empty:
        print("✅ No overdue books!")
        return None

    days_late = late_loans['days_overdue']
    print("📚 Overdue Analysis:")
    print(f"   Total overdue books: {len(late_loans)}")
    print(f"   Average days overdue: {days_late.mean():.1f}")
    print(f"   Maximum days overdue: {days_late.max()}")

    # Overdue distribution histogram
    histogram = go.Histogram(
        x=days_late,
        nbinsx=20,
        marker_color='#FF6B6B',
        opacity=0.7,
    )
    chart = go.Figure(histogram)
    chart.update_layout(
        title='Distribution of Overdue Days',
        xaxis_title='Days Overdue',
        yaxis_title='Number of Books',
        template='plotly_white',
    )
    chart.show()

    return late_loans

overdue_analysis = analyze_overdue_books()

In [None]:
# Cell 9: Financial Analysis
def analyze_finances():
    """Plot monthly revenue and the payment breakdown, then print a summary.

    Revenue is taken from ``amount_paid``, the payment amount column that the
    Cell 4 query loads. The month key is computed locally, so the shared
    ``payments_df`` is not modified.

    The breakdown uses ``payment_type`` when that column is present and falls
    back to ``payment_item_id`` otherwise (the Cell 4 query does not select a
    ``payment_type`` column). If neither exists, the pie chart is skipped and
    an empty Series is returned in its place.

    Returns:
        tuple | None: ``(monthly_revenue, payment_type_dist)``, or ``None``
        (after printing a notice) when ``payments_df`` is empty.
    """
    if payments_df.empty:
        print("No payment data available")
        return None

    # Monthly revenue
    payment_months = payments_df['payment_date'].dt.to_period('M')
    monthly_revenue = (
        payments_df['amount_paid']
        .groupby(payment_months)
        .sum()
        .rename_axis('month')
    )

    fig1 = go.Figure(go.Bar(
        x=[str(m) for m in monthly_revenue.index],
        y=monthly_revenue.values,
        marker_color='#2E8B57'
    ))

    fig1.update_layout(
        title='Monthly Revenue',
        xaxis_title='Month',
        yaxis_title='Revenue ($)',
        template='plotly_white'
    )
    fig1.show()

    # Payment type distribution
    type_column = next(
        (name for name in ('payment_type', 'payment_item_id')
         if name in payments_df.columns),
        None,
    )
    if type_column is None:
        print("No payment type column available; skipping payment type chart")
        payment_type_dist = pd.Series(dtype='int64')
    else:
        # NOTE: slices are sized by the number of payments per type, not by
        # the amount paid.
        payment_type_dist = payments_df[type_column].value_counts()

        fig2 = px.pie(
            values=payment_type_dist.values,
            names=payment_type_dist.index,
            title="Revenue by Payment Type"
        )
        fig2.show()

    print("💰 Financial Summary:")
    print(f"   Total Revenue: ${payments_df['amount_paid'].sum():.2f}")
    print(f"   Average Payment: ${payments_df['amount_paid'].mean():.2f}")
    print(f"   Number of Payments: {len(payments_df)}")

    return monthly_revenue, payment_type_dist

financial_analysis = analyze_finances()

In [None]:

# Cell 10: Patron Behavior Analysis
def analyze_patron_behavior():
    """Relate each patron's borrowing volume to the amount they have paid.

    Builds one row per ``patron_name`` with the number of borrowings, the
    number of distinct titles borrowed and the summed ``amount_paid`` (0 when
    the patron has no payments), then shows a bubble chart of borrowings
    against payments where the bubble size is the number of distinct titles.

    Returns:
        pandas.DataFrame | None: the per-patron table with columns
        ``total_borrowings``, ``unique_books`` and ``amount``; ``None`` (after
        printing a notice) when borrowing or patron data is missing.
    """
    if borrowings_df.empty or patrons_df.empty:
        print("No patron or borrowing data available")
        return None

    # Patron activity summary
    patron_activity = borrowings_df.groupby('patron_name').agg(
        total_borrowings=('borrow_id', 'count'),   # Number of borrowings
        unique_books=('book_title', 'nunique'),    # Number of unique books
    )

    # Add payment data if available. The payments frame stores the amount in
    # ``amount_paid``; it is exposed as ``amount`` in the result table.
    if not payments_df.empty:
        patron_payments = (
            payments_df.groupby('patron_name')['amount_paid'].sum().rename('amount')
        )
        patron_activity = patron_activity.join(patron_payments, how='left')
        patron_activity['amount'] = patron_activity['amount'].fillna(0)
    else:
        patron_activity['amount'] = 0

    # Create scatter plot
    unique_books = patron_activity['unique_books']
    fig = go.Figure(go.Scatter(
        x=patron_activity['total_borrowings'],
        y=patron_activity['amount'],
        mode='markers',
        text=patron_activity.index,
        marker=dict(
            # The raw count is used as the size so the hover text below
            # reports the real number of unique books.
            size=unique_books,
            color=patron_activity['total_borrowings'],
            colorscale='Viridis',
            showscale=True,
            # This sizeref formula is the one plotly documents for area
            # scaling; it caps the largest bubble at roughly 40px.
            sizemode='area',
            sizeref=2.*max(unique_books)/(40.**2),
            sizemin=4
        ),
        hovertemplate='<b>%{text}</b><br>' +
                      'Total Borrowings: %{x}<br>' +
                      'Total Payments: $%{y}<br>' +
                      'Unique Books: %{marker.size}<br>' +
                      '<extra></extra>'
    ))

    fig.update_layout(
        title='Patron Activity Analysis',
        xaxis_title='Total Borrowings',
        yaxis_title='Total Payments ($)',
        template='plotly_white'
    )
    fig.show()

    return patron_activity

patron_behavior = analyze_patron_behavior()

In [None]:

# Cell 11: Advanced Analytics - Correlation Analysis
def advanced_correlation_analysis():
    """Show a correlation heatmap of per-book borrowing metrics.

    Metrics are aggregated per ``book_id`` from ``borrowings_df``:

    * ``popularity`` - number of borrowings,
    * ``current_borrowings`` - borrowings with no return date,
    * ``total_fines`` - sum of ``fine_amount`` (non-numeric/missing as 0).

    Numeric book attributes are joined from ``books_df`` when present. Only
    columns that actually exist are used, because the Cell 4 query does not
    load ``publication_year`` or ``total_copies``.

    Returns:
        pandas.DataFrame | None: the correlation matrix, or ``None`` (after
        printing a notice) when there is no borrowing data or fewer than two
        books have been borrowed.
    """
    if borrowings_df.empty:
        print("No data available for correlation analysis")
        return None

    # Work on a derived frame so the shared borrowings frame is untouched.
    loans = borrowings_df.assign(
        fine_amount=pd.to_numeric(borrowings_df['fine_amount'], errors='coerce').fillna(0)
    )

    # Aggregate data by book for correlation
    book_metrics = loans.groupby('book_id').agg(
        popularity=('borrow_id', 'count'),  # number of times borrowed
        current_borrowings=('return_date', lambda dates: dates.isna().sum()),
        total_fines=('fine_amount', 'sum'),
    )

    # Add book details, restricted to attributes available in books_df.
    detail_columns = [
        name for name in ('publication_year', 'total_copies', 'is_available')
        if name in books_df.columns
    ]
    if detail_columns and 'book_id' in books_df.columns:
        book_details = (
            books_df.drop_duplicates('book_id')
            .set_index('book_id')[detail_columns]
            .apply(pd.to_numeric, errors='coerce')
        )
        book_metrics = book_metrics.join(book_details, how='left')

    if len(book_metrics) < 2:
        # A correlation needs at least two observations.
        print("Not enough borrowed books for correlation analysis")
        return None

    # Calculate correlation matrix (cast so boolean/int flags are numeric).
    correlation_matrix = book_metrics.astype(float).corr()

    # Create heatmap
    fig = go.Figure(data=go.Heatmap(
        z=correlation_matrix.values,
        x=correlation_matrix.columns,
        y=correlation_matrix.columns,
        colorscale='RdBu',
        zmid=0,
        text=correlation_matrix.round(2).values,
        texttemplate="%{text}",
        textfont={"size": 10}
    ))

    fig.update_layout(
        title='Book Metrics Correlation Analysis',
        template='plotly_white'
    )
    fig.show()

    return correlation_matrix

correlation_analysis = advanced_correlation_analysis()

In [None]:

# Cell 12: Seasonal Analysis
def analyze_seasonal_patterns():
    """Plot borrowings per season next to borrowings per calendar month.

    Both views are derived from the ``borrow_date`` column. The season labels
    are computed into a local series, so the shared ``borrowings_df`` is not
    modified.

    Returns:
        pandas.Series | None: borrowing counts indexed Spring, Summer, Fall,
        Winter (seasons without borrowings are 0); ``None`` (after printing a
        notice) when ``borrowings_df`` is empty.
    """
    if borrowings_df.empty:
        print("No borrowing data available")
        return None

    season_by_month = {
        12: 'Winter', 1: 'Winter', 2: 'Winter',
        3: 'Spring', 4: 'Spring', 5: 'Spring',
        6: 'Summer', 7: 'Summer', 8: 'Summer',
        9: 'Fall', 10: 'Fall', 11: 'Fall'
    }

    # Extract seasonal data
    borrow_months = borrowings_df['borrow_date'].dt.month
    season_order = ['Spring', 'Summer', 'Fall', 'Winter']
    # fill_value=0 keeps the four bars aligned with the four colours below
    # even when a season has no borrowings.
    seasonal_borrowings = (
        borrow_months.map(season_by_month)
        .rename('season')
        .value_counts()
        .reindex(season_order, fill_value=0)
    )

    # Create seasonal analysis plot
    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=('Borrowings by Season', 'Monthly Distribution'),
        specs=[[{"type": "bar"}, {"type": "scatter"}]]
    )

    # Seasonal bar chart
    fig.add_trace(
        go.Bar(
            x=seasonal_borrowings.index,
            y=seasonal_borrowings.values,
            marker_color=['#90EE90', '#FFD700', '#FF8C00', '#87CEEB'],
            name="Season"
        ),
        row=1, col=1
    )

    # Monthly trend line
    monthly_dist = borrow_months.value_counts().sort_index()
    fig.add_trace(
        go.Scatter(
            x=monthly_dist.index,
            y=monthly_dist.values,
            mode='lines+markers',
            name="Monthly",
            line=dict(color='#FF6347', width=3)
        ),
        row=1, col=2
    )

    fig.update_layout(
        title='Seasonal Borrowing Patterns',
        template='plotly_white',
        showlegend=False
    )
    fig.show()

    return seasonal_borrowings

seasonal_analysis = analyze_seasonal_patterns()

In [None]:

# Cell 13: Predictive Insights
def generate_predictive_insights(borrowings=None, payments=None):
    """Collect and print a handful of headline insights.

    Args:
        borrowings: DataFrame with ``borrow_date`` and ``return_date``
            datetime columns and, optionally, ``patron_name``. Defaults to
            the notebook-level ``borrowings_df``.
        payments: DataFrame with ``payment_date`` (datetime) and
            ``amount_paid`` columns. Defaults to the notebook-level
            ``payments_df``.

    Returns:
        list[str]: the insight lines, in the order they are printed. An
        insight is skipped when the data needed to compute it is missing.
    """
    # Fall back to the frames loaded earlier in the notebook.
    borrowings = borrowings_df if borrowings is None else borrowings
    payments = payments_df if payments is None else payments

    insights = []

    if not borrowings.empty:
        # Peak borrowing times. Missing borrow dates are dropped first so the
        # hour is an integer and idxmax never runs on an empty series.
        borrow_hours = borrowings['borrow_date'].dt.hour.dropna()
        if not borrow_hours.empty:
            hour_analysis = borrow_hours.value_counts().sort_index()
            peak_hour = int(hour_analysis.idxmax())
            insights.append(f"📈 Peak borrowing hour: {peak_hour}:00")

        # Average borrowing duration over returned borrowings only.
        completed_borrowings = borrowings.dropna(subset=['return_date'])
        if not completed_borrowings.empty:
            avg_duration = (
                completed_borrowings['return_date'] - completed_borrowings['borrow_date']
            ).dt.days.mean()
            if pd.notna(avg_duration):
                insights.append(f"⏱️ Average borrowing duration: {avg_duration:.1f} days")

        # Most active patron
        if 'patron_name' in borrowings.columns:
            patron_counts = borrowings['patron_name'].value_counts()
            if not patron_counts.empty:
                most_active = patron_counts.index[0]
                most_active_count = patron_counts.iloc[0]
                insights.append(f"🏆 Most active patron: {most_active} ({most_active_count} borrowings)")

    if not payments.empty:
        # Revenue trends: payments are summed from the ``amount_paid`` column.
        cutoff = datetime.now() - timedelta(days=30)
        recent_revenue = payments.loc[payments['payment_date'] >= cutoff, 'amount_paid'].sum()
        insights.append(f"💰 Revenue (last 30 days): ${recent_revenue:.2f}")

    print("🔮 Predictive Insights:")
    for insight in insights:
        print(f"   {insight}")

    return insights

# Run the insight generation and keep the returned list of insight strings.
predictive_insights = generate_predictive_insights()

In [None]:

# Cell 14: Export Functions for Production Use
def create_production_chart_functions(books=None, borrowings=None):
    """Build the chart-data callables intended for reuse outside the notebook.

    Args:
        books: DataFrame with a ``class_name`` column. When omitted, the
            notebook-level ``books_df`` is looked up each time a returned
            function is called.
        borrowings: DataFrame with ``borrow_date`` (datetime) and
            ``book_title`` columns. When omitted, the notebook-level
            ``borrowings_df`` is looked up at call time.

    Returns:
        dict: callables keyed ``'borrowing_trend'``, ``'genre_distribution'``
        and ``'top_books'``; each returns a dict of plain Python lists/ints.
    """

    def _books():
        # Resolved lazily so reloading books_df is picked up by later calls.
        return books_df if books is None else books

    def _borrowings():
        # Resolved lazily so reloading borrowings_df is picked up by later calls.
        return borrowings_df if borrowings is None else borrowings

    def get_borrowing_trend_data(days=30):
        """Production function for borrowing trends.

        Counts borrowings per calendar day over the last ``days`` days.
        """
        loans = _borrowings()
        if loans.empty:
            return {'dates': [], 'counts': [], 'total': 0}

        end_date = datetime.now()
        start_date = end_date - timedelta(days=days)

        # Filter first, then group the filtered dates by their own day, so the
        # group key and the grouped rows always cover the same rows.
        borrow_dates = loans['borrow_date']
        recent_dates = borrow_dates[borrow_dates.between(start_date, end_date)]
        trend_data = recent_dates.groupby(recent_dates.dt.date).size()

        return {
            'dates': [str(date) for date in trend_data.index],
            'counts': trend_data.values.tolist(),
            # Cast to a built-in int so every value in the payload is a plain
            # Python type.
            'total': int(trend_data.sum())
        }

    def get_genre_distribution_data():
        """Production function for genre distribution.

        Genres come from the ``class_name`` column of the books frame.
        """
        catalogue = _books()
        if catalogue.empty:
            return {'labels': [], 'values': []}

        genre_dist = catalogue['class_name'].value_counts()
        return {
            'labels': genre_dist.index.tolist(),
            'values': genre_dist.values.tolist()
        }

    def get_top_books_data(limit=10):
        """Production function for top books.

        Returns the ``limit`` most-borrowed titles, with display titles cut
        to 30 characters and the untruncated titles alongside.
        """
        loans = _borrowings()
        if loans.empty:
            return {'titles': [], 'counts': []}

        top_books = loans['book_title'].value_counts().head(limit)
        return {
            'titles': [title[:30] + '...' if len(title) > 30 else title for title in top_books.index],
            'counts': top_books.values.tolist(),
            'full_titles': top_books.index.tolist()
        }

    return {
        'borrowing_trend': get_borrowing_trend_data,
        'genre_distribution': get_genre_distribution_data,
        'top_books': get_top_books_data
    }

# Build the dictionary of chart-data callables used by the test cell below.
production_functions = create_production_chart_functions()

In [None]:

# Cell 15: Test Production Functions
# Smoke-test each production callable by printing one sample payload:
# a 7-day borrowing trend, the genre distribution, and the top 5 books.
print("🧪 Testing Production Functions:")
print("Borrowing Trend Sample:", production_functions['borrowing_trend'](7))
print("Genre Distribution Sample:", production_functions['genre_distribution']())
print("Top Books Sample:", production_functions['top_books'](5))

# Cell 16: Summary Report
def generate_summary_report(books=None, patrons=None, borrowings=None, payments=None):
    """Build, print and return a summary report of the loaded data.

    Args:
        books: books DataFrame; defaults to the notebook-level ``books_df``.
        patrons: patrons DataFrame; defaults to ``patrons_df``.
        borrowings: borrowings DataFrame with ``return_date`` and ``due_date``
            datetime columns; defaults to ``borrowings_df``.
        payments: payments DataFrame with an ``amount_paid`` column; defaults
            to ``payments_df``.

    Returns:
        dict: with keys ``timestamp`` (ISO string), ``data_overview`` (row
        counts), ``key_metrics`` (borrowing and revenue figures, present only
        when the matching frame is non-empty) and ``recommendations``
        (list of strings).
    """
    # Fall back to the frames loaded earlier in the notebook.
    books = books_df if books is None else books
    patrons = patrons_df if patrons is None else patrons
    borrowings = borrowings_df if borrowings is None else borrowings
    payments = payments_df if payments is None else payments

    # Share of the catalogue on loan above which more copies are suggested.
    high_utilisation_ratio = 0.7

    report = {
        'timestamp': datetime.now().isoformat(),
        'data_overview': {
            'total_books': len(books),
            'total_patrons': len(patrons),
            'total_borrowings': len(borrowings),
            'total_payments': len(payments)
        },
        'key_metrics': {},
        'recommendations': []
    }

    if not borrowings.empty:
        # Active = not yet returned; overdue = active and past the due date.
        active_borrowings = borrowings[borrowings['return_date'].isna()]
        overdue_borrowings = active_borrowings[active_borrowings['due_date'] < datetime.now()]

        report['key_metrics'].update({
            'active_borrowings': len(active_borrowings),
            'overdue_books': len(overdue_borrowings),
            'return_rate': (len(borrowings) - len(active_borrowings)) / len(borrowings) * 100
        })

        # Generate recommendations
        if len(overdue_borrowings) > 0:
            report['recommendations'].append("📧 Send overdue notices to patrons with overdue books")

        if len(active_borrowings) > len(books) * high_utilisation_ratio:
            report['recommendations'].append("📚 Consider acquiring more copies of popular books")

    if not payments.empty:
        # Payment amounts are stored in ``amount_paid``; cast to built-in
        # floats so the report holds plain Python values.
        report['key_metrics'].update({
            'total_revenue': float(payments['amount_paid'].sum()),
            'average_payment': float(payments['amount_paid'].mean())
        })

    print("📊 LIBRARY ANALYTICS SUMMARY REPORT")
    print("=" * 50)
    print(f"Generated: {report['timestamp']}")
    print("\n📈 Data Overview:")
    for key, value in report['data_overview'].items():
        print(f"   {key.replace('_', ' ').title()}: {value}")

    print("\n🎯 Key Metrics:")
    for key, value in report['key_metrics'].items():
        if isinstance(value, float):
            print(f"   {key.replace('_', ' ').title()}: {value:.2f}")
        else:
            print(f"   {key.replace('_', ' ').title()}: {value}")

    if report['recommendations']:
        print("\n💡 Recommendations:")
        for rec in report['recommendations']:
            print(f"   {rec}")

    return report

# Generate the summary report (printed as a side effect) and keep the dict.
final_report = generate_summary_report()

# Closing notice for the notebook run.
print("\n✅ Analysis Complete! You can now use the production functions in your PyQt5 application.")