# Financial Operations Analytics

This notebook provides an interactive walkthrough of the Financial Operations Analytics project. The sections are:
1. Data Generation & Exploration
2. Data Preprocessing & Feature Engineering
3. Revenue Forecasting (Prophet & ARIMA)
4. Customer Churn Prediction
5. Profitability Analysis & Segmentation
6. Cohort & RFM Analysis
7. Executive Reporting
8. Summary

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys
import warnings
# Silence library warnings so cell output stays readable.
# NOTE(review): this blanket filter also hides deprecation warnings.
warnings.filterwarnings('ignore')

# Set visualization style
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('viridis')

# Set base directory: the parent of the kernel's working directory.
# `__file__` is not defined inside a notebook, so the project root is anchored
# on the current working directory (same value the previous
# abspath('__file__') expression produced).
# NOTE(review): assumes the kernel is started from the notebook's own folder,
# one level below the project root — confirm.
BASE_DIR = os.path.dirname(os.getcwd())
print(f'Base Directory: {BASE_DIR}')

# Add src to path as an absolute entry so project imports keep working even if
# the working directory changes later; the membership check keeps re-runs of
# this cell from stacking duplicate entries.
SRC_DIR = os.path.join(BASE_DIR, 'src')
if SRC_DIR not in sys.path:
    sys.path.insert(0, SRC_DIR)

## 1. Data Generation

Generate synthetic financial data with realistic patterns including:
- Customer growth patterns
- Seasonality in revenue
- Churn behavior
- Varied customer profitability

In [None]:
from data_generator import FinancialDataGenerator

# Directory handed to the generator; later cells reuse `data_dir`.
data_dir = os.path.join(BASE_DIR, 'data')

# Synthetic history covers 2021-01-01 through 2024-12-31.
# NOTE(review): no random seed is set in this cell; confirm whether
# FinancialDataGenerator seeds itself, otherwise results vary between runs.
generator = FinancialDataGenerator(start_date='2021-01-01', end_date='2024-12-31')

# Produce the customer and transaction tables for 2,500 customers.
customers_df, transactions_df = generator.generate_all(data_dir, num_customers=2500)

print(f"\nCustomers: {len(customers_df)}")
print(f"Transactions: {len(transactions_df)}")

In [None]:
# Explore customer data: first rows, then summary statistics.
print("=== Customer Data Sample ===")
display(customers_df.head(10))

print("\n=== Customer Statistics ===")
# Use rich display (not print) so the summary renders as a table, consistent
# with the sample shown above.
display(customers_df.describe())

In [None]:
# Explore transaction data: first rows, then summary statistics.
print("=== Transaction Data Sample ===")
display(transactions_df.head(10))

print("\n=== Transaction Statistics ===")
# Use rich display (not print) so the summary renders as a table, consistent
# with the sample shown above.
display(transactions_df.describe())

In [None]:
# Visualize data distributions in a 2x2 grid:
# signups over time, subscription mix, monthly fee histogram, transaction status.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
(ax_signups, ax_subs), (ax_fees, ax_status) = axes

# Customer signup trend.
# Parse the dates into a temporary frame rather than overwriting the column on
# customers_df, so this plotting cell leaves the shared DataFrame untouched.
signups_by_date = customers_df.assign(
    signup_date=pd.to_datetime(customers_df['signup_date'])
).set_index('signup_date')
# NOTE(review): newer pandas releases prefer the 'ME' alias for month-end
# resampling; 'M' is kept here for compatibility with older versions.
monthly_signups = signups_by_date.resample('M').size()
ax_signups.plot(monthly_signups.index, monthly_signups.values, 'b-', linewidth=2)
ax_signups.fill_between(monthly_signups.index, monthly_signups.values, alpha=0.3)
ax_signups.set_title('Customer Signups Over Time', fontsize=12, fontweight='bold')
ax_signups.set_xlabel('Date')
ax_signups.set_ylabel('New Customers')

# Subscription distribution
sub_counts = customers_df['subscription_type'].value_counts()
ax_subs.pie(sub_counts, labels=sub_counts.index, autopct='%1.1f%%', startangle=90)
ax_subs.set_title('Subscription Distribution', fontsize=12, fontweight='bold')

# Monthly fee distribution
ax_fees.hist(customers_df['monthly_fee'], bins=30, color='steelblue', edgecolor='white')
ax_fees.set_title('Monthly Fee Distribution', fontsize=12, fontweight='bold')
ax_fees.set_xlabel('Monthly Fee ($)')
ax_fees.set_ylabel('Count')

# Transaction status.
# Bars follow value_counts() order (most frequent first); the colours are
# applied positionally in that same order.
status_counts = transactions_df['transaction_status'].value_counts()
colors = ['#2ecc71', '#e74c3c', '#f39c12', '#3498db']
ax_status.bar(status_counts.index, status_counts.values, color=colors)
ax_status.set_title('Transaction Status Distribution', fontsize=12, fontweight='bold')
ax_status.set_xlabel('Status')
ax_status.set_ylabel('Count')

plt.tight_layout()
plt.show()

## 2. Data Preprocessing

Clean the generated data and engineer the customer- and transaction-level features used by the analyses in the following sections.

In [None]:
from data_preprocessing import DataPreprocessor

# Pipeline output directory; the analysis cells further down reuse `output_dir`.
output_dir = os.path.join(BASE_DIR, 'outputs')

# The preprocessor is pointed at the same directory the generation step used.
preprocessor = DataPreprocessor(data_dir)
processed_customers, processed_transactions = preprocessor.run_pipeline(output_dir)

print(f"\nProcessed customers shape: {processed_customers.shape}")
print(f"Processed transactions shape: {processed_transactions.shape}")

In [None]:
# List every column of the processed customer table, then preview a
# hand-picked subset of them.
print("=== Engineered Customer Features ===")
print([*processed_customers.columns])

# Columns shown in the preview below.
preview_columns = [
    'customer_id', 'subscription_type', 'total_revenue',
    'customer_tenure_days', 'transaction_frequency',
    'payment_failure_rate', 'is_churned',
]

print("\n=== Sample Processed Data ===")
display(processed_customers[preview_columns].head(10))

## 3. Revenue Forecasting

Forecast revenue using Prophet and ARIMA models

In [None]:
from revenue_forecasting import RevenueForecaster

# Directories handed to the forecaster: it is given the preprocessing output
# directory as its data source, plus dedicated forecast and visuals folders.
forecaster_paths = {
    'data_dir': output_dir,
    'output_dir': os.path.join(output_dir, 'forecasts'),
    'visuals_dir': os.path.join(BASE_DIR, 'visuals'),
}
forecaster = RevenueForecaster(**forecaster_paths)

# Request a 12-period forecast.
forecast_results = forecaster.run_forecasting(forecast_periods=12)

In [None]:
# Show the combined forecast table returned by the forecasting run.
forecast_table = forecast_results['combined_forecast']
print("=== Revenue Forecast (Next 12 Months) ===")
display(forecast_table)

In [None]:
# Plot historical revenue together with each forecast series.
combined = forecast_results['combined_forecast']
monthly_rev = forecaster.monthly_revenue

fig, ax = plt.subplots(figsize=(14, 6))

# One entry per line drawn: (frame, y column, format string, line width, label).
# The historical series comes first, followed by the three forecasts.
series_specs = [
    (monthly_rev, 'revenue', 'b-', 2, 'Historical'),
    (combined, 'forecast_prophet', 'r--', 1.5, 'Prophet'),
    (combined, 'forecast_arima', 'g--', 1.5, 'ARIMA'),
    (combined, 'forecast_ensemble', 'purple', 2.5, 'Ensemble'),
]
for frame, y_col, fmt, width, label in series_specs:
    ax.plot(frame['date'], frame[y_col], fmt, linewidth=width, label=label)


def _as_thousands(x, p):
    """Tick formatter: render an axis value as whole thousands of dollars."""
    return f'${x/1000:.0f}K'


ax.set_title('Revenue Forecast Comparison', fontsize=14, fontweight='bold')
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Revenue ($)', fontsize=12)
ax.yaxis.set_major_formatter(plt.FuncFormatter(_as_thousands))
ax.legend(loc='upper left')
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 4. Customer Churn Prediction

Predict customer churn using Logistic Regression and Random Forest

In [None]:
from churn_prediction import ChurnPredictor

# Locations passed to the predictor; it is given the preprocessing output
# directory as its data source.
churn_output_dir = os.path.join(output_dir, 'churn')
churn_models_dir = os.path.join(BASE_DIR, 'models')
churn_visuals_dir = os.path.join(BASE_DIR, 'visuals')

predictor = ChurnPredictor(
    data_dir=output_dir,
    output_dir=churn_output_dir,
    models_dir=churn_models_dir,
    visuals_dir=churn_visuals_dir,
)

# Run the prediction workflow and keep its results for the cells below.
churn_results = predictor.run_churn_prediction()

In [None]:
# Show the risk summary followed by the model comparison table.
churn_tables = (
    ("=== Churn Risk Summary ===", 'risk_summary'),
    ("\n=== Model Comparison ===", 'comparison'),
)
for heading, result_key in churn_tables:
    print(heading)
    display(churn_results[result_key])

In [None]:
# Visualize churn predictions: probability histogram and risk-level pie chart.
predictions = churn_results['predictions']

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Churn probability distribution, with a reference line at 0.5.
axes[0].hist(predictions['churn_probability'], bins=30, color='steelblue', edgecolor='white')
axes[0].axvline(x=0.5, color='red', linestyle='--', label='Decision Threshold')
axes[0].set_xlabel('Churn Probability', fontsize=12)
axes[0].set_ylabel('Count', fontsize=12)
axes[0].set_title('Distribution of Churn Probability', fontsize=14, fontweight='bold')
axes[0].legend()

# Risk level distribution.
# value_counts() orders labels by frequency, so colours are looked up per label;
# a positional colour list would paint whichever level is most common green.
# NOTE(review): assumes the labels contain "low" / "med" / "high" (any case) —
# confirm against the values ChurnPredictor writes to churn_risk_level.
risk_palette = (
    ('high', '#e74c3c'),
    ('med', '#f39c12'),
    ('low', '#2ecc71'),
)


def risk_color(level):
    """Return the wedge colour for a risk label, or neutral grey if unrecognised."""
    label_text = str(level).lower()
    for keyword, color in risk_palette:
        if keyword in label_text:
            return color
    return '#95a5a6'


risk_counts = predictions['churn_risk_level'].value_counts()
colors = [risk_color(level) for level in risk_counts.index]
axes[1].pie(risk_counts, labels=risk_counts.index, autopct='%1.1f%%', colors=colors, startangle=90)
axes[1].set_title('Customers by Churn Risk Level', fontsize=14, fontweight='bold')

plt.tight_layout()
plt.show()

## 5. Profitability Analysis & Customer Segmentation

Calculate customer profitability and perform segmentation

In [None]:
from profitability_analysis import ProfitabilityAnalyzer

# Directories handed to the analyzer: the preprocessing output directory as
# its data source, plus dedicated profitability and visuals folders.
analyzer_paths = {
    'data_dir': output_dir,
    'output_dir': os.path.join(output_dir, 'profitability'),
    'visuals_dir': os.path.join(BASE_DIR, 'visuals'),
}
analyzer = ProfitabilityAnalyzer(**analyzer_paths)

# Run the analysis and keep its results for the next cell.
profit_results = analyzer.run_profitability_analysis()

In [None]:
# Show the tier summary followed by the segment profile table.
tier_summary = profit_results['tier_summary']
print("=== Profitability by Tier ===")
display(tier_summary)

segment_profile = profit_results['segment_profile']
print("\n=== Customer Segment Profiles ===")
display(segment_profile)

## 6. Cohort & RFM Analysis

Perform cohort retention analysis and RFM customer scoring

In [None]:
from cohort_rfm_analysis import CohortRFMAnalyzer

# Locations passed to the analyzer; it is given the preprocessing output
# directory as its data source.
cohort_output_dir = os.path.join(output_dir, 'cohort_rfm')
cohort_visuals_dir = os.path.join(BASE_DIR, 'visuals')

cohort_analyzer = CohortRFMAnalyzer(
    data_dir=output_dir,
    output_dir=cohort_output_dir,
    visuals_dir=cohort_visuals_dir,
)

# Run the combined cohort + RFM workflow and keep its results for the cells below.
cohort_results = cohort_analyzer.run_cohort_rfm_analysis()

In [None]:
# Show the RFM segment summary followed by the average retention table.
cohort_tables = (
    ("=== RFM Segment Summary ===", 'rfm_summary'),
    ("\n=== Average Retention by Month ===", 'avg_retention'),
)
for heading, result_key in cohort_tables:
    print(heading)
    display(cohort_results[result_key])

In [None]:
# Plot average retention against months since signup.
avg_retention = cohort_results['avg_retention']
months_since_signup = avg_retention['month']
retention_rate = avg_retention['avg_retention']

fig, ax = plt.subplots(figsize=(12, 6))

# Line with markers, plus a translucent fill under the curve.
ax.plot(months_since_signup, retention_rate, 'b-o', linewidth=2, markersize=8)
ax.fill_between(months_since_signup, retention_rate, alpha=0.3)

ax.set_title('Customer Retention Curve', fontsize=14, fontweight='bold')
ax.set_xlabel('Months Since Signup', fontsize=12)
ax.set_ylabel('Average Retention Rate (%)', fontsize=12)
# Fixed y-range of 0 to 105.
# NOTE(review): presumably retention is expressed in percent — confirm.
ax.set_ylim(0, 105)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 7. Executive Reporting

Generate executive reports and Power BI-ready data

In [None]:
from executive_reporting import ExecutiveReporter

# Directories handed to the reporter: the project root plus dedicated
# report, visuals, and dashboard folders.
reporter_paths = {
    'base_dir': BASE_DIR,
    'output_dir': os.path.join(output_dir, 'reports'),
    'visuals_dir': os.path.join(BASE_DIR, 'visuals'),
    'dashboard_dir': os.path.join(BASE_DIR, 'dashboard'),
}
reporter = ExecutiveReporter(**reporter_paths)

# Run the reporting workflow and keep its results for the KPI cell below.
report_results = reporter.run_executive_reporting()

In [None]:
# View KPIs
def format_kpi(metric, value):
    """Render one KPI as a ``name: value`` line.

    Floats are formatted by a naming heuristic: metrics whose name contains
    'rate', 'pct' or 'margin' are shown as percentages; other floats with a
    magnitude above 1000 are shown as dollar amounts; remaining floats get two
    decimals. Non-float values use a thousands separator when the value
    supports it, and otherwise fall back to plain text so that string or
    missing KPIs do not abort the listing.
    """
    if isinstance(value, float):
        if 'rate' in metric or 'pct' in metric or 'margin' in metric:
            return f"{metric}: {value:.1f}%"
        if abs(value) > 1000:
            # Keep the sign in front of the currency symbol for negative amounts.
            sign = '-' if value < 0 else ''
            return f"{metric}: {sign}${abs(value):,.2f}"
        return f"{metric}: {value:.2f}"
    try:
        return f"{metric}: {value:,}"
    except (TypeError, ValueError):
        # Strings, None and similar values reject the ',' format spec.
        return f"{metric}: {value}"


print("=== Key Performance Indicators ===")
for metric, value in report_results['kpis'].items():
    print(format_kpi(metric, value))

## 8. Summary

### Key Findings:
1. **Revenue Forecasting**: Both Prophet and ARIMA models provide 12-month forecasts
2. **Churn Prediction**: ML models identify high-risk customers for retention efforts
3. **Profitability**: Customer segmentation reveals different profitability tiers
4. **RFM Analysis**: Customer behavior patterns identified through RFM scoring

### Output Files Generated:
- **data/**: Raw datasets (customers.csv, transactions.csv)
- **outputs/**: All analysis outputs including forecasts, churn predictions, profitability
- **models/**: Trained ML models (logistic_regression.pkl, random_forest.pkl)
- **visuals/**: All visualizations as PNG files
- **dashboard/**: Power BI-ready data exports

In [None]:
# Print an indented tree of everything under <BASE_DIR>/outputs.
print("=== Generated Output Files ===")
outputs_root = os.path.join(BASE_DIR, 'outputs')
for folder, _subdirs, filenames in os.walk(outputs_root):
    # Depth is the number of path separators left once BASE_DIR is stripped.
    depth = folder.replace(BASE_DIR, '').count(os.sep)
    folder_pad = '  ' * depth
    file_pad = '  ' * (depth + 1)
    print(f"{folder_pad}{os.path.basename(folder)}/")
    for filename in filenames:
        print(f"{file_pad}{filename}")