# Advanced Data Analytics Pipeline - Comprehensive Demo

**Author**: Osman Abdullahi  
**Email**: Osmandabdullahi@gmail.com  
**Project**: Professional Data Analytics Portfolio

This notebook demonstrates advanced data analytics capabilities including:
- Statistical Analysis
- Business Intelligence & KPIs
- Customer Segmentation
- Financial Analytics
- Predictive Modeling
- Interactive Visualizations

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from datetime import datetime, timedelta

# Plot styling: apply the seaborn-flavoured matplotlib style sheet first,
# then layer the "husl" colour palette on top of it.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Suppress every warning for the remainder of the session.
warnings.filterwarnings('ignore')

# Set the pandas display limits (column count, overall width, per-cell
# text width) to None so wide frames are shown without truncation.
for _display_option in ('display.max_columns', 'display.width', 'display.max_colwidth'):
    pd.set_option(_display_option, None)

# Startup banner.
print(
    "📊 Advanced Data Analytics Pipeline - Demo Notebook",
    "🚀 Ready for professional data analysis!",
    sep="\n",
)

## 1. Data Loading & Initial Exploration

Load the generated sales, financial, and e-commerce datasets, then take a first look at their size and contents.

In [None]:
# Read the three CSV files used throughout the notebook.
sales_data = pd.read_csv('../data/sales_data.csv')
financial_data = pd.read_csv('../data/financial_data.csv')
ecommerce_data = pd.read_csv('../data/ecommerce_data.csv')

# Turn each frame's 'date' column into real datetimes (same order as loaded).
for _frame in (sales_data, financial_data, ecommerce_data):
    _frame['date'] = pd.to_datetime(_frame['date'])

# Row / column counts for each dataset, emitted as one block of text.
print("\n".join([
    "📈 Dataset Overview:",
    f"Sales Data: {len(sales_data):,} transactions, {sales_data.shape[1]} features",
    f"Financial Data: {len(financial_data):,} records, {financial_data.shape[1]} features",
    f"E-commerce Data: {len(ecommerce_data):,} sessions, {ecommerce_data.shape[1]} features",
]))

# Preview the first rows of the sales and financial frames.
for _heading, _frame in (
    ("\n🛍️ Sales Data Sample:", sales_data),
    ("\n💰 Financial Data Sample:", financial_data),
):
    print(_heading)
    display(_frame.head())

## 2. Statistical Analysis

Comprehensive statistical analysis demonstrating professional data science capabilities.

In [None]:
# Import our custom analytics module
import sys

# Put the parent directory on the import path so the `pipe` package can be
# found. The membership check keeps the cell idempotent: re-running it in
# the same kernel no longer stacks duplicate '../' entries onto sys.path.
if '../' not in sys.path:
    sys.path.append('../')
from pipe.analytics.statistical_analysis import StatisticalAnalyzer

# Initialize statistical analyzer on the sales transactions
stats_analyzer = StatisticalAnalyzer(sales_data)

# Generate descriptive statistics for the four numeric columns of interest
desc_stats = stats_analyzer.descriptive_statistics(['revenue', 'quantity', 'unit_price', 'customer_age'])

print("📊 Descriptive Statistics Summary:")
# Transpose so the selected statistics can be picked as columns below
# (assumes desc_stats maps column name -> {statistic: value} — TODO confirm).
stats_df = pd.DataFrame(desc_stats).T
display(stats_df[['mean', 'median', 'std', 'skewness', 'kurtosis']].round(4))

# Correlation analysis (Pearson)
correlation_matrix = stats_analyzer.correlation_analysis(method='pearson')

# Visualize correlation matrix; center=0 anchors the diverging colormap at
# zero so positive and negative correlations get opposite hues.
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0, 
            square=True, linewidths=0.5)
plt.title('📈 Correlation Matrix - Sales Data', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

# Hypothesis testing - ANOVA of revenue grouped by customer segment
hypothesis_result = stats_analyzer.hypothesis_testing(
    group_col='customer_segment', 
    value_col='revenue', 
    test_type='anova'
)

# Report the test name, statistic, p-value and significance flag
print("\n🧪 Hypothesis Testing Results:")
print(f"Test: {hypothesis_result['test_name']}")
print(f"Statistic: {hypothesis_result['statistic']:.4f}")
print(f"P-value: {hypothesis_result['p_value']:.6f}")
print(f"Significant difference: {hypothesis_result['significant']}")

## 3. Business Intelligence & KPI Analysis

Executive-level business intelligence: monthly revenue trends and customer KPIs, summarized as actionable insights.

In [None]:
from pipe.analytics.business_intelligence import BusinessIntelligence

# Build the BI analyzer over the sales transactions, keyed on the 'date' column.
bi_analyzer = BusinessIntelligence(sales_data, date_col='date')

# Monthly revenue roll-up.
revenue_analytics = bi_analyzer.revenue_analytics(revenue_col='revenue', period='monthly')

print("💰 Revenue Analytics Summary:")
print(f"Total Revenue: ${revenue_analytics['total_revenue']:,.2f}")
print(f"Average Transaction Value: ${revenue_analytics['avg_transaction_value']:.2f}")
print(f"Total Transactions: {revenue_analytics['total_transactions']:,}")
print(f"Average Growth Rate: {revenue_analytics['growth_rate_avg']*100:.2f}%")

# Per-period table behind the 2x2 dashboard below.
revenue_trends = revenue_analytics['period_analysis']
# String form of each period, computed once and shared as the x-axis of every panel.
period_labels = revenue_trends['period'].astype(str)

fig, axes = plt.subplots(2, 2, figsize=(15, 10))
(revenue_ax, count_ax), (growth_ax, avg_value_ax) = axes

# Top-left: total revenue per period.
revenue_ax.plot(period_labels, revenue_trends['total_revenue'],
                marker='o', linewidth=2, markersize=6)

# Top-right: number of transactions per period.
count_ax.bar(period_labels, revenue_trends['transactions'],
             alpha=0.7, color='skyblue')

# Bottom-left: period-over-period growth in percent. The first entry is
# skipped on both axes; the dashed red line marks zero growth.
growth_ax.plot(period_labels[1:],
               revenue_trends['revenue_growth'][1:]*100,
               marker='s', linewidth=2, color='green')
growth_ax.axhline(y=0, color='red', linestyle='--', alpha=0.5)

# Bottom-right: average revenue per transaction.
avg_value_ax.plot(period_labels, revenue_trends['avg_revenue'],
                  marker='^', linewidth=2, color='orange')

# Titles, y-labels and slanted x tick labels for all four panels.
for _panel, _title, _ylabel in (
    (revenue_ax, '📈 Monthly Revenue Trend', 'Revenue ($)'),
    (count_ax, '📊 Monthly Transaction Count', 'Transactions'),
    (growth_ax, '📈 Revenue Growth Rate (%)', 'Growth Rate (%)'),
    (avg_value_ax, '💵 Average Transaction Value', 'Avg Revenue ($)'),
):
    _panel.set_title(_title, fontweight='bold')
    _panel.set_ylabel(_ylabel)
    _panel.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

# Customer-level KPIs.
customer_analytics = bi_analyzer.customer_analytics(customer_col='customer_id', revenue_col='revenue')

print("\n👥 Customer Analytics:")
print(f"Total Customers: {customer_analytics['total_customers']:,}")
print(f"Average Customer LTV: ${customer_analytics['avg_customer_lifetime_value']:.2f}")
print(f"Customer Retention Rate: {customer_analytics['customer_retention_rate']*100:.2f}%")
print(f"Avg Transactions per Customer: {customer_analytics['avg_transactions_per_customer']:.2f}")

## 4. Customer Segmentation Analysis

Advanced customer segmentation using RFM (Recency, Frequency, Monetary) analysis and machine learning.

In [None]:
from pipe.ml.customer_segmentation import CustomerSegmentation

# Segmentation helper built over the sales transactions.
segmentation = CustomerSegmentation(sales_data)

# RFM metrics, computed from the customer id, date and revenue columns.
_rfm_columns = {
    'customer_col': 'customer_id',
    'date_col': 'date',
    'revenue_col': 'revenue',
}
rfm_data = segmentation.calculate_rfm(**_rfm_columns)

print("🎯 RFM Analysis Results:")
display(rfm_data.head(10))

# Assign customers to segments with the rule-based method, then summarize.
segments = segmentation.create_customer_segments(method='rfm_rules')
segment_analysis = segmentation.analyze_segments()

print("\n📊 Customer Segment Analysis:")
print(f"Total Customers Analyzed: {segment_analysis['total_customers']:,}")
print(f"Total Revenue: ${segment_analysis['total_revenue']:,.2f}")

# One table, one row per segment: size, share, revenue and revenue share.
print("\n🏷️ Segment Distribution:")
_summary_columns = (
    ('Count', 'segment_sizes'),
    ('Percentage', 'segment_percentages'),
    ('Revenue', 'revenue_by_segment'),
    ('Revenue %', 'revenue_percentage'),
)
segment_dist = pd.DataFrame(
    {column: segment_analysis[key] for column, key in _summary_columns}
)
display(segment_dist)

# Per-segment strategy notes; only the first two actions are listed.
recommendations = segmentation.generate_recommendations()

print("\n💡 Strategic Recommendations:")
for segment, rec in recommendations.items():
    print(f"\n🎯 {segment}:")
    print(f"   Strategy: {rec['strategy']}")
    print(f"   Priority: {rec['priority']}")
    leading_actions = rec['actions'][:2]
    print(f"   Actions: {', '.join(leading_actions)}...")