# In [None]:
# Real Liquidity Modeling with Actual CBK Data
# 🏛️ NERVA DIVINE LIQUIDITY INTELLIGENCE - INTERBANK ANALYSIS
# Using actual Central Bank of Kenya interbank and repo market data

import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

# Startup banner: four lines, emitted with a single print call.
print(
    "🏛️ NERVA DIVINE LIQUIDITY MODELING SYSTEM",
    "=" * 60,
    "💰 Initializing Real CBK Liquidity Analysis...",
    "🔄 Loading actual interbank market data...",
    sep="\n",
)

class RealLiquidityModeler:
    """Load, clean and summarise interbank / repo market CSV exports.

    Typical flow: ``load_real_liquidity_data`` reads the CSV files under
    ``data_path``; ``preprocess_interbank_data`` turns the interbank frame
    into a date-sorted frame with a ``date`` column and numeric
    ``rate_*`` / ``volume_*`` columns; ``analyze_liquidity_conditions`` and
    ``create_liquidity_dashboard`` consume that processed frame.
    """

    # Case-insensitive substrings used to recognise columns by name.
    _DATE_KEYWORDS = ('date', 'period', 'time')
    _RATE_KEYWORDS = ('rate', 'interbank', 'overnight')
    _VOLUME_KEYWORDS = ('volume', 'amount', 'value')

    # Labels handed to ``pd.cut`` (lowest bin first) and the bar colour of each.
    _STRESS_LABELS = ['Low', 'Moderate', 'Normal', 'Elevated', 'High']
    _STRESS_COLORS = ['green', 'lightgreen', 'yellow', 'orange', 'red']

    def __init__(self, data_path="../data/raw/"):
        """Remember ``data_path`` (used as a plain string prefix for file names)."""
        self.data_path = data_path
        self.interbank_data = None
        self.repo_data = None
        self.discount_window_data = None
        self.horizontal_repo_data = None
        self.treasury_bills_data = None
        self.treasury_bonds_data = None

    def _read_dataset(self, filename, label, error_label=None, report_failure=True):
        """Read one CSV below ``data_path``; return the frame, or None on failure.

        ``label`` is printed on success; ``error_label`` (default: ``label``)
        is printed with the exception text on failure unless
        ``report_failure`` is False.
        """
        try:
            frame = pd.read_csv(f"{self.data_path}{filename}")
        except Exception as e:
            # Best effort: one missing or unreadable file must not stop the
            # remaining datasets from loading.
            if report_failure:
                print(f"   ❌ {error_label or label} error: {str(e)}")
            return None
        print(f"   ✅ {label}: {len(frame)} records")
        return frame

    def load_real_liquidity_data(self):
        """Load all liquidity-related datasets.

        Every dataset is loaded independently; a dataset that cannot be read
        is reported and its attribute is left as None.

        Returns:
            True when at least one dataset was loaded, False when none was.
        """
        print("\n📥 Loading CBK liquidity datasets...")

        # Interbank rates and volumes, with a fallback file name.
        self.interbank_data = self._read_dataset(
            "Interbank Rates  Volumes.csv", "Interbank Data", report_failure=False
        )
        if self.interbank_data is None:
            print("   ⚠️ Primary interbank data not found, trying alternative...")
            self.interbank_data = self._read_dataset(
                "Interbank Rates () .csv",
                "Alternative Interbank Data",
                error_label="Interbank Data",
            )

        # Repo market operations
        self.repo_data = self._read_dataset("Repo and Reverse Repo .csv", "Repo Data")
        # Horizontal repo market
        self.horizontal_repo_data = self._read_dataset(
            "Horizontal Repo Market.csv", "Horizontal Repo Data"
        )
        # Discount window operations
        self.discount_window_data = self._read_dataset(
            "Discount Window.csv", "Discount Window Data"
        )
        # Treasury bills (liquidity management tool)
        self.treasury_bills_data = self._read_dataset(
            "Issues of Treasury Bills.csv", "Treasury Bills Data"
        )
        # Treasury bonds
        self.treasury_bonds_data = self._read_dataset(
            "Issues of Treasury Bonds.csv", "Treasury Bonds Data"
        )

        loaded = (
            self.interbank_data,
            self.repo_data,
            self.horizontal_repo_data,
            self.discount_window_data,
            self.treasury_bills_data,
            self.treasury_bonds_data,
        )
        if all(frame is None for frame in loaded):
            print("❌ Error loading liquidity data: no dataset could be read")
            return False
        return True

    @staticmethod
    def _columns_matching(df, keywords, exclude=()):
        """Return columns whose lower-cased name contains any of ``keywords``.

        Names are passed through ``str`` so non-string labels do not raise;
        columns listed in ``exclude`` are skipped.
        """
        return [
            col for col in df.columns
            if col not in exclude
            and any(word in str(col).lower() for word in keywords)
        ]

    def preprocess_interbank_data(self):
        """Process interbank market data for liquidity analysis.

        Returns:
            A copy of ``self.interbank_data`` sorted by a parsed ``date``
            column, with up to three ``rate_<col>`` and three
            ``volume_<col>`` numeric columns plus ``rate_volatility`` /
            ``rate_trend`` (5-row rolling std / mean of the first rate
            column). Returns None when no data is loaded, no date column is
            recognised, or processing raises.
        """
        if self.interbank_data is None:
            return None

        try:
            df_clean = self.interbank_data.copy()

            # Display first few rows to understand structure
            print("\n📊 Interbank data structure:")
            print(df_clean.head())
            print(f"Columns: {list(df_clean.columns)}")

            date_cols = self._columns_matching(df_clean, self._DATE_KEYWORDS)
            if not date_cols:
                print("❌ Could not identify date columns in interbank data")
                return None

            date_col = date_cols[0]
            # The date column is excluded so a name such as "Value Date" is
            # not also treated as a volume ("value") column.
            rate_cols = self._columns_matching(
                df_clean, self._RATE_KEYWORDS, exclude=(date_col,)
            )
            volume_cols = self._columns_matching(
                df_clean, self._VOLUME_KEYWORDS, exclude=(date_col,)
            )

            # Unparseable dates become NaT and those rows are dropped.
            df_clean['date'] = pd.to_datetime(df_clean[date_col], errors='coerce')
            df_clean = df_clean.dropna(subset=['date'])

            # Only the first three rate / volume columns are converted.
            for rate_col in rate_cols[:3]:
                df_clean[f'rate_{rate_col}'] = pd.to_numeric(df_clean[rate_col], errors='coerce')
            for vol_col in volume_cols[:3]:
                df_clean[f'volume_{vol_col}'] = pd.to_numeric(df_clean[vol_col], errors='coerce')

            df_clean = df_clean.sort_values('date').reset_index(drop=True)

            # Rolling liquidity metrics on the first rate column.
            if rate_cols:
                main_rate_col = f'rate_{rate_cols[0]}'
                df_clean['rate_volatility'] = df_clean[main_rate_col].rolling(window=5).std()
                df_clean['rate_trend'] = df_clean[main_rate_col].rolling(window=5).mean()

            print(f"✅ Interbank data processed: {len(df_clean)} records")
            return df_clean

        except Exception as e:
            print(f"❌ Error processing interbank data: {str(e)}")
            return None

    def analyze_liquidity_conditions(self, interbank_df):
        """Analyze current liquidity conditions.

        The first ``rate_*`` column is used. Missing values are ignored, so
        "current" means the last non-missing observation.

        Returns:
            A dict with keys ``current_rate``, ``avg_rate``,
            ``rate_volatility``, ``liquidity_stress`` ('High' when the
            current rate exceeds the 75th percentile, else 'Normal'),
            ``recent_trend`` ('Tightening' / 'Easing', or 'Insufficient data'
            when there is no earlier window to compare against),
            ``data_points`` and ``rate_range``; or None when the input is
            None, has no usable rate data, or analysis raises.
        """
        if interbank_df is None:
            return None

        try:
            rate_cols = [col for col in interbank_df.columns if str(col).startswith('rate_')]
            if not rate_cols:
                print("❌ No rate columns found for liquidity analysis")
                return None

            rates = interbank_df[rate_cols[0]].dropna()
            if rates.empty:
                print("❌ No valid rate observations for liquidity analysis")
                return None

            current_rate = rates.iloc[-1]

            # Compare the last five observations with the (up to) five before.
            recent_window = rates.iloc[-5:]
            prior_window = rates.iloc[-10:-5]
            if prior_window.empty:
                recent_trend = 'Insufficient data'
            elif recent_window.mean() > prior_window.mean():
                recent_trend = 'Tightening'
            else:
                recent_trend = 'Easing'

            return {
                'current_rate': current_rate,
                'avg_rate': rates.mean(),
                'rate_volatility': rates.std(),
                'liquidity_stress': 'High' if current_rate > rates.quantile(0.75) else 'Normal',
                'recent_trend': recent_trend,
                'data_points': len(interbank_df),
                'rate_range': f"{rates.min():.2f}% - {rates.max():.2f}%",
            }

        except Exception as e:
            print(f"❌ Error analyzing liquidity conditions: {str(e)}")
            return None

    @classmethod
    def _stress_distribution(cls, rate_values):
        """Count observations per stress bucket, in ``_STRESS_LABELS`` order.

        ``rate_values`` must be non-empty and free of NaN. Returning counts
        in label order keeps each bar aligned with its colour.
        """
        buckets = pd.cut(rate_values, bins=5, labels=cls._STRESS_LABELS)
        counts = buckets.value_counts()
        return [int(counts.get(label, 0)) for label in cls._STRESS_LABELS]

    @classmethod
    def _extract_repo_series(cls, repo_df, max_series=2):
        """Best-effort extraction of plottable series from a raw repo frame.

        Returns ``(dates, [(column_name, numeric_series), ...])`` where
        ``dates`` is the parsed, ascending date axis and at most
        ``max_series`` columns containing numeric data are included.
        Returns ``(None, [])`` when nothing plottable is found.
        """
        if repo_df is None or len(repo_df) == 0:
            return None, []

        frame = repo_df.reset_index(drop=True)
        date_cols = cls._columns_matching(frame, cls._DATE_KEYWORDS)
        if not date_cols:
            return None, []

        dates = pd.to_datetime(frame[date_cols[0]], errors='coerce').dropna().sort_values()
        if dates.empty:
            return None, []

        ordered = frame.loc[dates.index]
        series = []
        for col in ordered.columns:
            if col == date_cols[0]:
                continue
            numeric = pd.to_numeric(ordered[col], errors='coerce')
            if numeric.notna().any():
                series.append((str(col), numeric))
            if len(series) == max_series:
                break
        return dates, series

    def create_liquidity_dashboard(self, interbank_df, repo_df=None):
        """Create comprehensive liquidity analysis dashboard.

        Args:
            interbank_df: processed frame with ``date`` and ``rate_*`` columns.
            repo_df: optional raw repo frame. A third subplot row is added
                only when a date column and at least one numeric column can
                be extracted from it; the first two such columns are drawn.

        Returns:
            A plotly figure, or None when the input is None, has no
            ``rate_*`` column, or figure construction raises.
        """
        if interbank_df is None:
            return None

        try:
            rate_cols = [col for col in interbank_df.columns if str(col).startswith('rate_')]
            volume_cols = [col for col in interbank_df.columns if str(col).startswith('volume_')]

            if not rate_cols:
                print("❌ No rate data available for dashboard")
                return None

            main_rate_col = rate_cols[0]

            try:
                repo_dates, repo_series = self._extract_repo_series(repo_df)
            except Exception as e:
                # Repo panels are optional; fall back to the two-row layout.
                print(f"   ⚠️ Repo data could not be plotted: {str(e)}")
                repo_dates, repo_series = None, []

            rows = 3 if repo_series else 2
            titles = [
                'Interbank Rate Evolution',
                'Rate Volatility Analysis',
                'Trading Volume (if available)',
                'Liquidity Stress Indicators',
            ]
            if repo_series:
                titles += ['Repo Market Activity', 'Policy Operations']

            fig = make_subplots(
                rows=rows, cols=2,
                subplot_titles=tuple(titles),
                # One fresh spec list per row (no shared inner list).
                specs=[[{"secondary_y": False}, {"secondary_y": False}] for _ in range(rows)]
            )

            # Interbank rates
            fig.add_trace(
                go.Scatter(
                    x=interbank_df['date'],
                    y=interbank_df[main_rate_col],
                    mode='lines+markers',
                    name='Interbank Rate',
                    line=dict(color='darkblue', width=2),
                    marker=dict(size=4)
                ),
                row=1, col=1
            )

            # Rate volatility; it is the only trace in its subplot, so the
            # area fill is anchored to zero explicitly.
            if 'rate_volatility' in interbank_df.columns:
                fig.add_trace(
                    go.Scatter(
                        x=interbank_df['date'],
                        y=interbank_df['rate_volatility'],
                        mode='lines',
                        name='Rate Volatility',
                        line=dict(color='red', width=2),
                        fill='tozeroy'
                    ),
                    row=1, col=2
                )

            # Volume analysis (if available)
            if volume_cols:
                fig.add_trace(
                    go.Bar(
                        x=interbank_df['date'],
                        y=interbank_df[volume_cols[0]],
                        name='Trading Volume',
                        marker_color='lightblue'
                    ),
                    row=2, col=1
                )

            # Liquidity stress distribution (skipped when there are no rates).
            rate_values = interbank_df[main_rate_col].dropna()
            if not rate_values.empty:
                fig.add_trace(
                    go.Bar(
                        x=list(self._STRESS_LABELS),
                        y=self._stress_distribution(rate_values),
                        name='Liquidity Stress Distribution',
                        marker_color=list(self._STRESS_COLORS)
                    ),
                    row=2, col=2
                )

            # Repo panels: first extracted column on the left, second on the right.
            for position, (series_name, values) in enumerate(repo_series, start=1):
                fig.add_trace(
                    go.Scatter(
                        x=repo_dates,
                        y=values,
                        mode='lines',
                        name=f'Repo: {series_name}'
                    ),
                    row=3, col=position
                )

            fig.update_layout(
                title="💰 REAL CBK LIQUIDITY CONDITIONS ANALYSIS",
                height=600 if rows == 2 else 900,
                showlegend=True,
                title_font=dict(size=18, color='darkblue')
            )

            return fig

        except Exception as e:
            print(f"❌ Error creating liquidity dashboard: {str(e)}")
            return None

# Initialize the liquidity modeler
liquidity_modeler = RealLiquidityModeler()

# "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
print("\n🚀 INITIALIZING REAL LIQUIDITY ANALYSIS...")
print("=" * 50)

# In [None]:
# Load and Analyze Real Liquidity Data
# Driver cell: load the CSVs, preprocess the interbank frame, print the
# summary metrics and show the dashboard. Each stage reports its own failure.
print("💰 LOADING REAL CBK LIQUIDITY DATA")
print("=" * 50)

# Load all liquidity-related datasets
data_loaded = liquidity_modeler.load_real_liquidity_data()

if data_loaded:
    # Process interbank data (main liquidity indicator)
    print("\n🔧 Processing interbank market data...")
    interbank_processed = liquidity_modeler.preprocess_interbank_data()

    if interbank_processed is not None:
        # Analyze liquidity conditions
        print("\n🔍 Analyzing liquidity conditions...")
        liquidity_analysis = liquidity_modeler.analyze_liquidity_conditions(interbank_processed)

        # Failure is signalled by None, so test for None explicitly.
        if liquidity_analysis is not None:
            # Display key insights
            print("\n🎯 LIQUIDITY MARKET INSIGHTS:")
            print(f"   💰 Current Interbank Rate: {liquidity_analysis['current_rate']:.2f}%")
            print(f"   📊 Average Rate: {liquidity_analysis['avg_rate']:.2f}%")
            print(f"   📈 Market Trend: {liquidity_analysis['recent_trend']}")
            print(f"   ⚠️ Liquidity Stress: {liquidity_analysis['liquidity_stress']}")
            print(f"   🎯 Rate Volatility: {liquidity_analysis['rate_volatility']:.2f}%")
            print(f"   📊 Data Points: {liquidity_analysis['data_points']}")
            print(f"   📈 Rate Range: {liquidity_analysis['rate_range']}")

            # Create comprehensive dashboard
            print("\n📊 Creating liquidity analysis dashboard...")
            liquidity_dashboard = liquidity_modeler.create_liquidity_dashboard(
                interbank_processed,
                liquidity_modeler.repo_data
            )

            # Compare against None rather than relying on figure truthiness.
            if liquidity_dashboard is not None:
                liquidity_dashboard.show()
                print("✅ Liquidity dashboard created successfully!")
            else:
                print("❌ Failed to create liquidity dashboard")
        else:
            print("❌ Failed to analyze liquidity conditions")
    else:
        print("❌ Failed to process interbank data")
else:
    print("❌ Failed to load liquidity datasets")

# In [None]:
# Advanced Liquidity Forecasting & Repo Market Analysis
# "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
print("\n🧠 ADVANCED LIQUIDITY FORECASTING")
print("=" * 50)

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

class LiquidityForecaster:
    """Feature engineering, model training and plotting for rate forecasts.

    Intended flow: ``create_liquidity_features`` ->
    ``prepare_liquidity_forecasting_data`` -> ``train_liquidity_models`` ->
    ``create_forecasting_visualization``. The forecast target is the main
    rate column (the first column whose name starts with ``rate_``) shifted
    ``forecast_horizon`` rows into the future.
    """

    def __init__(self):
        """Create the candidate regressors and a shared feature scaler."""
        self.models = {
            'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
            'Gradient Boosting': GradientBoostingRegressor(n_estimators=100, random_state=42),
            'Lasso Regression': Lasso(alpha=0.1, random_state=42),
            'Linear Model': LinearRegression()
        }
        self.scaler = StandardScaler()
        self.best_model = None
        self.model_performance = {}

    @staticmethod
    def _find_main_rate_column(df):
        """Return the first column whose name starts with ``rate_``, or None.

        Both feature creation and target preparation use this single rule so
        they always agree on the column; engineered ``rate_*`` features are
        appended after it and therefore never come first.
        """
        return next((col for col in df.columns if str(col).startswith('rate_')), None)

    def create_liquidity_features(self, interbank_df):
        """Create comprehensive features for liquidity forecasting.

        Args:
            interbank_df: frame with a datetime ``date`` column and at least
                one ``rate_*`` column. It is not modified.

        Returns:
            A copy with lag, rolling, change, momentum, volatility, stress
            and calendar features added. Rows where the main rate or any
            engineered feature is missing are dropped; missing values in
            other columns do not remove rows. Returns None when no rate or
            ``date`` column exists.
        """
        features_df = interbank_df.copy()

        main_rate_col = self._find_main_rate_column(features_df)
        if main_rate_col is None:
            print("❌ No rate columns found for feature creation")
            return None
        if 'date' not in features_df.columns:
            print("❌ No date column found for feature creation")
            return None

        rate = features_df[main_rate_col]
        engineered = {}

        # Lagged rate features
        for lag in (1, 2, 3, 5, 10):
            engineered[f'rate_lag_{lag}'] = rate.shift(lag)

        # Rolling statistics
        for window in (3, 5, 10, 20):
            rolling = rate.rolling(window=window)
            engineered[f'rate_ma_{window}'] = rolling.mean()
            engineered[f'rate_std_{window}'] = rolling.std()

        # Rate change features
        engineered['rate_change_1'] = rate.diff(1)
        engineered['rate_change_5'] = rate.diff(5)

        # Momentum = last minus first value of a 3- / 5-row window, i.e. a
        # difference over 2 / 4 rows. Rows with a gap inside the window are
        # removed below through the matching rolling-mean column.
        engineered['rate_momentum_3'] = rate.diff(2)
        engineered['rate_momentum_5'] = rate.diff(4)

        # Volatility indicators
        volatility_5 = rate.rolling(5).std()
        engineered['rate_volatility_5'] = volatility_5
        engineered['rate_volatility_10'] = rate.rolling(10).std()

        # Market stress indicators (comparisons against NaN evaluate to 0)
        engineered['rate_above_median'] = (rate > rate.rolling(20).median()).astype(int)
        engineered['high_volatility'] = (volatility_5 > volatility_5.rolling(20).mean()).astype(int)

        # Time-based features
        dates = features_df['date']
        engineered['month'] = dates.dt.month
        engineered['quarter'] = dates.dt.quarter
        engineered['day_of_week'] = dates.dt.dayofweek

        for name, values in engineered.items():
            features_df[name] = values

        # Only the columns the models rely on decide which rows survive.
        return features_df.dropna(subset=[main_rate_col, *engineered])

    def prepare_liquidity_forecasting_data(self, features_df, forecast_horizon=5):
        """Prepare data for liquidity forecasting.

        Args:
            features_df: output of ``create_liquidity_features``. It is not
                modified.
            forecast_horizon: number of rows ahead the target is taken from.

        Returns:
            ``(X, y, feature_cols)``. ``X`` holds every numeric column except
            the main rate and the target (columns that are entirely missing
            are left out); ``y`` is the main rate ``forecast_horizon`` rows
            ahead. Rows with a missing feature or target are dropped.
            Returns ``(None, None, None)`` when no ``rate_*`` column exists.
        """
        main_rate_col = self._find_main_rate_column(features_df)
        if main_rate_col is None:
            print("❌ No main rate column found")
            return None, None, None

        target_col = f'future_rate_{forecast_horizon}'

        # Work on a copy so the caller's frame does not gain the target column.
        work_df = features_df.copy()
        work_df[target_col] = work_df[main_rate_col].shift(-forecast_horizon)

        # Non-numeric columns (raw text, datetimes) cannot be scaled or fitted.
        numeric_cols = work_df.select_dtypes(include=[np.number]).columns
        feature_cols = [
            col for col in numeric_cols
            if col not in (main_rate_col, target_col) and work_df[col].notna().any()
        ]

        df_clean = work_df.dropna(subset=feature_cols + [target_col])

        X = df_clean[feature_cols]
        y = df_clean[target_col]

        return X, y, feature_cols

    def train_liquidity_models(self, X, y):
        """Train liquidity forecasting models.

        The first 80% of rows (in the given order) train the models and the
        remaining rows evaluate them. Features are standardised with a
        scaler fitted on the training rows only.

        Returns:
            Dict keyed by model name with ``model``, ``mae``, ``mse``,
            ``rmse``, ``r2``, ``predictions`` and ``actual``. The model with
            the highest R² is stored in ``self.best_model`` and the full
            dict in ``self.model_performance``.

        Raises:
            ValueError: when the split leaves the train or test part empty.
        """
        # Split data for validation
        split_idx = int(len(X) * 0.8)
        if split_idx == 0 or split_idx >= len(X):
            raise ValueError(
                f"Need enough samples for a non-empty train and test split, got {len(X)}"
            )

        X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
        y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]

        # The scaled matrices are identical for every model: compute them once.
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)

        results = {}

        for name, model in self.models.items():
            print(f"\n🤖 Training {name} for liquidity forecasting...")

            model.fit(X_train_scaled, y_train)
            y_pred = model.predict(X_test_scaled)

            # Calculate metrics
            mae = mean_absolute_error(y_test, y_pred)
            mse = mean_squared_error(y_test, y_pred)
            rmse = np.sqrt(mse)
            r2 = r2_score(y_test, y_pred)

            results[name] = {
                'model': model,
                'mae': mae,
                'mse': mse,
                'rmse': rmse,
                'r2': r2,
                'predictions': y_pred,
                'actual': y_test
            }

            print(f"   📊 MAE: {mae:.4f}")
            print(f"   📊 RMSE: {rmse:.4f}")
            print(f"   📊 R²: {r2:.4f}")

        # Find best model
        best_model_name = max(results, key=lambda model_name: results[model_name]['r2'])
        self.best_model = results[best_model_name]['model']
        self.model_performance = results

        print(f"\n🏆 Best Model: {best_model_name} (R²: {results[best_model_name]['r2']:.4f})")

        return results

    def create_forecasting_visualization(self, model_results):
        """Create liquidity forecasting visualization.

        Args:
            model_results: dict returned by ``train_liquidity_models``.

        Returns:
            A 2x2 plotly figure (R² per model, predicted vs actual for the
            best model, RMSE per model, residuals of the best model), or
            None when building the figure raises.
        """
        try:
            fig = make_subplots(
                rows=2, cols=2,
                subplot_titles=(
                    'Model Performance Comparison',
                    'Best Model: Predicted vs Actual',
                    'Forecasting Accuracy',
                    'Model Residuals'
                )
            )

            models = list(model_results.keys())

            # Model performance comparison
            r2_scores = [model_results[model]['r2'] for model in models]
            fig.add_trace(
                go.Bar(name='R² Score', x=models, y=r2_scores, marker_color='lightblue'),
                row=1, col=1
            )

            # Best model predictions
            best_model_name = max(models, key=lambda model: model_results[model]['r2'])
            best_results = model_results[best_model_name]
            actual_values = best_results['actual'].values
            predicted_values = best_results['predictions']

            fig.add_trace(
                go.Scatter(name='Actual', y=actual_values, mode='lines', line=dict(color='blue')),
                row=1, col=2
            )
            fig.add_trace(
                go.Scatter(name='Predicted', y=predicted_values, mode='lines', line=dict(color='red')),
                row=1, col=2
            )

            # RMSE comparison
            rmse_scores = [model_results[model]['rmse'] for model in models]
            fig.add_trace(
                go.Bar(name='RMSE', x=models, y=rmse_scores, marker_color='lightcoral'),
                row=2, col=1
            )

            # Residuals (actual minus predicted) against the prediction
            fig.add_trace(
                go.Scatter(x=predicted_values, y=actual_values - predicted_values, mode='markers',
                           name='Residuals', marker=dict(color='green')),
                row=2, col=2
            )

            fig.update_layout(
                title="💰 LIQUIDITY FORECASTING MODEL PERFORMANCE",
                height=800,
                showlegend=True
            )

            return fig

        except Exception as e:
            print(f"❌ Error creating forecasting visualization: {str(e)}")
            return None

# Initialize forecaster and run analysis
# Driver cell: build features from the processed interbank frame, train the
# models and show the comparison figure. Runs only when the earlier cell
# produced `interbank_processed`.
forecaster = LiquidityForecaster()

# This is module-level code, so the variable lives in globals().
if globals().get('interbank_processed') is not None:
    print("\n🔧 Creating liquidity forecasting features...")
    liquidity_features = forecaster.create_liquidity_features(interbank_processed)

    if liquidity_features is not None:
        print(f"   📊 Features created: {len(liquidity_features.columns)}")
        print(f"   📊 Sample size: {len(liquidity_features)}")

        # Prepare forecasting data
        print("\n🎯 Preparing liquidity forecasting data...")
        X, y, feature_cols = forecaster.prepare_liquidity_forecasting_data(liquidity_features)

        if X is not None and len(X) > 10:  # Need minimum data for training
            print(f"   📊 Training samples: {len(X)}")
            print(f"   📊 Features: {len(feature_cols)}")

            # Train forecasting models
            print("\n🚀 Training liquidity forecasting models...")
            forecast_results = forecaster.train_liquidity_models(X, y)

            # Create visualization
            print("\n📊 Creating forecasting visualization...")
            forecast_viz = forecaster.create_forecasting_visualization(forecast_results)

            # Failure is signalled by None; do not rely on figure truthiness.
            if forecast_viz is not None:
                forecast_viz.show()
                print("✅ Liquidity forecasting analysis complete!")

            print("\n🎯 LIQUIDITY FORECASTING INSIGHTS:")
            best_model = max(forecast_results, key=lambda name: forecast_results[name]['r2'])
            print(f"   🏆 Best Model: {best_model}")
            print(f"   📊 Accuracy (R²): {forecast_results[best_model]['r2']:.4f}")
            print(f"   🎯 Prediction Error: ±{forecast_results[best_model]['rmse']:.4f}%")

        else:
            print("❌ Insufficient data for liquidity forecasting")
    else:
        print("❌ Failed to create liquidity features")
else:
    print("❌ No interbank data available for liquidity forecasting")