In [1]:
# Real Inflation Modeling with Actual CBK Data
# 🏛️ NERVA DIVINE ECONOMIC INTELLIGENCE - INFLATION ANALYSIS
# Using actual Central Bank of Kenya datasets for accurate modeling

import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

print("🏛️ NERVA DIVINE INFLATION MODELING SYSTEM")
print("="*60)
print("📊 Initializing Real CBK Inflation Analysis...")
print("🎯 Loading actual monetary policy data...")

class RealInflationModeler:
    """Advanced inflation modeling using real CBK data"""
    
    def __init__(self, data_path="../data/raw/"):
        self.data_path = data_path
        self.cbr_data = None
        self.gdp_data = None
        self.fx_data = None
        self.repo_data = None
        self.money_supply_data = None
        
    def load_real_inflation_data(self):
        """Load all relevant inflation-related datasets"""
        try:
            print("\\n📥 Loading CBK inflation datasets...")
            
            # Central Bank Rate (Primary inflation tool)
            try:
                self.cbr_data = pd.read_csv(f"{self.data_path}Central Bank Rate (CBR)  .csv")
                print(f"   ✅ CBR Data: {len(self.cbr_data)} records")
            except Exception as e:
                print(f"   ❌ CBR Data error: {str(e)}")
            
            # GDP data (inflation pressure indicator)
            try:
                self.gdp_data = pd.read_csv(f"{self.data_path}Annual GDP.csv")
                print(f"   ✅ GDP Data: {len(self.gdp_data)} records")
            except Exception as e:
                print(f"   ❌ GDP Data error: {str(e)}")
            
            # Exchange rate (imported inflation)
            try:
                self.fx_data = pd.read_csv(f"{self.data_path}Monthly exchange rate (end period).csv")
                print(f"   ✅ FX Data: {len(self.fx_data)} records")
            except Exception as e:
                print(f"   ❌ FX Data error: {str(e)}")
            
            # Repo operations (monetary policy transmission)
            try:
                self.repo_data = pd.read_csv(f"{self.data_path}Repo and Reverse Repo .csv")
                print(f"   ✅ Repo Data: {len(self.repo_data)} records")
            except Exception as e:
                print(f"   ❌ Repo Data error: {str(e)}")
            
            # Mobile payments (money velocity proxy)
            try:
                self.money_supply_data = pd.read_csv(f"{self.data_path}Mobile Payments.csv")
                print(f"   ✅ Money Supply Proxy: {len(self.money_supply_data)} records")
            except Exception as e:
                print(f"   ❌ Money Supply error: {str(e)}")
            
            return True
            
        except Exception as e:
            print(f"❌ Error loading inflation data: {str(e)}")
            return False
    
    def preprocess_cbr_data(self):
        """Process Central Bank Rate data for inflation analysis"""
        if self.cbr_data is None:
            return None
            
        try:
            # Clean and process CBR data
            df = self.cbr_data.copy()
            
            # Handle different possible column names
            date_cols = [col for col in df.columns if 'date' in col.lower() or 'period' in col.lower()]
            rate_cols = [col for col in df.columns if 'rate' in col.lower() or 'cbr' in col.lower()]
            
            if date_cols and rate_cols:
                df_clean = df[[date_cols[0], rate_cols[0]]].copy()
                df_clean.columns = ['date', 'cbr_rate']
                
                # Convert date
                df_clean['date'] = pd.to_datetime(df_clean['date'], errors='coerce')
                df_clean = df_clean.dropna()
                
                # Convert rate to numeric
                df_clean['cbr_rate'] = pd.to_numeric(df_clean['cbr_rate'], errors='coerce')
                df_clean = df_clean.dropna()
                
                # Sort by date
                df_clean = df_clean.sort_values('date').reset_index(drop=True)
                
                # Calculate rate changes (inflation expectations)
                df_clean['cbr_change'] = df_clean['cbr_rate'].diff()
                df_clean['cbr_trend'] = df_clean['cbr_rate'].rolling(window=3).mean()
                
                print(f"✅ CBR data processed: {len(df_clean)} records")
                return df_clean
            else:
                print("❌ Could not identify CBR columns")
                return None
                
        except Exception as e:
            print(f"❌ Error processing CBR data: {str(e)}")
            return None
    
    def analyze_inflation_drivers(self, cbr_df):
        """Analyze key inflation drivers from CBK data"""
        if cbr_df is None:
            return None
            
        try:
            analysis = {
                'current_cbr': cbr_df['cbr_rate'].iloc[-1],
                'cbr_trend': 'Tightening' if cbr_df['cbr_change'].iloc[-3:].mean() > 0 else 'Easing',
                'policy_stance': 'Hawkish' if cbr_df['cbr_rate'].iloc[-1] > cbr_df['cbr_rate'].mean() else 'Dovish',
                'volatility': cbr_df['cbr_rate'].std(),
                'recent_changes': cbr_df['cbr_change'].iloc[-6:].sum(),
                'avg_rate': cbr_df['cbr_rate'].mean(),
                'max_rate': cbr_df['cbr_rate'].max(),
                'min_rate': cbr_df['cbr_rate'].min()
            }
            
            return analysis
            
        except Exception as e:
            print(f"❌ Error analyzing inflation drivers: {str(e)}")
            return None
    
    def create_inflation_dashboard(self, cbr_df, analysis):
        """Create comprehensive inflation analysis dashboard"""
        if cbr_df is None or analysis is None:
            return None
            
        try:
            fig = make_subplots(
                rows=2, cols=2,
                subplot_titles=(
                    'Central Bank Rate Evolution',
                    'CBR Policy Changes',
                    'Rate Change Distribution',
                    'Policy Stance Analysis'
                ),
                specs=[[{"secondary_y": False}, {"secondary_y": False}],
                       [{"secondary_y": False}, {"secondary_y": False}]]
            )
            
            # CBR Evolution
            fig.add_trace(
                go.Scatter(
                    x=cbr_df['date'],
                    y=cbr_df['cbr_rate'],
                    mode='lines+markers',
                    name='CBR Rate',
                    line=dict(color='darkblue', width=3),
                    marker=dict(size=6)
                ),
                row=1, col=1
            )
            
            # Add trend line
            fig.add_trace(
                go.Scatter(
                    x=cbr_df['date'],
                    y=cbr_df['cbr_trend'],
                    mode='lines',
                    name='CBR Trend',
                    line=dict(color='red', width=2, dash='dash')
                ),
                row=1, col=1
            )
            
            # CBR Changes
            colors = ['green' if x >= 0 else 'red' for x in cbr_df['cbr_change'].fillna(0)]
            fig.add_trace(
                go.Bar(
                    x=cbr_df['date'],
                    y=cbr_df['cbr_change'].fillna(0),
                    name='Rate Changes',
                    marker_color=colors
                ),
                row=1, col=2
            )
            
            # Rate distribution
            fig.add_trace(
                go.Histogram(
                    x=cbr_df['cbr_rate'],
                    name='Rate Distribution',
                    marker_color='lightblue',
                    opacity=0.7
                ),
                row=2, col=1
            )
            
            # Policy stance pie chart
            stance_data = {
                'Hawkish Periods': len(cbr_df[cbr_df['cbr_rate'] > cbr_df['cbr_rate'].mean()]),
                'Dovish Periods': len(cbr_df[cbr_df['cbr_rate'] <= cbr_df['cbr_rate'].mean()])
            }
            
            fig.add_trace(
                go.Pie(
                    labels=list(stance_data.keys()),
                    values=list(stance_data.values()),
                    name="Policy Stance"
                ),
                row=2, col=2
            )
            
            fig.update_layout(
                title="🏛️ REAL CBK INFLATION POLICY ANALYSIS",
                height=800,
                showlegend=True,
                title_font=dict(size=18, color='darkblue')
            )
            
            return fig
            
        except Exception as e:
            print(f"❌ Error creating inflation dashboard: {str(e)}")
            return None

# Initialize the inflation modeler
inflation_modeler = RealInflationModeler()

print("\\n🚀 INITIALIZING REAL INFLATION ANALYSIS...")
print("="*50)

🏛️ NERVA DIVINE INFLATION MODELING SYSTEM
📊 Initializing Real CBK Inflation Analysis...
🎯 Loading actual monetary policy data...
\n🚀 INITIALIZING REAL INFLATION ANALYSIS...


In [2]:
# Load and Analyze Real Inflation Data
print("📊 LOADING REAL CBK INFLATION DATA")
print("="*50)

# Load all inflation-related datasets
data_loaded = inflation_modeler.load_real_inflation_data()

if data_loaded:
    # Process CBR data (main inflation tool)
    print("\\n🔧 Processing Central Bank Rate data...")
    cbr_processed = inflation_modeler.preprocess_cbr_data()
    
    if cbr_processed is not None:
        # Analyze inflation drivers
        print("\\n🔍 Analyzing inflation drivers...")
        inflation_analysis = inflation_modeler.analyze_inflation_drivers(cbr_processed)
        
        if inflation_analysis:
            # Display key insights
            print(f"\\n🎯 INFLATION POLICY INSIGHTS:")
            print(f"   🏛️ Current CBR: {inflation_analysis['current_cbr']:.2f}%")
            print(f"   📈 Policy Trend: {inflation_analysis['cbr_trend']}")
            print(f"   🎯 Policy Stance: {inflation_analysis['policy_stance']}")
            print(f"   📊 Average CBR: {inflation_analysis['avg_rate']:.2f}%")
            print(f"   📈 Recent Changes: {inflation_analysis['recent_changes']:.2f}pp")
            print(f"   🎯 Rate Volatility: {inflation_analysis['volatility']:.2f}%")
            print(f"   📊 Rate Range: {inflation_analysis['min_rate']:.2f}% - {inflation_analysis['max_rate']:.2f}%")
            
            # Create comprehensive dashboard
            print("\\n📊 Creating inflation analysis dashboard...")
            inflation_dashboard = inflation_modeler.create_inflation_dashboard(cbr_processed, inflation_analysis)
            
            if inflation_dashboard:
                inflation_dashboard.show()
                print("✅ Inflation dashboard created successfully!")
            else:
                print("❌ Failed to create inflation dashboard")
        else:
            print("❌ Failed to analyze inflation drivers")
    else:
        print("❌ Failed to process CBR data")
else:
    print("❌ Failed to load inflation datasets")

📊 LOADING REAL CBK INFLATION DATA
\n📥 Loading CBK inflation datasets...
   ✅ CBR Data: 117 records
   ✅ GDP Data: 25 records
   ✅ FX Data: 391 records
   ✅ Repo Data: 67220 records
   ✅ Money Supply Proxy: 216 records
\n🔧 Processing Central Bank Rate data...
✅ CBR data processed: 42 records
\n🔍 Analyzing inflation drivers...
\n🎯 INFLATION POLICY INSIGHTS:
   🏛️ Current CBR: 9.75%
   📈 Policy Trend: Easing
   🎯 Policy Stance: Dovish
   📊 Average CBR: 11.76%
   📈 Recent Changes: -1.50pp
   🎯 Rate Volatility: 3.51%
   📊 Rate Range: 6.00% - 18.00%
\n📊 Creating inflation analysis dashboard...
❌ Error creating inflation dashboard: Trace type 'pie' is not compatible with subplot type 'xy'
at grid position (2, 2)

See the docstring for the specs argument to plotly.subplots.make_subplots
for more information on subplot types
❌ Failed to create inflation dashboard


In [None]:
# Advanced Inflation Forecasting Models
print("\\n🧠 ADVANCED INFLATION FORECASTING")
print("="*50)

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import TimeSeriesSplit
import numpy as np

class InflationForecaster:
    """Advanced inflation forecasting using multiple CBK indicators"""
    
    def __init__(self):
        self.models = {
            'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
            'Gradient Boosting': GradientBoostingRegressor(n_estimators=100, random_state=42),
            'Ridge Regression': Ridge(alpha=1.0, random_state=42),
            'Linear Model': LinearRegression()
        }
        self.scaler = StandardScaler()
        self.best_model = None
        self.model_performance = {}
    
    def create_inflation_features(self, cbr_df):
        """Create comprehensive features for inflation forecasting"""
        features_df = cbr_df.copy()
        
        # Lagged CBR features (past policy impact)
        for lag in [1, 2, 3, 6, 12]:
            features_df[f'cbr_lag_{lag}'] = features_df['cbr_rate'].shift(lag)
        
        # Rolling statistics (policy persistence)
        for window in [3, 6, 12]:
            features_df[f'cbr_ma_{window}'] = features_df['cbr_rate'].rolling(window=window).mean()
            features_df[f'cbr_std_{window}'] = features_df['cbr_rate'].rolling(window=window).std()
        
        # Rate change momentum
        features_df['cbr_momentum_3'] = features_df['cbr_rate'].diff(3)
        features_df['cbr_momentum_6'] = features_df['cbr_rate'].diff(6)
        
        # Policy cycle indicators
        features_df['cbr_above_avg'] = (features_df['cbr_rate'] > features_df['cbr_rate'].expanding().mean()).astype(int)
        features_df['cbr_increasing'] = (features_df['cbr_change'] > 0).astype(int)
        
        # Volatility measures
        features_df['cbr_volatility_6'] = features_df['cbr_rate'].rolling(window=6).std()
        features_df['cbr_volatility_12'] = features_df['cbr_rate'].rolling(window=12).std()
        
        # Time-based features (seasonal inflation patterns)
        features_df['month'] = features_df['date'].dt.month
        features_df['quarter'] = features_df['date'].dt.quarter
        features_df['year'] = features_df['date'].dt.year
        
        # Policy regime indicators
        features_df['policy_tightening'] = (features_df['cbr_change'].rolling(3).sum() > 0).astype(int)
        features_df['policy_easing'] = (features_df['cbr_change'].rolling(3).sum() < 0).astype(int)
        
        return features_df.dropna()
    
    def prepare_forecasting_data(self, features_df, target_col='cbr_rate', forecast_horizon=3):
        """Prepare data for inflation forecasting"""
        # Create future target (inflation expectation proxy)
        features_df[f'future_cbr_{forecast_horizon}'] = features_df[target_col].shift(-forecast_horizon)
        
        # Remove rows with NaN targets
        df_clean = features_df.dropna()
        
        # Select feature columns
        feature_cols = [col for col in df_clean.columns if col not in ['date', target_col, f'future_cbr_{forecast_horizon}']]
        
        X = df_clean[feature_cols]
        y = df_clean[f'future_cbr_{forecast_horizon}']
        
        return X, y, feature_cols, df_clean['date']
    
    def train_inflation_models(self, X, y, cv_splits=5):
        """Train multiple inflation forecasting models"""
        # Time series cross-validation
        tscv = TimeSeriesSplit(n_splits=cv_splits)
        
        results = {}
        
        for name, model in self.models.items():
            print(f"\\n🤖 Training {name} for inflation forecasting...")
            
            cv_scores = []
            
            for train_idx, test_idx in tscv.split(X):
                X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
                y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
                
                # Scale features
                X_train_scaled = self.scaler.fit_transform(X_train)
                X_test_scaled = self.scaler.transform(X_test)
                
                # Train model
                model.fit(X_train_scaled, y_train)
                
                # Predict
                y_pred = model.predict(X_test_scaled)
                
                # Calculate score
                score = r2_score(y_test, y_pred)
                cv_scores.append(score)
            
            # Final model training on all data
            X_scaled = self.scaler.fit_transform(X)
            model.fit(X_scaled, y)
            
            # Store results
            results[name] = {
                'model': model,
                'cv_scores': cv_scores,
                'avg_cv_score': np.mean(cv_scores),
                'std_cv_score': np.std(cv_scores)
            }
            
            print(f"   📊 CV R² Score: {np.mean(cv_scores):.4f} (±{np.std(cv_scores):.4f})")
        
        # Find best model
        best_model_name = max(results.keys(), key=lambda x: results[x]['avg_cv_score'])
        self.best_model = results[best_model_name]['model']
        self.model_performance = results
        
        print(f"\\n🏆 Best Model: {best_model_name} (CV R²: {results[best_model_name]['avg_cv_score']:.4f})")
        
        return results
    
    def forecast_inflation_trend(self, X_latest, periods=6):
        """Forecast future inflation policy trends"""
        if self.best_model is None:
            raise ValueError("No trained model available")
        
        forecasts = []
        current_features = X_latest.iloc[-1].values.copy()
        
        for _ in range(periods):
            # Scale features
            scaled_features = self.scaler.transform([current_features])
            
            # Make prediction
            forecast = self.best_model.predict(scaled_features)[0]
            forecasts.append(forecast)
            
            # Update features (simplified approach)
            # Shift lagged features
            current_features[0] = forecast  # Update main CBR
            current_features[1:5] = current_features[0:4]  # Update lags
        
        return forecasts

# Initialize forecaster
forecaster = InflationForecaster()

if 'cbr_processed' in locals() and cbr_processed is not None:
    print("\\n🔧 Creating inflation forecasting features...")
    inflation_features = forecaster.create_inflation_features(cbr_processed)
    
    print(f"   📊 Features created: {len(inflation_features.columns)}")
    print(f"   📊 Sample size: {len(inflation_features)}")
    
    # Prepare forecasting data
    print("\\n🎯 Preparing forecasting data...")
    X, y, feature_cols, dates = forecaster.prepare_forecasting_data(inflation_features, forecast_horizon=3)
    
    print(f"   📊 Training samples: {len(X)}")
    print(f"   📊 Features: {len(feature_cols)}")
    
    # Train forecasting models
    print("\\n🚀 Training inflation forecasting models...")
    model_results = forecaster.train_inflation_models(X, y)
    
    # Generate forecasts
    print("\\n🔮 Generating inflation trend forecasts...")
    future_forecasts = forecaster.forecast_inflation_trend(X, periods=6)
    
    print(f"\\n🎯 INFLATION FORECASTING RESULTS:")
    print(f"   🔮 6-Month CBR Forecast: {future_forecasts}")
    print(f"   📈 Expected Trend: {'Tightening' if future_forecasts[-1] > future_forecasts[0] else 'Easing'}")
    print(f"   🎯 Policy Direction: {'Hawkish' if np.mean(future_forecasts) > y.iloc[-1] else 'Dovish'}")
    
else:
    print("❌ No CBR data available for inflation forecasting")