# 🌾 Shamba Score: Climate-Adaptive Credit Scoring System

## Overview
This notebook develops an AI-driven credit scoring system that:
- Uses satellite crop imagery for yield assessment
- Incorporates local climate forecasts
- Analyzes mobile money transaction patterns
- Evaluates community reputation scores
- Tracks farm input purchase history
- Separates climate risks from farmer performance
- Provides transparent explanations and recommendations

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score, classification_report
import xgboost as xgb
import lightgbm as lgb
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Set random seed for reproducibility
np.random.seed(42)

print("📊 Shamba Score AI System Initialized")
print("🌍 Climate-Adaptive Credit Scoring for Kenyan Farmers")

## 1. Data Sources & Feature Engineering

In [None]:
class ShambaScoreDataGenerator:
    """Generate synthetic data for Shamba Score model development.

    Every value is drawn from NumPy's global random state, so seeding it
    (``np.random.seed``) before use makes the generated dataset repeatable.
    """

    def __init__(self, n_farmers=1000):
        """Remember how many synthetic farmers (rows) to generate."""
        self.n_farmers = n_farmers

    def generate_satellite_data(self):
        """Generate satellite crop imagery features.

        Returns:
            dict mapping feature name -> array of length ``n_farmers``.
        """
        return {
            'ndvi_current': np.random.normal(0.7, 0.15, self.n_farmers),
            'ndvi_historical_avg': np.random.normal(0.65, 0.12, self.n_farmers),
            'crop_health_score': np.random.beta(2, 1, self.n_farmers) * 100,
            'field_size_hectares': np.random.exponential(2, self.n_farmers),
            'vegetation_consistency': np.random.uniform(0.3, 0.95, self.n_farmers),
            'soil_moisture_index': np.random.normal(0.5, 0.2, self.n_farmers)
        }

    def generate_climate_data(self):
        """Generate climate forecast features.

        Returns:
            dict mapping feature name -> array of length ``n_farmers``.
        """
        return {
            'rainfall_forecast_mm': np.random.gamma(2, 50, self.n_farmers),
            'temperature_avg_c': np.random.normal(24, 3, self.n_farmers),
            'drought_risk_score': np.random.beta(1, 3, self.n_farmers) * 100,
            'flood_risk_score': np.random.beta(1, 4, self.n_farmers) * 100,
            'climate_variability_index': np.random.uniform(0.1, 0.8, self.n_farmers),
            'seasonal_pattern_match': np.random.uniform(0.4, 0.95, self.n_farmers)
        }

    def generate_mpesa_data(self):
        """Generate mobile money transaction features.

        Returns:
            dict mapping feature name -> array of length ``n_farmers``.
        """
        return {
            'monthly_income_avg': np.random.lognormal(9, 0.8, self.n_farmers),
            'transaction_frequency': np.random.poisson(25, self.n_farmers),
            'savings_rate': np.random.beta(2, 5, self.n_farmers),
            'payment_consistency': np.random.uniform(0.5, 0.98, self.n_farmers),
            'agricultural_payments_ratio': np.random.beta(3, 2, self.n_farmers),
            'seasonal_income_stability': np.random.uniform(0.3, 0.9, self.n_farmers)
        }

    def generate_community_data(self):
        """Generate community reputation features.

        Returns:
            dict mapping feature name -> array of length ``n_farmers``.
        """
        return {
            'community_trust_score': np.random.beta(3, 1, self.n_farmers) * 100,
            'cooperative_participation': np.random.binomial(1, 0.6, self.n_farmers),
            'peer_recommendations': np.random.poisson(3, self.n_farmers),
            'local_leadership_role': np.random.binomial(1, 0.15, self.n_farmers),
            'dispute_history': np.random.poisson(0.5, self.n_farmers),
            'knowledge_sharing_score': np.random.beta(2, 2, self.n_farmers) * 100
        }

    def generate_input_purchase_data(self):
        """Generate farm input purchase history features.

        Returns:
            dict mapping feature name -> array of length ``n_farmers``.
        """
        return {
            'seed_investment_annual': np.random.lognormal(7, 0.6, self.n_farmers),
            'fertilizer_usage_kg': np.random.gamma(2, 25, self.n_farmers),
            'equipment_investment': np.random.lognormal(8, 1, self.n_farmers),
            'input_timing_score': np.random.uniform(0.4, 0.95, self.n_farmers),
            'quality_input_ratio': np.random.beta(3, 2, self.n_farmers),
            'sustainable_practices_score': np.random.beta(2, 2, self.n_farmers) * 100
        }

    def generate_target_variables(self, features_df):
        """Derive the target columns deterministically from the features.

        Args:
            features_df: DataFrame holding at least the climate, crop health,
                payment, community trust and input timing columns read below.

        Returns:
            dict with ``credit_score``, ``farmer_performance_score``,
            ``climate_risk_score`` and ``loan_default_probability``, each
            aligned with the rows of ``features_df``.
        """
        # Climate impact: weighted blend of drought risk, flood risk and a
        # low-rainfall term, rescaled by 1/100.
        climate_impact = (
            features_df['drought_risk_score'] * 0.3 +
            features_df['flood_risk_score'] * 0.2 +
            (100 - features_df['rainfall_forecast_mm'] / 10) * 0.2
        ) / 100

        # Farmer performance: equal-weight blend of four non-climate inputs.
        # The two 0-1 ratios are multiplied by 100 to share the 0-100 scale.
        farmer_performance = (
            features_df['crop_health_score'] * 0.25 +
            features_df['payment_consistency'] * 100 * 0.25 +
            features_df['community_trust_score'] * 0.25 +
            features_df['input_timing_score'] * 100 * 0.25
        ) / 100

        # Credit score on a 300-850 scale; the climate penalty is capped at
        # 50 points by the upper clip bound.
        base_score = 300 + (farmer_performance * 400)
        climate_adjustment = np.clip(climate_impact * 150, -100, 50)
        credit_score = np.clip(base_score - climate_adjustment, 300, 850)

        return {
            'credit_score': credit_score,
            'farmer_performance_score': farmer_performance * 100,
            'climate_risk_score': climate_impact * 100,
            # Logistic curve: 0.5 at a score of 500, falling as the score rises.
            'loan_default_probability': 1 / (1 + np.exp((credit_score - 500) / 100))
        }

    def generate_complete_dataset(self):
        """Generate the complete synthetic dataset.

        Returns:
            DataFrame with one row per farmer containing all feature columns,
            ``farmer_id``, ``region``, ``crop_type`` and the target columns
            produced by :meth:`generate_target_variables`.
        """
        data = {}

        # Generate all feature categories
        data.update(self.generate_satellite_data())
        data.update(self.generate_climate_data())
        data.update(self.generate_mpesa_data())
        data.update(self.generate_community_data())
        data.update(self.generate_input_purchase_data())

        # Create DataFrame
        df = pd.DataFrame(data)

        # Add farmer IDs and metadata
        df['farmer_id'] = [f'KE_{i:06d}' for i in range(self.n_farmers)]
        df['region'] = np.random.choice(['Central', 'Eastern', 'Western', 'Rift Valley'], self.n_farmers)
        df['crop_type'] = np.random.choice(['Maize', 'Coffee', 'Tea', 'Beans', 'Sugarcane'], self.n_farmers)

        # Attach the targets as NEW columns. DataFrame.update() must not be
        # used here: it only overwrites columns that already exist, so the
        # targets would be silently dropped.
        targets = self.generate_target_variables(df)
        for column, values in targets.items():
            df[column] = values

        return df

# Build the synthetic farmer table that the rest of the notebook works on
data_generator = ShambaScoreDataGenerator(n_farmers=2000)
df = data_generator.generate_complete_dataset()

# Report the table dimensions (rows = farmers, columns = all fields)
print(f"📊 Generated dataset with {df.shape[0]} farmers")
print(f"📈 Features: {df.shape[1]} columns")
df.head()

## 2. Climate Risk Separation Model

In [None]:
class ClimateRiskSeparator:
    """Separate climate-related risks from farmer performance.

    Two independent random-forest regressors are kept: one predicts
    ``climate_risk_score`` from climate columns, the other predicts
    ``farmer_performance_score`` from farmer-behaviour columns.
    """

    def __init__(self):
        self.climate_model = RandomForestRegressor(n_estimators=100, random_state=42)
        self.performance_model = RandomForestRegressor(n_estimators=100, random_state=42)

    def fit(self, df):
        """Train both regressors on ``df`` and remember their input columns."""
        # Columns fed to the climate risk model
        climate_cols = [
            'rainfall_forecast_mm',
            'temperature_avg_c',
            'drought_risk_score',
            'flood_risk_score',
            'climate_variability_index',
            'seasonal_pattern_match',
            'soil_moisture_index',
        ]

        # Columns fed to the farmer performance model
        performance_cols = [
            'crop_health_score',
            'payment_consistency',
            'community_trust_score',
            'input_timing_score',
            'savings_rate',
            'agricultural_payments_ratio',
            'sustainable_practices_score',
            'quality_input_ratio',
        ]

        # Climate model first, then the performance model
        self.climate_model.fit(df[climate_cols], df['climate_risk_score'])
        self.performance_model.fit(df[performance_cols], df['farmer_performance_score'])

        # Column lists are only recorded once both fits have succeeded
        self.climate_features = climate_cols
        self.performance_features = performance_cols

        print("✅ Climate Risk Separator trained successfully")

    def predict_separated_scores(self, df):
        """Return ``(climate_risk, farmer_performance)`` predictions for ``df``."""
        return (
            self.climate_model.predict(df[self.climate_features]),
            self.performance_model.predict(df[self.performance_features]),
        )

    def get_feature_importance(self):
        """Return ``(climate_importance, performance_importance)`` dicts.

        Each dict maps a feature name to the matching entry of the fitted
        model's ``feature_importances_``.
        """
        climate_importance = {
            name: weight
            for name, weight in zip(self.climate_features, self.climate_model.feature_importances_)
        }
        performance_importance = {
            name: weight
            for name, weight in zip(self.performance_features, self.performance_model.feature_importances_)
        }
        return climate_importance, performance_importance

# Fit the climate/performance separator on the full synthetic dataset
separator = ClimateRiskSeparator()
separator.fit(df)

# Per-model feature importances, keyed by feature name
climate_imp, performance_imp = separator.get_feature_importance()

# Print the five most important features of each model, largest first
for heading, scores in (
    ("\n🌡️ Top Climate Risk Factors:", climate_imp),
    ("\n👨‍🌾 Top Performance Factors:", performance_imp),
):
    print(heading)
    ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
    for feature, importance in ranked[:5]:
        print(f"  {feature}: {importance:.3f}")

## 3. Fair Credit Scoring Model

In [None]:
class ShambaScoreModel:
    """Main Shamba Score credit scoring model.

    Wraps an XGBoost regressor together with the preprocessing it was
    trained with (label encoders for the categorical columns, a standard
    scaler and the ordered feature list) so that inference uses exactly the
    same transformation as training.
    """

    # Identifier and target columns that must never be used as model inputs.
    _EXCLUDED_COLUMNS = (
        'farmer_id', 'credit_score', 'farmer_performance_score',
        'climate_risk_score', 'loan_default_probability',
    )
    # Categorical columns that are replaced by "<name>_encoded" integer codes.
    _CATEGORICAL_COLUMNS = ('region', 'crop_type')

    def __init__(self):
        self.model = xgb.XGBRegressor(
            n_estimators=200,
            max_depth=6,
            learning_rate=0.1,
            random_state=42
        )
        self.scaler = StandardScaler()
        self.feature_names = None
        # Fitted LabelEncoder per categorical column, filled in by fit().
        self.label_encoders = {}

    def prepare_features(self, df, fit_encoders=None):
        """Build the model input matrix from a raw farmer DataFrame.

        Args:
            df: DataFrame containing the feature columns plus ``region`` and
                ``crop_type``. Identifier/target columns are ignored.
            fit_encoders: ``True`` fits fresh label encoders on ``df`` and
                stores them; ``False`` reuses the stored encoders; ``None``
                (default) fits only when no encoders have been stored yet.

        Returns:
            ``(features_df, feature_cols)`` where ``feature_cols`` lists the
            columns of ``features_df`` in order.

        Raises:
            RuntimeError: if ``fit_encoders`` is False and no encoder has
                been stored for a categorical column.
        """
        # getattr keeps instances created without __init__ (e.g. unpickled
        # from an older version) usable.
        encoders = getattr(self, 'label_encoders', None)
        if encoders is None:
            encoders = self.label_encoders = {}
        if fit_encoders is None:
            fit_encoders = not encoders

        skipped = set(self._EXCLUDED_COLUMNS) | set(self._CATEGORICAL_COLUMNS)
        feature_cols = [col for col in df.columns if col not in skipped]

        df_processed = df.copy()
        for col in self._CATEGORICAL_COLUMNS:
            encoded_col = f'{col}_encoded'
            if fit_encoders:
                encoder = LabelEncoder()
                df_processed[encoded_col] = encoder.fit_transform(df_processed[col])
                encoders[col] = encoder
            else:
                if col not in encoders:
                    raise RuntimeError(
                        f"No fitted encoder for '{col}'; call fit() first"
                    )
                # Reuse the training-time mapping. Refitting on inference data
                # would map a single farmer's category to code 0 every time.
                df_processed[encoded_col] = encoders[col].transform(df_processed[col])
            feature_cols.append(encoded_col)

        return df_processed[feature_cols], feature_cols

    def fit(self, df):
        """Train the Shamba Score model on an 80/20 train/test split.

        Args:
            df: training DataFrame containing the features and ``credit_score``.

        Returns:
            ``(X_test, y_test, test_pred)``: the unscaled held-out features,
            their true scores and the model's predictions for them.
        """
        X, feature_names = self.prepare_features(df, fit_encoders=True)
        y = df['credit_score']

        # Split data
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Scale features (scaler statistics come from the training part only)
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)

        # Train model
        self.model.fit(X_train_scaled, y_train)

        # Evaluate
        train_pred = self.model.predict(X_train_scaled)
        test_pred = self.model.predict(X_test_scaled)

        train_r2 = r2_score(y_train, train_pred)
        test_r2 = r2_score(y_test, test_pred)

        self.feature_names = feature_names

        print(f"✅ Shamba Score Model trained successfully")
        print(f"📊 Training R²: {train_r2:.3f}")
        print(f"📊 Testing R²: {test_r2:.3f}")

        return X_test, y_test, test_pred

    def predict_score(self, farmer_data):
        """Predict the credit score for a single farmer.

        Args:
            farmer_data: one-row DataFrame with the training feature columns
                (any column order; extra columns are ignored).

        Returns:
            The predicted score as a plain ``float`` clipped to 300-850.

        Raises:
            RuntimeError: if the model has not been trained.
            ValueError: if a required feature column is missing.
        """
        if self.feature_names is None:
            raise RuntimeError("Model has not been trained; call fit() first")

        X, _ = self.prepare_features(farmer_data, fit_encoders=False)

        missing = [col for col in self.feature_names if col not in X.columns]
        if missing:
            raise ValueError(f"Missing required features: {missing}")

        # Align to the training column order: the scaler and the model are
        # positional, and input built from JSON can arrive in any order.
        X = X[self.feature_names]
        X_scaled = self.scaler.transform(X)
        score = self.model.predict(X_scaled)[0]

        # Plain float (not a NumPy scalar) so the result is JSON serializable.
        return float(np.clip(score, 300, 850))

    def get_feature_importance(self):
        """Return ``(feature, importance)`` pairs sorted by importance, largest first."""
        importance = dict(zip(self.feature_names, self.model.feature_importances_))
        return sorted(importance.items(), key=lambda x: x[1], reverse=True)

# Fit the credit scoring model; keep the held-out split for later cells
shamba_model = ShambaScoreModel()
X_test, y_test, y_pred = shamba_model.fit(df)

# List the ten highest-ranked features, numbered from 1
print("\n🎯 Top 10 Most Important Features:")
top_ranked = shamba_model.get_feature_importance()[:10]
for rank, (feature, importance) in enumerate(top_ranked, start=1):
    print(f"  {rank}. {feature}: {importance:.3f}")

## 4. Fairness & Bias Detection

In [None]:
class FairnessAnalyzer:
    """Analyze model fairness across different groups"""

    def __init__(self):
        pass

    @staticmethod
    def _scored_copy(df, predictions):
        """Return a copy of ``df`` with ``predictions`` as ``predicted_score``."""
        scored = df.copy()
        scored['predicted_score'] = predictions
        return scored

    def _summarise_by(self, df, predictions, group_col):
        """Per-group prediction mean/std/count plus mean risk and performance."""
        aggregations = {
            'predicted_score': ['mean', 'std', 'count'],
            'climate_risk_score': 'mean',
            'farmer_performance_score': 'mean'
        }
        grouped = self._scored_copy(df, predictions).groupby(group_col)
        return grouped.agg(aggregations).round(2)

    def analyze_regional_fairness(self, df, predictions):
        """Print and return per-region summary statistics of the predictions."""
        regional_stats = self._summarise_by(df, predictions, 'region')

        print("🌍 Regional Fairness Analysis:")
        print(regional_stats)

        return regional_stats

    def analyze_crop_fairness(self, df, predictions):
        """Print and return per-crop-type summary statistics of the predictions."""
        crop_stats = self._summarise_by(df, predictions, 'crop_type')

        print("\n🌾 Crop Type Fairness Analysis:")
        print(crop_stats)

        return crop_stats

    def detect_bias(self, df, predictions, threshold=0.1):
        """Measure how much the mean residual differs between groups.

        The residual is ``credit_score - predicted_score``. For each grouping
        column the per-group mean residual is computed, and the standard
        deviation of those means is reported as the bias metric. A metric
        below ``threshold`` is labelled low, anything else high.

        Returns:
            dict with ``regional_bias`` and ``crop_bias``.
        """
        scored = self._scored_copy(df, predictions)
        scored['residual'] = scored['credit_score'] - scored['predicted_score']

        bias_metrics = {
            'regional_bias': scored.groupby('region')['residual'].mean().std(),
            'crop_bias': scored.groupby('crop_type')['residual'].mean().std(),
        }

        print("\n⚖️ Bias Detection Results:")
        for metric, value in bias_metrics.items():
            if value < threshold:
                status = "✅ Low"
            else:
                status = "⚠️ High"
            print(f"  {metric}: {value:.3f} ({status})")

        return bias_metrics

# Analyze fairness on the held-out test rows.
# y_pred holds one prediction per row of X_test, and train_test_split
# shuffles rows, so the matching farmers must be looked up by X_test's index.
# Slicing df positionally would select the wrong rows and the wrong number
# of them.
fairness_analyzer = FairnessAnalyzer()
test_rows = df.loc[X_test.index]
regional_fairness = fairness_analyzer.analyze_regional_fairness(test_rows, y_pred)
crop_fairness = fairness_analyzer.analyze_crop_fairness(test_rows, y_pred)
bias_metrics = fairness_analyzer.detect_bias(test_rows, y_pred)

## 5. Explainable AI & Recommendations

In [None]:
class ShambaScoreExplainer:
    """Provide explanations and recommendations for credit scores.

    Combines the credit score model and the climate/performance separator
    with fixed, rule-based thresholds to produce human-readable output.
    """

    def __init__(self, model, separator):
        """Store the scoring model and the climate risk separator.

        Args:
            model: object exposing ``predict_score(df)`` and
                ``get_feature_importance()``.
            separator: object exposing ``predict_separated_scores(df)``.
        """
        self.model = model
        self.separator = separator

    def explain_score(self, farmer_data):
        """Provide a detailed explanation of one farmer's credit score.

        Args:
            farmer_data: one-row DataFrame with the farmer's features.

        Returns:
            dict with ``credit_score`` (rounded to a whole number),
            ``score_category``, ``climate_risk`` and ``farmer_performance``
            (rounded to one decimal), plus the lists ``key_strengths``,
            ``improvement_areas`` and ``climate_factors``. Numeric values are
            plain Python floats so the dict can be passed to ``json.dumps``.
        """
        # Get predictions
        credit_score = self.model.predict_score(farmer_data)
        climate_risk, farmer_performance = self.separator.predict_separated_scores(farmer_data)

        # Get feature contributions
        feature_importance = dict(self.model.get_feature_importance())

        explanation = {
            # float() strips NumPy scalar types (e.g. float32), which the
            # json module cannot serialize.
            'credit_score': round(float(credit_score), 0),
            'score_category': self._categorize_score(credit_score),
            'climate_risk': round(float(climate_risk[0]), 1),
            'farmer_performance': round(float(farmer_performance[0]), 1),
            'key_strengths': self._identify_strengths(farmer_data, feature_importance),
            'improvement_areas': self._identify_weaknesses(farmer_data, feature_importance),
            'climate_factors': self._explain_climate_impact(farmer_data)
        }

        return explanation

    def _categorize_score(self, score):
        """Map a numeric score to its band label (lower bound inclusive)."""
        if score >= 750:
            return "Excellent (750-850)"
        elif score >= 650:
            return "Good (650-749)"
        elif score >= 550:
            return "Fair (550-649)"
        else:
            return "Poor (300-549)"

    def _identify_strengths(self, farmer_data, importance):
        """Identify up to three of the farmer's key strengths.

        ``importance`` is accepted for interface symmetry but is not
        consulted; the rules below use fixed thresholds.
        """
        strengths = []

        if farmer_data['community_trust_score'].iloc[0] > 80:
            strengths.append("Strong community reputation")
        if farmer_data['payment_consistency'].iloc[0] > 0.9:
            strengths.append("Excellent payment history")
        if farmer_data['crop_health_score'].iloc[0] > 80:
            strengths.append("High crop health indicators")
        if farmer_data['sustainable_practices_score'].iloc[0] > 75:
            strengths.append("Good sustainable farming practices")

        return strengths[:3]  # First three matches, in rule order

    def _identify_weaknesses(self, farmer_data, importance):
        """Identify up to three areas for improvement.

        ``importance`` is accepted for interface symmetry but is not
        consulted; the rules below use fixed thresholds.
        """
        improvements = []

        if farmer_data['savings_rate'].iloc[0] < 0.2:
            improvements.append("Increase savings rate")
        if farmer_data['input_timing_score'].iloc[0] < 0.7:
            improvements.append("Improve input timing")
        if farmer_data['equipment_investment'].iloc[0] < 10000:
            improvements.append("Consider equipment upgrades")
        if farmer_data['fertilizer_usage_kg'].iloc[0] < 30:
            improvements.append("Optimize fertilizer usage")

        return improvements[:3]  # First three matches, in rule order

    def _explain_climate_impact(self, farmer_data):
        """List the climate factors that exceed their alert thresholds."""
        climate_factors = []

        drought_risk = farmer_data['drought_risk_score'].iloc[0]
        flood_risk = farmer_data['flood_risk_score'].iloc[0]

        if drought_risk > 60:
            climate_factors.append(f"High drought risk ({drought_risk:.0f}%)")
        if flood_risk > 40:
            climate_factors.append(f"Elevated flood risk ({flood_risk:.0f}%)")
        if farmer_data['climate_variability_index'].iloc[0] > 0.6:
            climate_factors.append("High climate variability in region")

        return climate_factors

    def generate_recommendations(self, farmer_data, explanation):
        """Generate personalized recommendations.

        Args:
            farmer_data: one-row DataFrame with the farmer's features.
            explanation: dict returned by :meth:`explain_score`; its
                ``credit_score`` and ``climate_risk`` entries are read.

        Returns:
            dict with the lists ``immediate_actions``, ``medium_term_goals``,
            ``climate_adaptation`` and ``financial_products``.
        """
        recommendations = {
            'immediate_actions': [],
            'medium_term_goals': [],
            'climate_adaptation': [],
            'financial_products': []
        }

        score = explanation['credit_score']

        # Immediate actions
        if farmer_data['savings_rate'].iloc[0] < 0.3:
            recommendations['immediate_actions'].append(
                "Set up automatic savings of 10% of monthly income"
            )

        if farmer_data['cooperative_participation'].iloc[0] == 0:
            recommendations['immediate_actions'].append(
                "Join local farmer cooperative for better market access"
            )

        # Medium-term goals
        if farmer_data['sustainable_practices_score'].iloc[0] < 70:
            recommendations['medium_term_goals'].append(
                "Adopt climate-smart agriculture practices"
            )

        # Climate adaptation
        if explanation['climate_risk'] > 60:
            recommendations['climate_adaptation'].append(
                "Consider drought-resistant crop varieties"
            )
            recommendations['climate_adaptation'].append(
                "Invest in water conservation systems"
            )

        # Financial products: exactly one tier is recommended per farmer
        if score >= 650:
            recommendations['financial_products'].append(
                "Eligible for premium agricultural loans (5-7% interest)"
            )
        elif score >= 550:
            recommendations['financial_products'].append(
                "Eligible for standard agricultural loans (8-12% interest)"
            )
        else:
            recommendations['financial_products'].append(
                "Focus on building credit history with micro-loans"
            )

        return recommendations

# Wire the trained model and separator into an explainer
explainer = ShambaScoreExplainer(shamba_model, separator)

# Explain the first farmer in the dataset (double brackets keep a DataFrame)
sample_farmer = df.iloc[[0]]
explanation = explainer.explain_score(sample_farmer)
recommendations = explainer.generate_recommendations(sample_farmer, explanation)

# Headline numbers
print("🎯 Sample Farmer Credit Score Explanation:")
print(f"Credit Score: {explanation['credit_score']} ({explanation['score_category']})")
print(f"Climate Risk: {explanation['climate_risk']}%")
print(f"Farmer Performance: {explanation['farmer_performance']}%")

# Bullet lists taken straight from the explanation dict
for heading, key in (
    ("\n💪 Key Strengths:", 'key_strengths'),
    ("\n📈 Improvement Areas:", 'improvement_areas'),
    ("\n🌡️ Climate Factors:", 'climate_factors'),
):
    print(heading)
    for entry in explanation[key]:
        print(f"  • {entry}")

# Recommendations grouped by category; empty categories are skipped
print("\n💡 Recommendations:")
for category, items in recommendations.items():
    if not items:
        continue
    print(f"  {category.replace('_', ' ').title()}:")
    for item in items:
        print(f"    - {item}")

## 6. Model Validation & Performance Metrics

In [None]:
# Model performance visualization
# Six diagnostic panels on a 2x3 grid. The pyplot state-machine API is used
# throughout, so each plt.subplot(...) call selects the axes that the
# following plt.* calls draw on — statement order matters.
plt.figure(figsize=(15, 10))

# 1. Actual vs Predicted scores
# The dashed red diagonal spans 300-850 and marks perfect prediction (y = x).
plt.subplot(2, 3, 1)
plt.scatter(y_test, y_pred, alpha=0.6)
plt.plot([300, 850], [300, 850], 'r--', lw=2)
plt.xlabel('Actual Credit Score')
plt.ylabel('Predicted Credit Score')
plt.title('Actual vs Predicted Scores')
plt.grid(True, alpha=0.3)

# 2. Residuals plot
# Residual = actual - predicted; the dashed line marks zero error.
plt.subplot(2, 3, 2)
residuals = y_test - y_pred
plt.scatter(y_pred, residuals, alpha=0.6)
plt.axhline(y=0, color='r', linestyle='--')
plt.xlabel('Predicted Credit Score')
plt.ylabel('Residuals')
plt.title('Residuals Plot')
plt.grid(True, alpha=0.3)

# 3. Feature importance
# Horizontal bars for the ten highest-ranked features; underscores in the
# feature names are replaced with spaces and title-cased for the tick labels.
plt.subplot(2, 3, 3)
top_features = shamba_model.get_feature_importance()[:10]
features, importances = zip(*top_features)
plt.barh(range(len(features)), importances)
plt.yticks(range(len(features)), [f.replace('_', ' ').title() for f in features])
plt.xlabel('Feature Importance')
plt.title('Top 10 Feature Importance')
plt.grid(True, alpha=0.3)

# 4. Score distribution
# NOTE(review): 'Actual' is drawn from every row of df while 'Predicted' is
# drawn from y_pred only; density=True normalises both, but the two
# histograms may not cover the same farmers — confirm this is intended.
plt.subplot(2, 3, 4)
plt.hist(df['credit_score'], bins=30, alpha=0.7, label='Actual', density=True)
plt.hist(y_pred, bins=30, alpha=0.7, label='Predicted', density=True)
plt.xlabel('Credit Score')
plt.ylabel('Density')
plt.title('Score Distribution')
plt.legend()
plt.grid(True, alpha=0.3)

# 5. Climate vs Performance separation
# Each point is one farmer, coloured by credit score.
plt.subplot(2, 3, 5)
plt.scatter(df['climate_risk_score'], df['farmer_performance_score'], 
           c=df['credit_score'], cmap='viridis', alpha=0.6)
plt.colorbar(label='Credit Score')
plt.xlabel('Climate Risk Score')
plt.ylabel('Farmer Performance Score')
plt.title('Climate Risk vs Performance')
plt.grid(True, alpha=0.3)

# 6. Regional score distribution
# The pandas boxplot is drawn onto the current axes via ax=plt.gca().
plt.subplot(2, 3, 6)
df.boxplot(column='credit_score', by='region', ax=plt.gca())
plt.title('Credit Score by Region')
plt.suptitle('')  # Remove automatic title
plt.xticks(rotation=45)
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Print performance metrics on the held-out test set
print("\n📊 Model Performance Metrics:")
print(f"R² Score: {r2_score(y_test, y_pred):.3f}")
print(f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.2f}")
print(f"Mean Absolute Error: {np.mean(np.abs(y_test - y_pred)):.2f}")

# Score distribution by category.
# Bands use inclusive lower bounds (right=False), matching
# ShambaScoreExplainer._categorize_score: 550 is 'Fair', 650 'Good',
# 750 'Excellent'. Raw predictions are clipped to 300-850 first, as
# predict_score does, so none falls outside the bins and becomes NaN.
score_categories = pd.cut(
    np.clip(y_pred, 300, 850),
    bins=[300, 550, 650, 750, np.inf],
    labels=['Poor', 'Fair', 'Good', 'Excellent'],
    right=False,
)
print("\n🎯 Predicted Score Distribution:")
print(score_categories.value_counts().sort_index())

## 7. API Integration Functions

In [None]:
import joblib
import json

class ShambaScoreAPI:
    """API wrapper for Shamba Score model.

    Holds a scoring model, a climate risk separator and an explainer, and
    turns raw farmer records (dict or JSON string) into scored result dicts.
    """

    def __init__(self):
        self.model = None
        self.separator = None
        self.explainer = None

    def load_models(self, model_path='../models/'):
        """Load the trained model and separator from ``model_path``.

        Failures are reported with a printed message rather than raised. On
        failure the previously held objects are left untouched.

        NOTE: joblib files are pickles; loading one executes arbitrary code,
        so only load model files from a trusted location.
        """
        import os

        try:
            # Load into locals first so a failure on the second file cannot
            # leave the API with a new model but a stale/missing separator.
            model = joblib.load(os.path.join(model_path, 'shamba_score_model.pkl'))
            separator = joblib.load(os.path.join(model_path, 'climate_separator.pkl'))
        except Exception as e:
            print(f"❌ Error loading models: {e}")
            return

        self.model = model
        self.separator = separator
        self.explainer = ShambaScoreExplainer(self.model, self.separator)
        print("✅ Models loaded successfully")

    def save_models(self, model_path='../models/'):
        """Save this API's model and separator under ``model_path``.

        The directory is created when it does not exist. Failures are
        reported with a printed message rather than raised.
        """
        import os

        try:
            if self.model is None or self.separator is None:
                raise ValueError("no model/separator attached to this API instance")
            if model_path:
                os.makedirs(model_path, exist_ok=True)
            # Persist the objects held by this instance, not notebook globals.
            joblib.dump(self.model, os.path.join(model_path, 'shamba_score_model.pkl'))
            joblib.dump(self.separator, os.path.join(model_path, 'climate_separator.pkl'))
            print("✅ Models saved successfully")
        except Exception as e:
            print(f"❌ Error saving models: {e}")

    def score_farmer(self, farmer_data_json):
        """Score a farmer from JSON input.

        Args:
            farmer_data_json: a JSON string or an already-parsed dict holding
                the farmer's feature values.

        Returns:
            A result dict with the score, explanation and recommendations.
            On any failure, a dict with ``error`` and ``timestamp`` instead;
            this method does not raise.
        """
        try:
            if self.explainer is None:
                raise RuntimeError(
                    "Models are not loaded; call load_models() or assign an explainer first"
                )

            # Parse input data
            farmer_data = json.loads(farmer_data_json) if isinstance(farmer_data_json, str) else farmer_data_json

            # Convert to a one-row DataFrame
            df_farmer = pd.DataFrame([farmer_data])

            # Get explanation and recommendations
            explanation = self.explainer.explain_score(df_farmer)
            recommendations = self.explainer.generate_recommendations(df_farmer, explanation)

            # Combine results
            result = {
                'farmer_id': farmer_data.get('farmer_id', 'unknown'),
                'timestamp': datetime.now().isoformat(),
                'credit_score': explanation['credit_score'],
                'score_category': explanation['score_category'],
                'climate_risk_score': explanation['climate_risk'],
                'farmer_performance_score': explanation['farmer_performance'],
                'key_strengths': explanation['key_strengths'],
                'improvement_areas': explanation['improvement_areas'],
                'climate_factors': explanation['climate_factors'],
                'recommendations': recommendations,
                'model_version': '1.0',
                # Heuristic only: distance of climate risk from 50, clamped
                # to 60-95. It is not derived from model uncertainty.
                'confidence_score': min(95, max(60, 100 - abs(explanation['climate_risk'] - 50)))
            }

            return result

        except Exception as e:
            # API boundary: report the failure to the caller as data.
            return {
                'error': str(e),
                'timestamp': datetime.now().isoformat()
            }

    def batch_score(self, farmers_data):
        """Score multiple farmers; returns one result dict per input record."""
        return [self.score_farmer(farmer_data) for farmer_data in farmers_data]

# Initialize the API and attach the objects trained earlier in the notebook
api = ShambaScoreAPI()
api.model, api.separator, api.explainer = shamba_model, separator, explainer

# Save models for production use
api.save_models()

# Sample request payload: the fields a caller is expected to supply
sample_farmer_json = dict(
    farmer_id='KE_000001',
    ndvi_current=0.75,
    crop_health_score=85,
    monthly_income_avg=25000,
    payment_consistency=0.95,
    community_trust_score=88,
    drought_risk_score=35,
    flood_risk_score=20,
    savings_rate=0.15,
    region='Central',
    crop_type='Maize',
)

# Default values for the remaining fields the model requires
required_fields = dict(
    ndvi_historical_avg=0.65,
    field_size_hectares=2.5,
    vegetation_consistency=0.8,
    soil_moisture_index=0.6,
    rainfall_forecast_mm=120,
    temperature_avg_c=24,
    climate_variability_index=0.4,
    seasonal_pattern_match=0.85,
    transaction_frequency=30,
    agricultural_payments_ratio=0.6,
    seasonal_income_stability=0.7,
    cooperative_participation=1,
    peer_recommendations=4,
    local_leadership_role=0,
    dispute_history=0,
    knowledge_sharing_score=75,
    seed_investment_annual=8000,
    fertilizer_usage_kg=45,
    equipment_investment=15000,
    input_timing_score=0.85,
    quality_input_ratio=0.8,
    sustainable_practices_score=70,
)

# Merge the defaults into the payload (appended after the supplied fields)
sample_farmer_json.update(required_fields)

# Score the sample farmer through the API and pretty-print the response
api_result = api.score_farmer(sample_farmer_json)

print("\n🚀 API Test Result:")
print(json.dumps(api_result, indent=2))

## 8. Summary & Next Steps

### ✅ What We've Built:
1. **Climate-Adaptive Credit Scoring**: Separates climate risks from farmer performance
2. **Multi-Source Data Integration**: Satellite imagery, climate data, M-Pesa, community scores
3. **Fairness Analysis**: Ensures equitable scoring across regions and crop types
4. **Explainable AI**: Transparent explanations and personalized recommendations
5. **Production-Ready API**: Easy integration with frontend applications

### 🎯 Key Features:
- **Fair Scoring**: Accounts for climate factors beyond farmer control
- **Transparent**: Clear explanations for every score
- **Actionable**: Specific recommendations for improvement
- **Scalable**: Handles batch processing for multiple farmers
- **Robust**: Comprehensive validation and bias detection

### 🚀 Next Steps for Production:
1. **Real Data Integration**: Connect to actual satellite APIs, weather services
2. **Model Retraining**: Implement continuous learning pipeline
3. **A/B Testing**: Validate model performance in real-world scenarios
4. **Regulatory Compliance**: Ensure adherence to financial regulations
5. **Monitoring**: Set up model drift detection and performance monitoring