In [1]:
!pip install stable-baselines3
!pip install gymnasium
!pip install "shimmy>=2.0" gymnasium

Collecting stable-baselines3
  Downloading stable_baselines3-2.5.0-py3-none-any.whl.metadata (4.8 kB)
Collecting gymnasium<1.1.0,>=0.29.1 (from stable-baselines3)
  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (f

In [6]:
# Training code


# Import required libraries
import os
import json
import numpy as np
import pandas as pd
import time
import torch
from datetime import datetime, timedelta
from typing import Dict, List, Tuple, Any, Optional
from collections import Counter

# RL libraries
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback

# Set random seeds for reproducibility
np.random.seed(42)
torch.manual_seed(42)

# Create necessary directories
os.makedirs("/content/models", exist_ok=True)
os.makedirs("/content/models/tensorboard", exist_ok=True)


# Add these imports at the top
import matplotlib.pyplot as plt
import seaborn as sns
from typing import List, Dict
from scipy import stats
import plotly.graph_objects as go
import plotly.express as px
from IPython.display import display, clear_output

class ModelEvaluator:
    """Class for evaluating model performance"""
    def __init__(self):
        self.reward_history = []
        self.return_history = []
        self.pool_selections = Counter()
        self.allocation_history = []
        self.balance_history = []
        self.risk_metrics = []

    def update(self, info: Dict, reward: float):
        """Update metrics with new data"""
        self.reward_history.append(reward)
        self.return_history.append(info.get('total_return', 0))
        self.pool_selections[info.get('pool', 'unknown')] += 1
        self.allocation_history.append(info.get('allocation', 0))
        self.balance_history.append(info.get('balance', 0))

    def calculate_metrics(self) -> Dict:
        """Calculate performance metrics"""
        metrics = {
            'avg_reward': np.mean(self.reward_history[-100:]),
            'reward_std': np.std(self.reward_history[-100:]),
            'avg_return': np.mean(self.return_history[-100:]),
            'sharpe_ratio': self._calculate_sharpe_ratio(),
            'max_drawdown': self._calculate_max_drawdown(),
            'pool_diversity': self._calculate_pool_diversity(),
            'allocation_stability': self._calculate_allocation_stability()
        }
        return metrics

    def _calculate_sharpe_ratio(self) -> float:
        """Calculate Sharpe ratio"""
        if len(self.return_history) < 2:
            return 0
        returns = np.diff(self.return_history)
        if np.std(returns) == 0:
            return 0
        return np.mean(returns) / (np.std(returns) + 1e-6) * np.sqrt(252)

    def _calculate_max_drawdown(self) -> float:
        """Calculate maximum drawdown"""
        if not self.balance_history:
            return 0
        peak = self.balance_history[0]
        max_drawdown = 0
        for balance in self.balance_history:
            if balance > peak:
                peak = balance
            drawdown = (peak - balance) / peak
            max_drawdown = max(max_drawdown, drawdown)
        return max_drawdown

    def _calculate_pool_diversity(self) -> float:
        """Calculate pool selection entropy"""
        if not self.pool_selections:
            return 0
        total = sum(self.pool_selections.values())
        probabilities = [count/total for count in self.pool_selections.values()]
        entropy = -sum(p * np.log2(p) for p in probabilities)
        return entropy / np.log2(len(self.pool_selections))

    def _calculate_allocation_stability(self) -> float:
        """Calculate allocation stability"""
        if len(self.allocation_history) < 2:
            return 1
        return 1 - np.std(self.allocation_history)

class PerformanceVisualizer:
    """Class for visualizing model performance"""
    def __init__(self):
        plt.style.use('seaborn')

    def plot_training_progress(self, evaluator: ModelEvaluator):
        """Plot training metrics"""
        fig = plt.figure(figsize=(20, 12))

        # Reward subplot
        ax1 = plt.subplot(2, 2, 1)
        self._plot_reward_history(evaluator.reward_history, ax1)

        # Return subplot
        ax2 = plt.subplot(2, 2, 2)
        self._plot_return_history(evaluator.return_history, ax2)

        # Pool distribution subplot
        ax3 = plt.subplot(2, 2, 3)
        self._plot_pool_distribution(evaluator.pool_selections, ax3)

        # Allocation history subplot
        ax4 = plt.subplot(2, 2, 4)
        self._plot_allocation_history(evaluator.allocation_history, ax4)

        plt.tight_layout()
        clear_output(wait=True)
        display(fig)
        plt.close()

    def _plot_reward_history(self, rewards: List[float], ax):
        """Plot reward history"""
        ax.plot(rewards, alpha=0.6, label='Episode Reward')
        ax.plot(self._moving_average(rewards, 100),
                label='100-Episode Moving Average')
        ax.set_title('Training Rewards')
        ax.set_xlabel('Episode')
        ax.set_ylabel('Reward')
        ax.legend()

    def _plot_return_history(self, returns: List[float], ax):
        """Plot return history"""
        ax.plot(returns, alpha=0.6, label='Episode Return')
        ax.plot(self._moving_average(returns, 100),
                label='100-Episode Moving Average')
        ax.set_title('Portfolio Returns')
        ax.set_xlabel('Episode')
        ax.set_ylabel('Return (%)')
        ax.legend()

    def _plot_pool_distribution(self, pool_selections: Counter, ax):
        """Plot pool selection distribution"""
        pools = list(pool_selections.keys())
        counts = list(pool_selections.values())
        total = sum(counts)
        percentages = [count/total*100 for count in counts]

        ax.bar(pools, percentages)
        ax.set_title('Pool Selection Distribution')
        ax.set_xlabel('Pool')
        ax.set_ylabel('Selection Frequency (%)')
        plt.xticks(rotation=45, ha='right')

    def _plot_allocation_history(self, allocations: List[float], ax):
        """Plot allocation history"""
        ax.plot(allocations, alpha=0.6)
        ax.plot(self._moving_average(allocations, 100),
                label='100-Episode Moving Average')
        ax.set_title('Allocation History')
        ax.set_xlabel('Episode')
        ax.set_ylabel('Allocation')
        ax.set_ylim(0, 1)
        ax.legend()

    @staticmethod
    def _moving_average(data: List[float], window: int) -> np.ndarray:
        """Calculate moving average"""
        return np.convolve(data, np.ones(window)/window, mode='valid')

class DefiInvestmentEnv(gym.Env):
    """Custom Environment for DeFi investment strategy optimization"""
    metadata = {'render.modes': ['human']}

    def __init__(self, data: pd.DataFrame, initial_balance: float = 5000, episode_length: int = 30):
        super().__init__()
        print("\n[INIT] Initializing DeFi Investment Environment")

        # Ensure timestamp is datetime
        data['timestamp'] = pd.to_datetime(data['timestamp'])
        self.data = data
        self.initial_balance = initial_balance
        self.episode_length = episode_length

        # Calculate global statistics for normalization
        self.max_tvl = data['TVL'].max()
        self.max_liquidity = data['liquidity'].max()
        self.avg_apy = data['APY'].mean()
        self.avg_il_risk = data['impermanent_loss_risk'].mean()

        # Get all unique pools for action space
        self.all_pools = sorted(data['pool_name'].unique())
        self.num_pools = len(self.all_pools)
        print(f"[INIT] Found {self.num_pools} unique pools:")
        for idx, pool in enumerate(self.all_pools):
            print(f"  {idx}: {pool}")

        # Get all unique chains for state representation
        self.all_chains = sorted(data['chain'].unique())
        self.num_chains = len(self.all_chains)
        print(f"[INIT] Found {self.num_chains} unique chains: {', '.join(self.all_chains)}")

        # Define action space: pool selection and allocation percentage
        self.action_space = spaces.Box(
            low=np.array([0, 0.1]),  # Minimum 10% allocation
            high=np.array([float(self.num_pools - 1), 0.9]),  # Maximum 90% allocation
            dtype=np.float32
        )
        print(f"[INIT] Action space configured: Pool selection (0-{self.num_pools-1}) and allocation (0.1-0.9)")

        # Define observation space
        obs_dim = (4 * self.num_pools) + 1 + len(self.all_chains)
        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(obs_dim,),
            dtype=np.float32
        )
        print(f"[INIT] Observation space configured with dimension: {obs_dim}")

    def _get_current_state(self) -> np.ndarray:
        """Convert current day's data into state representation"""
        current_time = pd.to_datetime(self.current_day)

        # Get data for exact timestamp
        current_data = self.data[self.data['timestamp'] == current_time]

        if current_data.empty:
            print(f"[STATE] Error: No data found for timestamp {current_time.strftime('%d-%m-%Y %H:%M')}")
            return np.zeros(self.observation_space.shape, dtype=np.float32)

        # Initialize state vector
        pool_features = np.zeros(4 * self.num_pools)

        # Print header for pool information
        print("\n[STATE] Available Pools at Current Timestamp:")
        print("-" * 120)
        print(f"{'Index':<6} {'Pool Name':<20} {'TVL($M)':<12} {'APY(%)':<10} {'IL Risk':<10} {'Liquidity($M)':<15} {'Chain':<10} {'Gas($)':<8}")
        print("-" * 120)

        # Fill in features for each pool
        for i, pool_name in enumerate(self.all_pools):
            pool_data = current_data[current_data['pool_name'] == pool_name]

            if not pool_data.empty:
                tvl = pool_data['TVL'].values[0] / 1e6
                apy = pool_data['APY'].values[0] / 100
                il_risk = pool_data['impermanent_loss_risk'].values[0]
                liquidity = pool_data['liquidity'].values[0] / 1e6
                chain = pool_data['chain'].values[0]
                gas_fee = pool_data[f'gas_fee_{chain} (USD)'].values[0]

                pool_features[i*4:(i+1)*4] = [tvl, apy, il_risk, liquidity]

                print(f"{i:<6} {pool_name:<20} {tvl:>10.2f}M {apy*100:>9.2f}% {il_risk:>9.4f} "
                      f"{liquidity:>14.2f}M {chain:<10} ${gas_fee:<7.4f}")

        print("-" * 120)

        # Get market volatility
        market_volatility = current_data['market_volatility'].mean()
        print(f"\n[STATE] Market Volatility: {market_volatility:.4f}")

        # Get gas fees for each chain
        print("\n[STATE] Gas Fees by Chain:")
        gas_fees = []
        for chain in self.all_chains:
            fee_column = f'gas_fee_{chain} (USD)'
            fee = current_data[fee_column].mean()
            gas_fees.append(fee)
            print(f"[STATE] {chain}: ${fee:.4f}")

        # Combine all features
        state = np.concatenate([pool_features, [market_volatility], gas_fees])
        return state.astype(np.float32)

    def _calculate_reward(self, pool_idx: int, allocation: float) -> float:
        """Calculate reward based on chosen pool and allocation"""
        print("\n" + "="*50)
        print(f"[REWARD] Calculating reward for timestep {self.day_count + 1}/{self.episode_length}")

        pool_name = self.all_pools[int(pool_idx)]
        current_time = pd.to_datetime(self.current_day)

        print(f"[REWARD] Selected Pool: {pool_name}")
        print(f"[REWARD] Current Time: {current_time.strftime('%Y-%m-%d %H:%M')}")
        print(f"[REWARD] Current Balance: ${self.balance:.2f}")
        print(f"[REWARD] Allocation Percentage: {allocation*100:.2f}%")

        # Get data for exact timestamp
        pool_data = self.data[
            (self.data['timestamp'] == current_time) &
            (self.data['pool_name'] == pool_name)
        ]

        if pool_data.empty:
            print(f"[REWARD] ERROR: No data found for Pool '{pool_name}' at timestamp {current_time}")
            return -100  # Penalty for selecting unavailable pool

        # Extract metrics
        tvl = pool_data['TVL'].values[0]
        apy = pool_data['APY'].values[0]
        daily_apy = apy / 36500  # Convert annual APY to daily
        il_risk = pool_data['impermanent_loss_risk'].values[0]
        liquidity = pool_data['liquidity'].values[0]
        normalized_liquidity = liquidity / self.max_liquidity
        market_vol = pool_data['market_volatility'].values[0]
        chain = pool_data['chain'].values[0]

        # Get current pool metrics relative to global stats
        tvl_ratio = tvl / self.max_tvl
        apy_ratio = apy / self.avg_apy if self.avg_apy > 0 else 1
        il_risk_ratio = il_risk / self.avg_il_risk if self.avg_il_risk > 0 else 1

        # Print detailed metrics
        print(f"\n[REWARD] Pool Performance Metrics:")
        print(f"  - TVL Score: {tvl_ratio:.4f} (Pool TVL / Max TVL)")
        print(f"  - APY Score: {apy_ratio:.4f} (Pool APY / Avg APY)")
        print(f"  - Risk Score: {il_risk_ratio:.4f} (Pool Risk / Avg Risk)")
        print(f"  - Liquidity Score: {normalized_liquidity:.4f}")
        print(f"  - Market Volatility: {market_vol:.4f}")

        # Ensure allocation is within bounds and calculate investment
        allocation = max(0.1, min(0.9, allocation))
        investment = self.balance * allocation

        # Base return calculation
        daily_return = investment * daily_apy

        # Calculate reward components with balanced weights
        reward_components = {
            'yield_reward': daily_return * 10,  # Base yield reward
            'tvl_bonus': investment * tvl_ratio * 0.3,  # TVL consideration
            'apy_efficiency': investment * apy_ratio * 0.4,  # APY relative to average
            'risk_adjustment': -investment * il_risk * market_vol * 0.2,  # Risk penalty
            'liquidity_bonus': investment * normalized_liquidity * 0.5,  # Liquidity bonus
            'allocation_bonus': investment * (1 - abs(0.5 - allocation)) * 0.01,  # Balanced allocation
            'pool_diversity_bonus': investment * 0.1  # Bonus for exploring different pools
        }

        # Gas fee consideration
        gas_fee_col = f'gas_fee_{chain} (USD)'
        gas_fee = pool_data[gas_fee_col].values[0]
        gas_impact = -min(gas_fee / (investment + 1e-10), 0.001) * investment
        reward_components['gas_impact'] = gas_impact

        # Calculate stability bonus
        if len(self.allocation_history) > 0:
            last_allocation = self.allocation_history[-1]['allocation']
            stability_bonus = investment * 0.01 * (1 - abs(allocation - last_allocation))
            reward_components['stability_bonus'] = stability_bonus
        else:
            reward_components['stability_bonus'] = 0

        # APY trend bonus
        if len(self.allocation_history) > 0 and 'apy' in self.allocation_history[-1]:
            apy_trend = apy - self.allocation_history[-1]['apy']
            apy_trend_bonus = investment * max(0, apy_trend) * 0.01
            reward_components['apy_trend_bonus'] = apy_trend_bonus
        else:
            reward_components['apy_trend_bonus'] = 0

        # Calculate final reward
        base_reward = sum(reward_components.values())
        min_reward = investment * daily_apy * 5  # Minimum reward is 5x daily return
        final_reward = max(base_reward, min_reward)

        # Update portfolio balance
        new_balance = self.balance - investment + (investment + daily_return)

        # Detailed logging
        print(f"\n[REWARD] Reward Components:")
        for component, value in reward_components.items():
            print(f"  - {component}: ${value:.4f}")

        print(f"\n[REWARD] Final Calculations:")
        print(f"  - Base Reward: ${base_reward:.4f}")
        print(f"  - Minimum Reward: ${min_reward:.4f}")
        print(f"  - Final Reward: ${final_reward:.4f}")
        print(f"  - Investment Amount: ${investment:.2f}")
        print(f"  - Daily Return: ${daily_return:.4f}")
        print(f"  - Balance Update: ${self.balance:.2f} -> ${new_balance:.2f}")

        self.balance = new_balance

        # Track allocation history
        self.allocation_history.append({
            'day': self.current_day,
            'pool': pool_name,
            'allocation': allocation,
            'apy': apy,
            'daily_return': daily_return,
            'reward': final_reward,
            'balance': self.balance,
            'components': reward_components,
            'metrics': {
                'tvl_ratio': tvl_ratio,
                'apy_ratio': apy_ratio,
                'il_risk_ratio': il_risk_ratio,
                'normalized_liquidity': normalized_liquidity
            }
        })

        print("="*50)
        return final_reward

    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, Dict]:
        """Take action and move to next day"""
        print(f"\n[STEP] Taking action for timestep {self.day_count + 1}")

        # Ensure pool_idx is properly bounded
        pool_idx = int(np.clip(action[0], 0, self.num_pools - 1))
        allocation = np.clip(action[1], 0.1, 0.9)

        print(f"[STEP] Raw Action: Pool Index={action[0]}, Clipped Index={pool_idx}")
        print(f"[STEP] Selected Pool: {self.all_pools[pool_idx]}")
        print(f"[STEP] Allocation: {allocation*100:.2f}%")

        # Calculate reward
        reward = self._calculate_reward(pool_idx, allocation)

        # Move to next day
        self.day_count += 1
        next_day_idx = min(self.start_idx + self.day_count, len(self.unique_days) - 1)
        self.current_day = self.unique_days[next_day_idx]

        # Check if episode is done
        terminated = self.day_count >= self.episode_length
        truncated = False

        # Get new state
        next_state = self._get_current_state()

        # Additional info
        info = {
            'balance': self.balance,
            'day': self.current_day,
            'allocation': allocation,
            'pool': self.all_pools[pool_idx],
            'episode_step': self.day_count,
            'total_return': (self.balance - self.initial_balance) / self.initial_balance * 100
        }

        print(f"[STEP] Episode Progress: {self.day_count}/{self.episode_length}")
        print(f"[STEP] Current Balance: ${self.balance:.2f}")
        print(f"[STEP] Total Return: {info['total_return']:.2f}%")
        if terminated:
            print("\n[EPISODE COMPLETE]")
            print(f"Final Balance: ${self.balance:.2f}")
            print(f"Total Return: {info['total_return']:.2f}%")

        return next_state, reward, terminated, truncated, info

    def reset(self, seed=None, options=None) -> Tuple[np.ndarray, Dict]:
        """Reset environment for new episode"""
        super().reset(seed=seed)
        if seed is not None:
            np.random.seed(seed)

        print("\n[RESET] Starting new episode")
        self.balance = self.initial_balance
        print(f"[RESET] Initial Balance: ${self.balance:.2f}")

        # Get all unique timestamps
        self.unique_days = sorted(self.data['timestamp'].unique())

        # Handle edge case: Ensure valid episode start index
        max_start_idx = max(0, len(self.unique_days) - self.episode_length)
        self.start_idx = np.random.randint(0, max_start_idx) if max_start_idx > 0 else 0

        # Set current day
        self.current_day = self.unique_days[self.start_idx]

        # Reset tracking variables
        self.day_count = 0
        self.allocation_history = []

        print(f"[RESET] Start Date: {self.current_day.strftime('%d-%m-%Y %H:%M')}")
        print(f"[RESET] Episode Length: {self.episode_length} steps")

        return self._get_current_state(), {}

    def render(self, mode='human'):
        """Render the environment"""
        print("\n[RENDER] Current Environment State")
        print(f"Day: {self.current_day.strftime('%d-%m-%Y %H:%M')}")
        print(f"Balance: ${self.balance:.2f}")
        print(f"Episode Step: {self.day_count}/{self.episode_length}")

        if self.allocation_history:
            last_alloc = self.allocation_history[-1]
            print("\nLast Action:")
            print(f"Pool: {last_alloc['pool']}")
            print(f"Allocation: {last_alloc['allocation']*100:.1f}%")
            print(f"Daily Return: ${last_alloc['daily_return']:.2f}")
            print(f"Reward: ${last_alloc['reward']:.4f}")

class MetricsCallback(BaseCallback):
    """Custom callback for logging detailed training metrics"""

    def __init__(self, verbose=0):
        super().__init__(verbose)
        self.training_start = time.time()
        self.episode_rewards = []
        self.episode_lengths = []
        self.current_episode_reward = 0
        self.current_episode_length = 0
        self.total_episodes = 0
        self.pool_selections = Counter()

    def _on_step(self) -> bool:
        """Called after each step"""
        info = self.locals['infos'][0]
        reward = self.locals['rewards'][0]

        # Track pool selections
        if 'pool' in info:
            self.pool_selections[info['pool']] += 1

        self.current_episode_reward += reward
        self.current_episode_length += 1

        if 'episode' in info.keys():
            self.episode_rewards.append(self.current_episode_reward)
            self.episode_lengths.append(self.current_episode_length)
            self.total_episodes += 1

            # Log episode statistics
            elapsed_time = time.time() - self.training_start
            avg_reward = np.mean(self.episode_rewards[-100:])
            avg_length = np.mean(self.episode_lengths[-100:])

            print(f"\n[TRAINING] Episode {self.total_episodes} completed")
            print(f"Time Elapsed: {elapsed_time:.1f}s")
            print(f"Episode Reward: ${self.current_episode_reward:.2f}")
            print(f"100-Episode Average Reward: ${avg_reward:.2f}")
            print(f"Episode Length: {self.current_episode_length}")
            print(f"100-Episode Average Length: {avg_length:.1f}")
            print(f"Final Balance: ${info['balance']:.2f}")
            print(f"Total Return: {info['total_return']:.2f}%")

            # Print pool selection distribution every 10 episodes
            if self.total_episodes % 10 == 0:
                total_selections = sum(self.pool_selections.values())
                print("\nPool Selection Distribution:")
                for pool, count in self.pool_selections.most_common():
                    percentage = (count / total_selections) * 100
                    print(f"{pool}: {percentage:.1f}%")

            self.current_episode_reward = 0
            self.current_episode_length = 0

        return True

class DefiInvestmentOptimizer:
    """DeFi Investment Strategy Optimizer"""

    def __init__(self, data_path: str, model_dir: str = "/content/models"):
        print("\n[OPTIMIZER] Initializing DeFi Investment Strategy Optimizer")

        self.data_path = data_path
        self.model_dir = model_dir
        os.makedirs(model_dir, exist_ok=True)

        # Load and preprocess data
        self.data = self._load_data()
        self._preprocess_data()

        # Create environment
        self.env = self._create_environment()
        print("[OPTIMIZER] Environment created successfully")

    def _load_data(self) -> pd.DataFrame:
        """Load and prepare dataset"""
        print(f"\n[DATA] Loading data from {self.data_path}")

        data = pd.read_csv(self.data_path)
        data['timestamp'] = pd.to_datetime(data['timestamp'])

        print(f"[DATA] Successfully loaded {len(data)} records")
        print(f"[DATA] Date range: {data['timestamp'].min()} to {data['timestamp'].max()}")
        print(f"[DATA] Unique pools: {len(data['pool_name'].unique())}")
        print(f"[DATA] Unique chains: {len(data['chain'].unique())}")

        return data

    def _preprocess_data(self):
        """Calculate global statistics and handle missing values"""
        print("\n[PREPROCESS] Calculating global statistics")

        numeric_columns = ['TVL', 'APY', 'impermanent_loss_risk', 'liquidity', 'market_volatility']

        for col in numeric_columns:
            stats = {
                'mean': self.data[col].mean(),
                'std': self.data[col].std(),
                'min': self.data[col].min(),
                'max': self.data[col].max()
            }
            print(f"\n[PREPROCESS] {col} statistics:")
            for stat, value in stats.items():
                print(f"  - {stat}: {value:.4f}")

        # Fill missing values
        self.data = self.data.fillna(method='ffill').fillna(method='bfill')

    def _create_environment(self) -> gym.Env:
        """Create and wrap the environment"""
        env = DefiInvestmentEnv(
            data=self.data,
            initial_balance=5000,
            episode_length=30
        )

        # Wrap environment
        env = Monitor(env, self.model_dir)
        env = DummyVecEnv([lambda: env])

        return env

    def train(self, total_timesteps: int = 200000):
        """Train the model"""
        print("\n[TRAIN] Starting model training")
        print(f"[TRAIN] Total timesteps: {total_timesteps}")

        # Initialize callbacks
        checkpoint_callback = CheckpointCallback(
            save_freq=10000,
            save_path=self.model_dir,
            name_prefix="defi_model"
        )
        metrics_callback = MetricsCallback()

        # Initialize model with modified parameters
        model = PPO(
            "MlpPolicy",
            self.env,
            verbose=0,
            device='cpu',
            learning_rate=0.0003,
            n_steps=2048,
            batch_size=64,
            n_epochs=10,
            gamma=0.99,
            gae_lambda=0.95,
            clip_range=0.2,
            policy_kwargs={
                "net_arch": [dict(pi=[256, 256], vf=[256, 256])],
                "activation_fn": torch.nn.ReLU
            },
            tensorboard_log=f"{self.model_dir}/tensorboard/"
        )

        print("[TRAIN] Model initialized with PPO algorithm")
        print("[TRAIN] Training started...")

        # Train model
        model.learn(
            total_timesteps=total_timesteps,
            callback=[checkpoint_callback, metrics_callback]
        )

        # Save final model
        final_model_path = f"{self.model_dir}/final_model"
        model.save(final_model_path)
        print(f"\n[TRAIN] Training completed")
        print(f"[TRAIN] Final model saved to {final_model_path}")

        return model

    def analyze_pool_distribution(self, num_samples: int = 100):
        """Analyze the distribution of pool selections"""
        print("\n[ANALYSIS] Analyzing pool selection distribution")

        model_path = f"{self.model_dir}/final_model"
        model = PPO.load(model_path, env=self.env)

        pool_selections = []
        for _ in range(num_samples):
            obs = self.env.reset()[0]
            action, _ = model.predict(obs, deterministic=False)
            pool_idx = int(np.clip(action[0], 0, len(self.env.get_attr('all_pools')[0]) - 1))
            pool_selections.append(pool_idx)

        # Analyze distribution
        unique_pools = self.env.get_attr('all_pools')[0]
        pool_counts = Counter(pool_selections)

        print("\n[ANALYSIS] Pool Selection Distribution:")
        print("-" * 60)
        print(f"{'Pool Name':<30} {'Count':<10} {'Percentage':<10}")
        print("-" * 60)

        for idx in range(len(unique_pools)):
            count = pool_counts.get(idx, 0)
            percentage = (count / num_samples) * 100
            print(f"{unique_pools[idx]:<30} {count:<10} {percentage:>6.2f}%")

        return pool_counts

    def generate_recommendation(self, model_path: str = None) -> Dict:
        """Generate investment recommendations"""
        print("\n[RECOMMEND] Generating investment recommendations")

        if model_path:
            model = PPO.load(model_path, env=self.env)
            print(f"[RECOMMEND] Loaded model from {model_path}")
        else:
            model_path = f"{self.model_dir}/final_model"
            model = PPO.load(model_path, env=self.env)
            print(f"[RECOMMEND] Using default model from {model_path}")

        # Get current state
        obs = self.env.reset()[0]
        action, _ = model.predict(obs, deterministic=True)

        # Extract pool and allocation
        pool_idx = int(np.clip(action[0], 0, len(self.env.get_attr('all_pools')[0]) - 1))
        allocation = float(np.clip(action[1], 0.1, 0.9))
        pool_name = self.env.get_attr('all_pools')[0][pool_idx]

        # Get current pool data
        current_time = self.env.get_attr('current_day')[0]
        pool_data = self.data[
            (self.data['timestamp'] == current_time) &
            (self.data['pool_name'] == pool_name)
        ].iloc[0]

        # Calculate pool score
        tvl_score = pool_data['TVL'] / self.data['TVL'].max()
        apy_score = pool_data['APY'] / self.data['APY'].mean()
        risk_score = 1 - (pool_data['impermanent_loss_risk'] / self.data['impermanent_loss_risk'].max())
        liquidity_score = pool_data['liquidity'] / self.data['liquidity'].max()

        total_score = (tvl_score + apy_score + risk_score + liquidity_score) / 4

        recommendation = {
            'timestamp': current_time.strftime('%Y-%m-%d %H:%M'),
            'pool': pool_name,
            'allocation': allocation,
            'metrics': {
                'TVL': f"${pool_data['TVL']:,.2f}",
                'APY': f"{pool_data['APY']:.2f}%",
                'Impermanent Loss Risk': f"{pool_data['impermanent_loss_risk']:.4f}",
                'Liquidity': f"${pool_data['liquidity']:,.2f}",
                'Market Volatility': f"{pool_data['market_volatility']:.4f}"
            },
            'scores': {
                'TVL Score': f"{tvl_score:.4f}",
                'APY Score': f"{apy_score:.4f}",
                'Risk Score': f"{risk_score:.4f}",
                'Liquidity Score': f"{liquidity_score:.4f}",
                'Total Score': f"{total_score:.4f}"
            },
            'reasoning': self._generate_pool_reasoning(pool_data)
        }

        return recommendation

    def _generate_pool_reasoning(self, pool_data: pd.Series) -> str:
        """Generate explanation for pool recommendation"""
        reasons = []

        # TVL consideration
        if pool_data['TVL'] > self.data['TVL'].mean():
            reasons.append("High Total Value Locked indicates strong security and stability")

        # APY analysis
        if pool_data['APY'] > self.data['APY'].mean():
            reasons.append("Above-average APY offers attractive yield potential")

        # Risk assessment
        if pool_data['impermanent_loss_risk'] < self.data['impermanent_loss_risk'].mean():
            reasons.append("Lower impermanent loss risk compared to other pools")

        # Liquidity evaluation
        if pool_data['liquidity'] > self.data['liquidity'].mean():
            reasons.append("High liquidity ensures easier entry and exit")

        # Market conditions
        if pool_data['market_volatility'] < self.data['market_volatility'].mean():
            reasons.append("Favorable market volatility conditions")

        return " | ".join(reasons)

def main():
    """Main function to demonstrate optimizer usage"""
    print("\n[MAIN] Starting DeFi Investment Strategy Optimization")

    # Delete existing model directory to force retraining
    import shutil
    shutil.rmtree("/content/models", ignore_errors=True)

    # Initialize optimizer
    optimizer = DefiInvestmentOptimizer(
        data_path="/content/realistic_defi_yield_training_dataset_20000.csv",
        model_dir="/content/models"
    )

    # Train new model
    print("\n[MAIN] Starting new training...")
    optimizer.train(total_timesteps=200000)

    # Analyze pool distribution
    optimizer.analyze_pool_distribution(num_samples=100)

    # Generate multiple recommendations to verify diversity
    print("\n[MAIN] Generating multiple recommendations...")
    for i in range(5):
        recommendation = optimizer.generate_recommendation()
        print(f"\nRecommendation {i+1}:")
        print(f"Pool: {recommendation['pool']}")
        print(f"Allocation: {recommendation['allocation']*100:.2f}%")
        print(f"Total Score: {recommendation['scores']['Total Score']}")

if __name__ == "__main__":
    main()


[MAIN] Starting DeFi Investment Strategy Optimization

[OPTIMIZER] Initializing DeFi Investment Strategy Optimizer

[DATA] Loading data from /content/realistic_defi_yield_training_dataset_20000.csv
[DATA] Successfully loaded 17928 records
[DATA] Date range: 2025-01-01 00:00:00 to 2025-03-24 23:00:00
[DATA] Unique pools: 9
[DATA] Unique chains: 9

[PREPROCESS] Calculating global statistics

[PREPROCESS] TVL statistics:
  - mean: 994288302.2251
  - std: 578607714.6430
  - min: 1069273.2562
  - max: 1999936935.1081

[PREPROCESS] APY statistics:
  - mean: 10.0905
  - std: 5.7777
  - min: 0.0018
  - max: 20.9323

[PREPROCESS] impermanent_loss_risk statistics:
  - mean: 0.0696
  - std: 0.0260
  - min: 0.0151
  - max: 0.1000

[PREPROCESS] liquidity statistics:
  - mean: 462424503.7311
  - std: 344301577.4805
  - min: 233700.3048
  - max: 1557342972.7459

[PREPROCESS] market_volatility statistics:
  - mean: 0.0549
  - std: 0.0261
  - min: 0.0100
  - max: 0.1000

[INIT] Initializing DeFi Inves

  self.data = self.data.fillna(method='ffill').fillna(method='bfill')


[INIT] Found 9 unique pools:
  0: Aave USDC
  1: Balancer DAI/USDC
  2: Curve stETH/ETH
  3: PancakeSwap BNB/USDT
  4: Raydium SOL/USDC
  5: Ref Finance USDC/NEAR
  6: Stader NEAR Staking
  7: SushiSwap MATIC/ETH
  8: Uniswap ETH/USDT
[INIT] Found 9 unique chains: Arbitrum, Aurora, Avalanche, BSC, Ethereum, NEAR, Optimism, Polygon, Solana
[INIT] Action space configured: Pool selection (0-8) and allocation (0.1-0.9)
[INIT] Observation space configured with dimension: 46
[OPTIMIZER] Environment created successfully

[MAIN] Starting new training...

[TRAIN] Starting model training
[TRAIN] Total timesteps: 200000
[TRAIN] Model initialized with PPO algorithm
[TRAIN] Training started...

[RESET] Starting new episode
[RESET] Initial Balance: $5000.00
[RESET] Start Date: 16-02-2025 22:00
[RESET] Episode Length: 30 steps

[STATE] Available Pools at Current Timestamp:
------------------------------------------------------------------------------------------------------------------------
Index  P

  gym.logger.warn(
  gym.logger.warn(


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
8      Uniswap ETH/USDT        1092.88M      9.42%    0.0366         321.16M Ethereum   $10.1727
------------------------------------------------------------------------------------------------------------------------

[STATE] Market Volatility: 0.0513

[STATE] Gas Fees by Chain:
[STATE] Arbitrum: $1.7576
[STATE] Aurora: $0.0381
[STATE] Avalanche: $2.9508
[STATE] BSC: $1.4900
[STATE] Ethereum: $10.0799
[STATE] NEAR: $0.0210
[STATE] Optimism: $0.6740
[STATE] Polygon: $0.5931
[STATE] Solana: $0.0049
[STEP] Episode Progress: 26/30
[STEP] Current Balance: $5028.28
[STEP] Total Return: 0.57%

[STEP] Taking action for timestep 27
[STEP] Raw Action: Pool Index=8.0, Clipped Index=8
[STEP] Selected Pool: Uniswap ETH/USDT
[STEP] Allocation: 90.00%

[REWARD] Calculating reward for timestep 27/30
[REWARD] Selected Pool: Uniswap ETH/USDT
[REWARD] Current Time: 2025-01-07 17:00
[REWARD] Current Balance: $5028.28
[REWARD] Allocation Per




[RESET] Starting new episode
[RESET] Initial Balance: $5000.00
[RESET] Start Date: 05-02-2025 08:00
[RESET] Episode Length: 30 steps

[STATE] Available Pools at Current Timestamp:
------------------------------------------------------------------------------------------------------------------------
Index  Pool Name            TVL($M)      APY(%)     IL Risk    Liquidity($M)   Chain      Gas($)  
------------------------------------------------------------------------------------------------------------------------
0      Aave USDC               1293.98M      7.10%    0.0247         809.50M Aurora     $0.0097 
1      Balancer DAI/USDC        337.47M     16.57%    0.0506         217.47M BSC        $2.8499 
2      Curve stETH/ETH          838.89M     10.78%    0.0355         454.76M Polygon    $0.3924 
3      PancakeSwap BNB/USDT     191.47M     18.26%    0.0895          96.77M Arbitrum   $1.2636 
4      Raydium SOL/USDC         774.89M     12.31%    0.1000         193.71M Optimism   $1

In [7]:
# Download the trained model

from google.colab import files
import os
import json

# First, verify the model exists
if os.path.exists("/content/models/final_model.zip"):
    print("[CHECK] Found trained model at /content/models/final_model.zip")

    # Download the model file
    files.download("/content/models/final_model.zip")
    print("[DOWNLOAD] Downloaded model file: final_model.zip")

    # Download the environment configuration if it exists
    if os.path.exists("/content/models/env_config.json"):
        files.download("/content/models/env_config.json")
        print("[DOWNLOAD] Downloaded environment configuration: env_config.json")
    else:
        print("[WARNING] Environment configuration file not found")

    print("\n[DOWNLOAD] Download complete!")
    print("Make sure to save both files:")
    print("1. final_model.zip - Your trained model")
    print("2. env_config.json - Environment configuration (if available)")
else:
    print("[ERROR] No trained model found at /content/models/final_model.zip")
    print("Please ensure you have trained the model first.")

[CHECK] Found trained model at /content/models/final_model.zip


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

[DOWNLOAD] Downloaded model file: final_model.zip

[DOWNLOAD] Download complete!
Make sure to save both files:
1. final_model.zip - Your trained model
2. env_config.json - Environment configuration (if available)


In [28]:
# Basic inference

import json
import numpy as np
import pandas as pd
from datetime import datetime
from stable_baselines3 import PPO
import gymnasium as gym
from gymnasium import spaces
from typing import Dict, Any
import os

# Specify your model path here
MODEL_PATH = "/content/models/final_model.zip"  # Update this path according to your Colab environment

class InferenceEnv(gym.Env):
    """Simplified environment for inference"""
    def __init__(self, num_pools, num_chains):
        super().__init__()
        self.num_pools = num_pools

        self.action_space = spaces.Box(
            low=np.array([0, 0.1], dtype=np.float32),
            high=np.array([8.0, 0.9], dtype=np.float32),
            dtype=np.float32
        )

        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(46,),
            dtype=np.float32
        )

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        observation = np.zeros(self.observation_space.shape, dtype=np.float32)
        return observation, {}

    def step(self, action):
        observation = np.zeros(self.observation_space.shape, dtype=np.float32)
        return observation, 0, True, False, {}

class DefiYieldOptimizer:
    def __init__(self, model_path: str = MODEL_PATH):
        self.model_path = model_path
        self.model = None
        self.env = None

    def load_model(self, num_pools: int, num_chains: int):
        """Load the model once and reuse it"""
        if self.model is None:
            if not os.path.exists(self.model_path):
                raise FileNotFoundError(f"Model not found at {self.model_path}")

            self.env = InferenceEnv(num_pools=num_pools, num_chains=num_chains)
            self.model = PPO.load(self.model_path, env=self.env, verbose=1)
            print("Model loaded successfully")

    def prepare_observation(self, input_data: Dict[str, Any]) -> np.ndarray:
        """Convert input JSON data to model observation"""
        pools = input_data['available_liquidity_pools']
        observation = np.zeros(46, dtype=np.float32)

        max_liquidity = max(pool['liquidity'] for pool in pools)
        avg_apy = sum(pool['apy'] for pool in pools) / len(pools)

        for i, pool in enumerate(pools):
            base_idx = i * 15
            observation[base_idx] = pool['liquidity'] / max_liquidity
            observation[base_idx + 1] = pool['apy'] / avg_apy
            observation[base_idx + 2] = pool['impermanent_loss_risk']
            observation[base_idx + 3] = pool['liquidity'] / max_liquidity
            observation[base_idx + 4:base_idx + 15] = pool['liquidity'] / max_liquidity

        observation[45] = input_data['market_volatility']

        gas_fees = list(input_data['gas_fees'].values())
        max_gas = max(gas_fees)
        for i, fee in enumerate(gas_fees):
            observation[43 + i] = fee / max_gas

        return observation

    def calculate_risk_score(self, pool_data: Dict[str, Any], market_volatility: float) -> float:
        return (pool_data['impermanent_loss_risk'] * 0.6 + market_volatility * 0.4)

    def calculate_model_confidence(self, risk_score: float, apy: float, liquidity: float) -> float:
        risk_factor = 1 - risk_score
        apy_factor = min(apy / 20, 1)
        liquidity_factor = min(liquidity / 10000000, 1)

        confidence = (risk_factor * 0.4 + apy_factor * 0.3 + liquidity_factor * 0.3)
        return round(confidence, 2)

    def generate_yield_strategy(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """Generate yield strategy recommendation based on input data"""
        try:
            num_pools = len(input_data['available_liquidity_pools'])
            num_chains = len(input_data['gas_fees'])

            # Load model if not already loaded
            self.load_model(num_pools, num_chains)

            observation = self.prepare_observation(input_data)
            print(f"Observation shape: {observation.shape}")

            action, _ = self.model.predict(observation, deterministic=True)
            print(f"Model prediction (action): {action}")

            pool_idx = int(np.clip(action[0], 0, num_pools - 1))
            allocation = float(np.clip(action[1], 0.1, 0.9))

            selected_pool = input_data['available_liquidity_pools'][pool_idx]
            total_balance = input_data['portfolio_balance']
            investment_amount = round(total_balance * allocation)
            stablecoin_reserve = total_balance - investment_amount

            risk_score = self.calculate_risk_score(selected_pool, input_data['market_volatility'])
            confidence = self.calculate_model_confidence(risk_score, selected_pool['apy'], selected_pool['liquidity'])

            preferred_network = min(input_data['gas_fees'].items(), key=lambda x: x[1])[0]
            highest_gas = max(input_data['gas_fees'].values())

            recommendation = {
                "user_id": input_data["user_id"],
                "recommended_strategy": {
                    "pool": selected_pool["name"],
                    "investment_amount": investment_amount,
                    "expected_apy": selected_pool["apy"],
                    "risk_score": risk_score,
                    "justification": f"Selected {selected_pool['name']} due to "
                                   f"{'stable' if selected_pool['apy'] < 10 else 'high'} APY ({selected_pool['apy']}%), "
                                   f"{'low' if selected_pool['impermanent_loss_risk'] < 0.05 else 'moderate'} impermanent loss risk, "
                                   f"and {'high' if selected_pool['liquidity'] > 5000000 else 'adequate'} liquidity."
                },
                "hedging_strategy": {
                    "stablecoin_reserve": stablecoin_reserve,
                    "reason": f"Market volatility detected ({input_data['market_volatility']}). "
                             f"Allocating {round((stablecoin_reserve/total_balance)*100)}% of portfolio to USDC as a hedge."
                },
                "gas_optimization": {
                    "preferred_network": preferred_network,
                    "reason": f"Gas fees on {max(input_data['gas_fees'].items(), key=lambda x: x[1])[0]} "
                             f"are too high (${highest_gas}). Transaction will be executed on {preferred_network} "
                             f"for cost efficiency (${input_data['gas_fees'][preferred_network]})."
                },
                "execution_time": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
                "model_confidence": confidence
            }

            return recommendation

        except Exception as e:
            print(f"Error generating yield strategy: {str(e)}")
            import traceback
            traceback.print_exc()
            return None

def test_optimizer():
    # Create optimizer instance
    optimizer = DefiYieldOptimizer()

    # Test data
    test_input = {
        "user_id": "12345",
        "portfolio_balance": 5000,
        "current_strategy": "Medium-Risk",
        "available_liquidity_pools": [
            {
                "name": "Aave USDC",
                "apy": 6.5,
                "liquidity": 10000000,
                "impermanent_loss_risk": 0.01
            },
            {
                "name": "Uniswap ETH/USDT",
                "apy": 15.2,
                "liquidity": 5000000,
                "impermanent_loss_risk": 0.08
            },
            {
                "name": "Curve stETH/ETH",
                "apy": 9.1,
                "liquidity": 7500000,
                "impermanent_loss_risk": 0.04
            }
        ],
        "market_volatility": 0.05,
        "gas_fees": {
            "NEAR": 0.001,
            "Ethereum": 20
        }
    }

    # Generate and print recommendation
    recommendation = optimizer.generate_yield_strategy(test_input)
    if recommendation:
        print("\nAI Model Recommendation:")
        print(json.dumps(recommendation, indent=2))
    else:
        print("Failed to generate recommendation")

if __name__ == "__main__":
    test_optimizer()

Model loaded successfully
Observation shape: (46,)
Model prediction (action): [0.02892614 0.1       ]

AI Model Recommendation:
{
  "user_id": "12345",
  "recommended_strategy": {
    "pool": "Aave USDC",
    "investment_amount": 500,
    "expected_apy": 6.5,
    "risk_score": 0.026000000000000002,
    "justification": "Selected Aave USDC due to stable APY (6.5%), low impermanent loss risk, and high liquidity."
  },
  "hedging_strategy": {
    "stablecoin_reserve": 4500,
    "reason": "Market volatility detected (0.05). Allocating 90% of portfolio to USDC as a hedge."
  },
  "gas_optimization": {
    "preferred_network": "NEAR",
    "reason": "Gas fees on Ethereum are too high ($20). Transaction will be executed on NEAR for cost efficiency ($0.001)."
  },
  "execution_time": "2025-03-02T14:40:49Z",
  "model_confidence": 0.79
}




In [9]:
# Testing using various test scenarions

def run_test_scenarios():
    optimizer = DefiYieldOptimizer()

    # Test Scenarios
    test_scenarios = {
        "1. Conservative Market": {
            "input": {
                "user_id": "test_1",
                "portfolio_balance": 10000,
                "current_strategy": "Low-Risk",
                "available_liquidity_pools": [
                    {
                        "name": "Aave USDC",
                        "apy": 5.2,
                        "liquidity": 15000000,
                        "impermanent_loss_risk": 0.01
                    },
                    {
                        "name": "Compound USDT",
                        "apy": 6.8,
                        "liquidity": 12000000,
                        "impermanent_loss_risk": 0.02
                    },
                    {
                        "name": "Curve 3pool",
                        "apy": 4.5,
                        "liquidity": 20000000,
                        "impermanent_loss_risk": 0.01
                    }
                ],
                "market_volatility": 0.02,
                "gas_fees": {
                    "Polygon": 0.1,
                    "Ethereum": 15
                }
            },
            "expected_output": {
                "pool": "Aave USDC or Curve 3pool",
                "allocation_range": (0.3, 0.5),  # 30-50% allocation
                "confidence_range": (0.8, 0.9),  # High confidence
                "preferred_network": "Polygon"
            }
        },

        "2. High Volatility Market": {
            "input": {
                "user_id": "test_2",
                "portfolio_balance": 8000,
                "current_strategy": "Medium-Risk",
                "available_liquidity_pools": [
                    {
                        "name": "Aave USDC",
                        "apy": 8.5,
                        "liquidity": 10000000,
                        "impermanent_loss_risk": 0.01
                    },
                    {
                        "name": "Uniswap ETH/USDT",
                        "apy": 45.2,
                        "liquidity": 5000000,
                        "impermanent_loss_risk": 0.15
                    },
                    {
                        "name": "Balancer BTC/ETH",
                        "apy": 38.5,
                        "liquidity": 3000000,
                        "impermanent_loss_risk": 0.18
                    }
                ],
                "market_volatility": 0.45,
                "gas_fees": {
                    "Arbitrum": 0.5,
                    "Ethereum": 25
                }
            },
            "expected_output": {
                "pool": "Aave USDC",  # Should choose safest option
                "allocation_range": (0.1, 0.3),  # Low allocation due to high volatility
                "confidence_range": (0.5, 0.7),  # Lower confidence
                "preferred_network": "Arbitrum"
            }
        },

        "3. Optimal Yield Opportunity": {
            "input": {
                "user_id": "test_3",
                "portfolio_balance": 15000,
                "current_strategy": "High-Risk",
                "available_liquidity_pools": [
                    {
                        "name": "Curve USDC/USDT/DAI",
                        "apy": 12.5,
                        "liquidity": 25000000,
                        "impermanent_loss_risk": 0.02
                    },
                    {
                        "name": "Balancer WBTC/ETH/USDC",
                        "apy": 28.5,
                        "liquidity": 15000000,
                        "impermanent_loss_risk": 0.06
                    },
                    {
                        "name": "Uniswap ETH/USDC",
                        "apy": 18.2,
                        "liquidity": 8000000,
                        "impermanent_loss_risk": 0.08
                    }
                ],
                "market_volatility": 0.08,
                "gas_fees": {
                    "Optimism": 0.8,
                    "Base": 0.3,
                    "Ethereum": 18
                }
            },
            "expected_output": {
                "pool": "Balancer WBTC/ETH/USDC",  # Best risk-adjusted return
                "allocation_range": (0.4, 0.6),  # Medium allocation
                "confidence_range": (0.7, 0.85),  # Good confidence
                "preferred_network": "Base"
            }
        },

        "4. Low Liquidity Environment": {
            "input": {
                "user_id": "test_4",
                "portfolio_balance": 5000,
                "current_strategy": "Medium-Risk",
                "available_liquidity_pools": [
                    {
                        "name": "New DeFi Protocol",
                        "apy": 150.0,
                        "liquidity": 500000,
                        "impermanent_loss_risk": 0.25
                    },
                    {
                        "name": "Small Cap Pool",
                        "apy": 85.2,
                        "liquidity": 800000,
                        "impermanent_loss_risk": 0.15
                    },
                    {
                        "name": "Stable LP",
                        "apy": 12.1,
                        "liquidity": 2000000,
                        "impermanent_loss_risk": 0.03
                    }
                ],
                "market_volatility": 0.12,
                "gas_fees": {
                    "Polygon": 0.2,
                    "Ethereum": 22
                }
            },
            "expected_output": {
                "pool": "Stable LP",  # Should avoid high-risk, low-liquidity pools
                "allocation_range": (0.2, 0.4),  # Conservative allocation
                "confidence_range": (0.5, 0.7),  # Lower confidence due to market conditions
                "preferred_network": "Polygon"
            }
        },

        "5. Gas Crisis Scenario": {
            "input": {
                "user_id": "test_5",
                "portfolio_balance": 12000,
                "current_strategy": "Medium-Risk",
                "available_liquidity_pools": [
                    {
                        "name": "Aave USDC",
                        "apy": 7.5,
                        "liquidity": 18000000,
                        "impermanent_loss_risk": 0.01
                    },
                    {
                        "name": "Curve USDT/DAI",
                        "apy": 9.2,
                        "liquidity": 12000000,
                        "impermanent_loss_risk": 0.02
                    },
                    {
                        "name": "Uniswap ETH/USDC",
                        "apy": 15.8,
                        "liquidity": 8000000,
                        "impermanent_loss_risk": 0.07
                    }
                ],
                "market_volatility": 0.15,
                "gas_fees": {
                    "Ethereum": 150,  # Gas crisis
                    "Arbitrum": 12,
                    "Optimism": 8,
                    "Polygon": 0.5
                }
            },
            "expected_output": {
                "pool": "Curve USDT/DAI or Aave USDC",  # Balance of safety and yield
                "allocation_range": (0.3, 0.5),  # Medium allocation
                "confidence_range": (0.65, 0.8),  # Decent confidence
                "preferred_network": "Polygon"  # Lowest gas fees
            }
        }
    }

    def validate_recommendation(scenario_name, input_data, expected, actual):
        """Validate if the recommendation meets expected criteria"""
        pool_name = actual["recommended_strategy"]["pool"]
        allocation = actual["recommended_strategy"]["investment_amount"] / input_data["portfolio_balance"]
        confidence = actual["model_confidence"]
        network = actual["gas_optimization"]["preferred_network"]

        print(f"\nValidating {scenario_name}:")
        print(f"Pool Selection: {pool_name}")
        print(f"Expected Pool: {expected['pool']}")
        print(f"Allocation: {allocation:.2%} (Expected: {expected['allocation_range'][0]:.0%}-{expected['allocation_range'][1]:.0%})")
        print(f"Confidence: {confidence:.2f} (Expected: {expected['confidence_range'][0]:.2f}-{expected['confidence_range'][1]:.2f})")
        print(f"Network: {network} (Expected: {expected['preferred_network']})")

        # Validation results
        results = {
            "pool_match": "Expected" in pool_name or expected["pool"] in pool_name,
            "allocation_in_range": expected["allocation_range"][0] <= allocation <= expected["allocation_range"][1],
            "confidence_in_range": expected["confidence_range"][0] <= confidence <= expected["confidence_range"][1],
            "network_match": network == expected["preferred_network"]
        }

        return results

    # Run all scenarios
    for scenario_name, scenario_data in test_scenarios.items():
        print(f"\n{'='*50}")
        print(f"Testing Scenario: {scenario_name}")
        print(f"{'='*50}")

        recommendation = optimizer.generate_yield_strategy(scenario_data["input"])
        if recommendation:
            print("\nAI Model Recommendation:")
            print(json.dumps(recommendation, indent=2))

            # Validate results
            validation_results = validate_recommendation(
                scenario_name,
                scenario_data["input"],
                scenario_data["expected_output"],
                recommendation
            )

            print("\nValidation Results:")
            for key, value in validation_results.items():
                print(f"{key}: {'✓' if value else '✗'}")
        else:
            print(f"Failed to generate recommendation for {scenario_name}")

# Run the tests
if __name__ == "__main__":
    run_test_scenarios()


Testing Scenario: 1. Conservative Market
Model loaded successfully
Observation shape: (46,)
Model prediction (action): [0.02641101 0.1       ]

AI Model Recommendation:
{
  "user_id": "test_1",
  "recommended_strategy": {
    "pool": "Aave USDC",
    "investment_amount": 1000,
    "expected_apy": 5.2,
    "risk_score": 0.014,
    "justification": "Selected Aave USDC due to stable APY (5.2%), low impermanent loss risk, and high liquidity."
  },
  "hedging_strategy": {
    "stablecoin_reserve": 9000,
    "reason": "Market volatility detected (0.02). Allocating 90% of portfolio to USDC as a hedge."
  },
  "gas_optimization": {
    "preferred_network": "Polygon",
    "reason": "Gas fees on Ethereum are too high ($15). Transaction will be executed on Polygon for cost efficiency ($0.1)."
  },
  "execution_time": "2025-03-02T13:28:44Z",
  "model_confidence": 0.77
}

Validating 1. Conservative Market:
Pool Selection: Aave USDC
Expected Pool: Aave USDC or Curve 3pool
Allocation: 10.00% (Expect

Traceback (most recent call last):
  File "<ipython-input-8-108599751118>", line 103, in generate_yield_strategy
    observation = self.prepare_observation(input_data)
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-8-108599751118>", line 79, in prepare_observation
    observation[43 + i] = fee / max_gas
    ~~~~~~~~~~~^^^^^^^^
IndexError: index 46 is out of bounds for axis 0 with size 46


In [32]:
# Taking best out of model1 and model2

import json
import numpy as np
import pandas as pd
from datetime import datetime
from stable_baselines3 import PPO
import gymnasium as gym
from gymnasium import spaces
from typing import Dict, Any, List
import os

# Specify your model paths here
MODEL_PATHS = [
    "/content/model1.zip",
    "/content/model2.zip"
]

class InferenceEnv(gym.Env):
    """Simplified environment for inference"""
    def __init__(self, num_pools, num_chains):
        super().__init__()
        self.num_pools = num_pools

        self.action_space = spaces.Box(
            low=np.array([0, 0.1], dtype=np.float32),
            high=np.array([8.0, 0.9], dtype=np.float32),
            dtype=np.float32
        )

        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(46,),
            dtype=np.float32
        )

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        observation = np.zeros(self.observation_space.shape, dtype=np.float32)
        return observation, {}

    def step(self, action):
        observation = np.zeros(self.observation_space.shape, dtype=np.float32)
        return observation, 0, True, False, {}

class MultiModelOptimizer:
    """Class to handle multiple models and select the best recommendation"""
    def __init__(self, MODEL_PATHS: List[str]):
        self.optimizers = []
        print(f"\nInitializing {len(MODEL_PATHS)} models...")

        for i, path in enumerate(MODEL_PATHS):
            try:
                optimizer = DefiYieldOptimizer(path)
                self.optimizers.append(optimizer)
                print(f"Model {i+1} initialized successfully from: {path}")
            except Exception as e:
                print(f"Error initializing model {i+1} from {path}: {str(e)}")

        if not self.optimizers:
            raise ValueError("No models were successfully initialized")

    def evaluate_recommendation(self, recommendation: Dict) -> float:
        """Score a recommendation based on multiple factors"""
        if not recommendation:
            return -float('inf')

        strategy = recommendation['recommended_strategy']
        confidence = recommendation['model_confidence']

        # Extract metrics
        apy = strategy['expected_apy']
        risk_score = strategy['risk_score']
        investment_amount = strategy['investment_amount']

        # Calculate risk-adjusted return
        risk_adjusted_return = apy * (1 - risk_score)

        # Calculate allocation score (prefer higher allocations when confidence is high)
        portfolio_balance = investment_amount / (1 - recommendation['hedging_strategy']['stablecoin_reserve'])
        allocation_score = (investment_amount / portfolio_balance) * confidence

        # Calculate final score
        final_score = (risk_adjusted_return * 0.4 +
                      confidence * 0.3 +
                      allocation_score * 0.3)

        print(f"\nRecommendation Evaluation:")
        print(f"APY: {apy:.2f}%")
        print(f"Risk Score: {risk_score:.2f}")
        print(f"Confidence: {confidence:.2f}")
        print(f"Risk-Adjusted Return: {risk_adjusted_return:.2f}")
        print(f"Allocation Score: {allocation_score:.2f}")
        print(f"Final Score: {final_score:.2f}")

        return final_score

    def generate_best_strategy(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """Generate recommendations from all models and select the best one"""
        print("\nGenerating recommendations from multiple models...")

        recommendations = []
        scores = []

        for i, optimizer in enumerate(self.optimizers):
            print(f"\nGenerating recommendation from Model {i+1}...")
            try:
                recommendation = optimizer.generate_yield_strategy(input_data)
                if recommendation:
                    score = self.evaluate_recommendation(recommendation)
                    recommendations.append(recommendation)
                    scores.append(score)
                    print(f"Model {i+1} Score: {score:.2f}")
            except Exception as e:
                print(f"Error with Model {i+1}: {str(e)}")

        if not recommendations:
            print("No valid recommendations generated")
            return None

        # Select best recommendation
        best_idx = np.argmax(scores)
        best_recommendation = recommendations[best_idx]

        # Add comparison metadata
        best_recommendation['model_metadata'] = {
            'selected_model': f"Model {best_idx + 1}",
            'comparison_score': scores[best_idx],
            'total_models_compared': len(self.optimizers),
            'score_percentile': sum(scores[best_idx] >= np.array(scores)) / len(scores) * 100
        }

        print(f"\nSelected Model {best_idx + 1} as best model with score: {scores[best_idx]:.2f}")
        return best_recommendation

class DefiYieldOptimizer:
    """DeFi Investment Strategy Optimizer"""
    def __init__(self, MODEL_PATH: str):
        self.MODEL_PATH = MODEL_PATH
        self.model = None
        self.env = None
        print(f"Initialized DefiYieldOptimizer with model path: {MODEL_PATH}")

    def load_model(self, num_pools: int, num_chains: int):
        """Load the model once and reuse it"""
        if self.model is None:
            if not os.path.exists(self.MODEL_PATH):
                raise FileNotFoundError(f"Model not found at {self.MODEL_PATH}")

            self.env = InferenceEnv(num_pools=num_pools, num_chains=num_chains)
            self.model = PPO.load(self.MODEL_PATH, env=self.env)
            print(f"Model loaded successfully from {self.MODEL_PATH}")

    def prepare_observation(self, input_data: Dict[str, Any]) -> np.ndarray:
        """Convert input JSON data to model observation"""
        pools = input_data['available_liquidity_pools']
        observation = np.zeros(46, dtype=np.float32)

        max_liquidity = max(pool['liquidity'] for pool in pools)
        avg_apy = sum(pool['apy'] for pool in pools) / len(pools)

        for i, pool in enumerate(pools):
            base_idx = i * 15
            observation[base_idx] = pool['liquidity'] / max_liquidity
            observation[base_idx + 1] = pool['apy'] / avg_apy
            observation[base_idx + 2] = pool['impermanent_loss_risk']
            observation[base_idx + 3] = pool['liquidity'] / max_liquidity
            observation[base_idx + 4:base_idx + 15] = pool['liquidity'] / max_liquidity

        observation[45] = input_data['market_volatility']

        gas_fees = list(input_data['gas_fees'].values())
        max_gas = max(gas_fees)
        for i, fee in enumerate(gas_fees):
            observation[43 + i] = fee / max_gas

        return observation

    def calculate_risk_score(self, pool_data: Dict[str, Any], market_volatility: float) -> float:
        """Calculate risk score based on pool data and market conditions"""
        base_risk = pool_data['impermanent_loss_risk']
        volatility_impact = market_volatility * 0.4
        liquidity_factor = min(1.0, 10000000 / pool_data['liquidity']) * 0.2

        risk_score = (base_risk * 0.4 +
                     volatility_impact +
                     liquidity_factor)

        return min(1.0, max(0.0, risk_score))

    def calculate_model_confidence(self, risk_score: float, apy: float, liquidity: float) -> float:
        """Calculate model confidence score"""
        risk_factor = 1 - risk_score
        apy_factor = min(apy / 20, 1)
        liquidity_factor = min(liquidity / 10000000, 1)

        confidence = (risk_factor * 0.4 +
                     apy_factor * 0.3 +
                     liquidity_factor * 0.3)

        return round(min(1.0, max(0.0, confidence)), 2)


    def generate_yield_strategy(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """Generate yield strategy recommendation based on input data"""
        try:
            num_pools = len(input_data['available_liquidity_pools'])
            num_chains = len(input_data['gas_fees'])

            # Load model if not already loaded
            self.load_model(num_pools, num_chains)

            observation = self.prepare_observation(input_data)
            print(f"Observation shape: {observation.shape}")

            action, _ = self.model.predict(observation, deterministic=True)
            print(f"Model prediction (action): {action}")

            pool_idx = int(np.clip(action[0], 0, num_pools - 1))
            allocation = float(np.clip(action[1], 0.1, 0.9))

            selected_pool = input_data['available_liquidity_pools'][pool_idx]
            total_balance = input_data['portfolio_balance']
            investment_amount = round(total_balance * allocation)
            stablecoin_reserve = total_balance - investment_amount

            risk_score = self.calculate_risk_score(selected_pool, input_data['market_volatility'])
            confidence = self.calculate_model_confidence(risk_score, selected_pool['apy'], selected_pool['liquidity'])

            preferred_network = min(input_data['gas_fees'].items(), key=lambda x: x[1])[0]
            highest_gas = max(input_data['gas_fees'].values())

            # Generate detailed recommendation
            recommendation = {
                "user_id": input_data["user_id"],
                "recommended_strategy": {
                    "pool": selected_pool["name"],
                    "investment_amount": investment_amount,
                    "expected_apy": selected_pool["apy"],
                    "risk_score": risk_score,
                    "allocation_percentage": round(allocation * 100, 2),
                    "justification": self._generate_strategy_justification(
                        selected_pool,
                        risk_score,
                        input_data['market_volatility']
                    )
                },
                "hedging_strategy": {
                    "stablecoin_reserve": stablecoin_reserve,
                    "reserve_percentage": round((stablecoin_reserve/total_balance)*100, 2),
                    "reason": self._generate_hedging_justification(
                        input_data['market_volatility'],
                        stablecoin_reserve,
                        total_balance
                    )
                },
                "gas_optimization": {
                    "preferred_network": preferred_network,
                    "estimated_gas_saving": round(highest_gas - input_data['gas_fees'][preferred_network], 2),
                    "reason": self._generate_gas_justification(
                        preferred_network,
                        input_data['gas_fees']
                    )
                },
                "execution_time": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
                "model_confidence": confidence,
                "market_conditions": {
                    "volatility": input_data['market_volatility'],
                    "risk_assessment": self._assess_market_conditions(input_data['market_volatility'])
                }
            }

            return recommendation

        except Exception as e:
            print(f"Error generating yield strategy: {str(e)}")
            import traceback
            traceback.print_exc()
            return None

    def _generate_strategy_justification(self, pool: Dict, risk_score: float, market_volatility: float) -> str:
        """Generate detailed justification for pool selection"""
        risk_level = "low" if risk_score < 0.05 else "moderate" if risk_score < 0.1 else "high"
        apy_level = "stable" if pool['apy'] < 10 else "high" if pool['apy'] < 20 else "very high"
        liquidity_level = "high" if pool['liquidity'] > 5000000 else "adequate"

        return (f"Selected {pool['name']} due to {apy_level} APY ({pool['apy']}%), "
                f"{risk_level} risk profile (score: {risk_score:.2f}), "
                f"and {liquidity_level} liquidity (${pool['liquidity']:,.0f}). "
                f"Market volatility is {market_volatility:.2%}.")

    def _generate_hedging_justification(self, volatility: float, reserve: float, total: float) -> str:
        """Generate justification for hedging strategy"""
        reserve_percentage = (reserve/total) * 100
        if volatility > 0.1:
            risk_level = "high"
        elif volatility > 0.05:
            risk_level = "moderate"
        else:
            risk_level = "low"

        return (f"Market volatility is {risk_level} ({volatility:.1%}). "
                f"Maintaining {reserve_percentage:.1f}% (${reserve:,.2f}) as stable reserve "
                f"for risk management.")

    def _generate_gas_justification(self, preferred_network: str, gas_fees: Dict) -> str:
        """Generate justification for network selection"""
        highest_network = max(gas_fees.items(), key=lambda x: x[1])[0]
        savings = gas_fees[highest_network] - gas_fees[preferred_network]

        return (f"Selected {preferred_network} for transaction execution due to "
                f"optimal gas fees (${gas_fees[preferred_network]:.2f}). "
                f"Estimated savings of ${savings:.2f} compared to {highest_network}.")

    def _assess_market_conditions(self, volatility: float) -> str:
        """Assess overall market conditions"""
        if volatility > 0.1:
            return "High volatility - Exercise caution"
        elif volatility > 0.05:
            return "Moderate volatility - Standard precautions"
        else:
            return "Low volatility - Favorable conditions"

# Test Scenarios
test_scenarios = {
    "1. Conservative Market": {
        "input": {
            "user_id": "test_1",
            "portfolio_balance": 10000,
            "current_strategy": "Low-Risk",
            "available_liquidity_pools": [
                {
                    "name": "Aave USDC",
                    "apy": 5.2,
                    "liquidity": 15000000,
                    "impermanent_loss_risk": 0.01
                },
                {
                    "name": "Compound USDT",
                    "apy": 6.8,
                    "liquidity": 12000000,
                    "impermanent_loss_risk": 0.02
                },
                {
                    "name": "Curve 3pool",
                    "apy": 4.5,
                    "liquidity": 20000000,
                    "impermanent_loss_risk": 0.01
                }
            ],
            "market_volatility": 0.02,
            "gas_fees": {
                "Polygon": 0.1,
                "Ethereum": 15
            }
        },
        "expected_output": {
            "pool": "Aave USDC or Curve 3pool",
            "allocation_range": (0.3, 0.5),
            "confidence_range": (0.8, 0.9),
            "preferred_network": "Polygon"
        }
    },

    "2. High Volatility Market": {
        "input": {
            "user_id": "test_2",
            "portfolio_balance": 8000,
            "current_strategy": "Medium-Risk",
            "available_liquidity_pools": [
                {
                    "name": "Aave USDC",
                    "apy": 8.5,
                    "liquidity": 10000000,
                    "impermanent_loss_risk": 0.01
                },
                {
                    "name": "Uniswap ETH/USDT",
                    "apy": 45.2,
                    "liquidity": 5000000,
                    "impermanent_loss_risk": 0.15
                },
                {
                    "name": "Balancer BTC/ETH",
                    "apy": 38.5,
                    "liquidity": 3000000,
                    "impermanent_loss_risk": 0.18
                }
            ],
            "market_volatility": 0.45,
            "gas_fees": {
                "Arbitrum": 0.5,
                "Ethereum": 25
            }
        },
        "expected_output": {
            "pool": "Aave USDC",
            "allocation_range": (0.1, 0.3),
            "confidence_range": (0.5, 0.7),
            "preferred_network": "Arbitrum"
        }
    }
}


def validate_recommendation(scenario_name: str, input_data: Dict, expected: Dict, actual: Dict) -> Dict:
    """Validate if the recommendation meets expected criteria"""
    pool_name = actual["recommended_strategy"]["pool"]
    allocation = actual["recommended_strategy"]["investment_amount"] / input_data["portfolio_balance"]
    confidence = actual["model_confidence"]
    network = actual["gas_optimization"]["preferred_network"]

    print(f"\nValidating {scenario_name}:")
    print(f"Pool Selection: {pool_name}")
    print(f"Expected Pool: {expected['pool']}")
    print(f"Allocation: {allocation:.2%} (Expected: {expected['allocation_range'][0]:.0%}-{expected['allocation_range'][1]:.0%})")
    print(f"Confidence: {confidence:.2f} (Expected: {expected['confidence_range'][0]:.2f}-{expected['confidence_range'][1]:.2f})")
    print(f"Network: {network} (Expected: {expected['preferred_network']})")

    results = {
        "pool_match": "Expected" in pool_name or expected["pool"] in pool_name,
        "allocation_in_range": expected["allocation_range"][0] <= allocation <= expected["allocation_range"][1],
        "confidence_in_range": expected["confidence_range"][0] <= confidence <= expected["confidence_range"][1],
        "network_match": network == expected["preferred_network"]
    }

    return results

def test_multi_model_optimizer():
    """Test the multi-model optimizer with a single case"""
    print("\n" + "="*50)
    print("Testing Multi-Model Optimizer with Single Case")
    print("="*50)

    multi_optimizer = MultiModelOptimizer(MODEL_PATHS)

    test_input = {
        "user_id": "12345",
        "portfolio_balance": 5000,
        "current_strategy": "Medium-Risk",
        "available_liquidity_pools": [
            {
                "name": "Aave USDC",
                "apy": 6.5,
                "liquidity": 10000000,
                "impermanent_loss_risk": 0.01
            },
            {
                "name": "Uniswap ETH/USDT",
                "apy": 15.2,
                "liquidity": 5000000,
                "impermanent_loss_risk": 0.08
            },
            {
                "name": "Curve stETH/ETH",
                "apy": 9.1,
                "liquidity": 7500000,
                "impermanent_loss_risk": 0.04
            }
        ],
        "market_volatility": 0.05,
        "gas_fees": {
            "NEAR": 0.001,
            "Ethereum": 20
        }
    }

    best_recommendation = multi_optimizer.generate_best_strategy(test_input)
    if best_recommendation:
        print("\nBest Model Recommendation:")
        print(json.dumps(best_recommendation, indent=2))
    else:
        print("Failed to generate recommendation")

def run_multi_model_test_scenarios():
    """Run comprehensive tests with multiple scenarios"""
    print("\n" + "="*50)
    print("Running Multi-Model Test Scenarios")
    print("="*50)

    multi_optimizer = MultiModelOptimizer(MODEL_PATHS)

    scenario_results = {}

    for scenario_name, scenario_data in test_scenarios.items():
        print(f"\n{'='*50}")
        print(f"Testing Scenario: {scenario_name}")
        print(f"{'='*50}")

        best_recommendation = multi_optimizer.generate_best_strategy(scenario_data["input"])

        if best_recommendation:
            print("\nBest Model Recommendation:")
            print(json.dumps(best_recommendation, indent=2))

            validation_results = validate_recommendation(
                scenario_name,
                scenario_data["input"],
                scenario_data["expected_output"],
                best_recommendation
            )

            scenario_results[scenario_name] = {
                "validation": validation_results,
                "selected_model": best_recommendation["model_metadata"]["selected_model"],
                "confidence": best_recommendation["model_confidence"],
                "comparison_score": best_recommendation["model_metadata"]["comparison_score"]
            }

            print("\nValidation Results:")
            for key, value in validation_results.items():
                print(f"{key}: {'✓' if value else '✗'}")
        else:
            print(f"Failed to generate recommendation for {scenario_name}")
            scenario_results[scenario_name] = {"error": "Failed to generate recommendation"}

    return scenario_results

def analyze_results(scenario_results: Dict):
    """Analyze and summarize test results"""
    print("\n" + "="*50)
    print("Test Results Analysis")
    print("="*50)

    total_scenarios = len(scenario_results)
    successful_scenarios = sum(1 for result in scenario_results.values() if "error" not in result)

    print(f"\nTotal Scenarios: {total_scenarios}")
    print(f"Successful Scenarios: {successful_scenarios}")
    print(f"Success Rate: {(successful_scenarios/total_scenarios)*100:.1f}%")

    if successful_scenarios > 0:
        validation_stats = {
            "pool_match": 0,
            "allocation_in_range": 0,
            "confidence_in_range": 0,
            "network_match": 0
        }

        model_selection_count = {}
        avg_confidence = 0
        avg_score = 0

        for scenario, result in scenario_results.items():
            if "error" not in result:
                for metric, value in result["validation"].items():
                    validation_stats[metric] += int(value)

                model = result["selected_model"]
                model_selection_count[model] = model_selection_count.get(model, 0) + 1
                avg_confidence += result["confidence"]
                avg_score += result["comparison_score"]

        print("\nValidation Metrics:")
        for metric, count in validation_stats.items():
            success_rate = (count/successful_scenarios)*100
            print(f"{metric}: {success_rate:.1f}% success rate")

        print("\nModel Selection Distribution:")
        for model, count in model_selection_count.items():
            percentage = (count/successful_scenarios)*100
            print(f"{model}: {percentage:.1f}% of scenarios")

        print(f"\nAverage Confidence: {avg_confidence/successful_scenarios:.2f}")
        print(f"Average Comparison Score: {avg_score/successful_scenarios:.2f}")

def main():
    """Main execution function"""
    print("\nStarting DeFi Investment Strategy Multi-Model Optimization")

    # Run single test case
    print("\nRunning single test case...")
    test_multi_model_optimizer()

    # Run multiple test scenarios
    print("\nRunning multiple test scenarios...")
    scenario_results = run_multi_model_test_scenarios()

    # Analyze results
    analyze_results(scenario_results)

if __name__ == "__main__":
    main()


Starting DeFi Investment Strategy Multi-Model Optimization

Running single test case...

Testing Multi-Model Optimizer with Single Case

Initializing 2 models...
Initialized DefiYieldOptimizer with model path: /content/model1.zip
Model 1 initialized successfully from: /content/model1.zip
Initialized DefiYieldOptimizer with model path: /content/model2.zip
Model 2 initialized successfully from: /content/model2.zip

Generating recommendations from multiple models...

Generating recommendation from Model 1...
Model loaded successfully from /content/model1.zip
Observation shape: (46,)
Model prediction (action): [0.00862982 0.1       ]

Recommendation Evaluation:
APY: 6.50%
Risk Score: 0.22
Confidence: 0.71
Risk-Adjusted Return: 5.04
Allocation Score: -3194.29
Final Score: -956.06
Model 1 Score: -956.06

Generating recommendation from Model 2...
Model loaded successfully from /content/model2.zip
Observation shape: (46,)
Model prediction (action): [0.02892614 0.1       ]

Recommendation Eval

In [30]:
# simple inference without RL models

import json
import pandas as pd
from datetime import datetime

def load_defi_data(file_path):
    """Load historical DeFi data from CSV."""
    return pd.read_csv(file_path)

def evaluate_pools(available_pools, market_volatility):
    """Evaluate pools based on APY, liquidity, and risk."""
    best_pool = None
    best_score = -float('inf')

    for pool in available_pools:
        score = pool["apy"] - (pool["impermanent_loss_risk"] * 100) + (pool["liquidity"] / 1e6)

        if score > best_score:
            best_score = score
            best_pool = pool

    return best_pool

def determine_hedging(portfolio_balance, market_volatility):
    """Allocate stablecoin reserves based on volatility."""
    reserve_percentage = 0.2 if market_volatility > 0.03 else 0.1
    return round(portfolio_balance * reserve_percentage, 2)

def optimize_gas_fees(gas_fees):
    """Select the network with the lowest gas fees."""
    return min(gas_fees, key=gas_fees.get)

def generate_recommendation(request_json, defi_data):
    """Generate a DeFi investment recommendation based on input data."""
    user_id = request_json["user_id"]
    portfolio_balance = request_json["portfolio_balance"]
    available_pools = request_json["available_liquidity_pools"]
    market_volatility = request_json["market_volatility"]
    gas_fees = request_json["gas_fees"]

    best_pool = evaluate_pools(available_pools, market_volatility)
    investment_amount = round(portfolio_balance * 0.8, 2)  # Allocate 80% to investment
    stablecoin_reserve = determine_hedging(portfolio_balance, market_volatility)
    preferred_network = optimize_gas_fees(gas_fees)

    response = {
        "user_id": user_id,
        "recommended_strategy": {
            "pool": best_pool["name"],
            "investment_amount": investment_amount,
            "expected_apy": best_pool["apy"],
            "risk_score": best_pool["impermanent_loss_risk"],
            "justification": f"Selected {best_pool['name']} due to stable APY, low impermanent loss risk, and high liquidity."
        },
        "hedging_strategy": {
            "stablecoin_reserve": stablecoin_reserve,
            "reason": f"Market volatility detected ({market_volatility}). Allocating {round((stablecoin_reserve/portfolio_balance)*100,2)}% of portfolio to USDC as a hedge."
        },
        "gas_optimization": {
            "preferred_network": preferred_network,
            "reason": f"Gas fees on {preferred_network} are lowest at ${gas_fees[preferred_network]}."
        },
        "execution_time": datetime.utcnow().isoformat() + "Z",
        "model_confidence": round(0.9 + (best_pool["apy"] / 100), 2)
    }

    return json.dumps(response, indent=4)

# Example Usage
if __name__ == "__main__":
    defi_data = load_defi_data("/content/realistic_defi_yield_training_dataset_20000.csv")

    input_json = {
        "user_id": "12345",
        "portfolio_balance": 5000,
        "current_strategy": "Medium-Risk",
        "available_liquidity_pools": [
            {"name": "Aave USDC", "apy": 6.5, "liquidity": 10000000, "impermanent_loss_risk": 0.01},
            {"name": "Uniswap ETH/USDT", "apy": 15.2, "liquidity": 5000000, "impermanent_loss_risk": 0.08},
            {"name": "Curve stETH/ETH", "apy": 9.1, "liquidity": 7500000, "impermanent_loss_risk": 0.04}
        ],
        "market_volatility": 0.05,
        "gas_fees": {"NEAR": 0.001, "Ethereum": 20}
    }

    result = generate_recommendation(input_json, defi_data)
    print(result)

{
    "user_id": "12345",
    "recommended_strategy": {
        "pool": "Aave USDC",
        "investment_amount": 4000.0,
        "expected_apy": 6.5,
        "risk_score": 0.01,
        "justification": "Selected Aave USDC due to stable APY, low impermanent loss risk, and high liquidity."
    },
    "hedging_strategy": {
        "stablecoin_reserve": 1000.0,
        "reason": "Market volatility detected (0.05). Allocating 20.0% of portfolio to USDC as a hedge."
    },
    "gas_optimization": {
        "preferred_network": "NEAR",
        "reason": "Gas fees on NEAR are lowest at $0.001."
    },
    "execution_time": "2025-03-02T14:44:08.472101Z",
    "model_confidence": 0.97
}


In [31]:
# Combining code from testing with and without RL model

import json
import numpy as np
from typing import Dict, Any
from datetime import datetime

def compare_recommendations(actual_output: Dict[str, Any], expected_output: Dict[str, Any]) -> Dict[str, Any]:
    """Compare actual and expected recommendations and calculate similarity scores"""

    comparison_results = {
        "pool_selection": {
            "match": False,
            "actual": "",
            "expected": "",
            "score": 0.0
        },
        "investment_strategy": {
            "amount_difference_percentage": 0.0,
            "risk_score_difference": 0.0,
            "score": 0.0
        },
        "hedging_strategy": {
            "reserve_difference_percentage": 0.0,
            "score": 0.0
        },
        "gas_optimization": {
            "network_match": False,
            "score": 0.0
        },
        "overall_confidence": {
            "difference": 0.0,
            "score": 0.0
        },
        "total_similarity_score": 0.0
    }

    try:
        # Compare pool selection
        actual_pool = actual_output["recommended_strategy"]["pool"]
        expected_pool = expected_output["recommended_strategy"]["pool"]
        comparison_results["pool_selection"]["actual"] = actual_pool
        comparison_results["pool_selection"]["expected"] = expected_pool
        comparison_results["pool_selection"]["match"] = actual_pool == expected_pool
        comparison_results["pool_selection"]["score"] = 1.0 if actual_pool == expected_pool else 0.0

        # Compare investment amounts
        actual_amount = actual_output["recommended_strategy"]["investment_amount"]
        expected_amount = expected_output["recommended_strategy"]["investment_amount"]
        amount_diff_pct = abs(actual_amount - expected_amount) / expected_amount
        comparison_results["investment_strategy"]["amount_difference_percentage"] = amount_diff_pct
        comparison_results["investment_strategy"]["score"] = max(0, 1 - amount_diff_pct)

        # Compare risk scores
        actual_risk = actual_output["recommended_strategy"]["risk_score"]
        expected_risk = expected_output["recommended_strategy"]["risk_score"]
        risk_diff = abs(actual_risk - expected_risk)
        comparison_results["investment_strategy"]["risk_score_difference"] = risk_diff
        comparison_results["investment_strategy"]["score"] = max(0, 1 - risk_diff)

        # Compare hedging strategy
        actual_reserve = actual_output["hedging_strategy"]["stablecoin_reserve"]
        expected_reserve = expected_output["hedging_strategy"]["stablecoin_reserve"]
        reserve_diff_pct = abs(actual_reserve - expected_reserve) / expected_reserve
        comparison_results["hedging_strategy"]["reserve_difference_percentage"] = reserve_diff_pct
        comparison_results["hedging_strategy"]["score"] = max(0, 1 - reserve_diff_pct)

        # Compare gas optimization
        actual_network = actual_output["gas_optimization"]["preferred_network"]
        expected_network = expected_output["gas_optimization"]["preferred_network"]
        comparison_results["gas_optimization"]["network_match"] = actual_network == expected_network
        comparison_results["gas_optimization"]["score"] = 1.0 if actual_network == expected_network else 0.0

        # Compare confidence scores
        actual_confidence = actual_output.get("model_confidence", 0)
        expected_confidence = expected_output.get("model_confidence", 0)
        confidence_diff = abs(actual_confidence - expected_confidence)
        comparison_results["overall_confidence"]["difference"] = confidence_diff
        comparison_results["overall_confidence"]["score"] = max(0, 1 - confidence_diff)

        # Calculate total similarity score (weighted average)
        weights = {
            "pool_selection": 0.3,
            "investment_strategy": 0.25,
            "hedging_strategy": 0.2,
            "gas_optimization": 0.15,
            "overall_confidence": 0.1
        }

        total_score = (
            weights["pool_selection"] * comparison_results["pool_selection"]["score"] +
            weights["investment_strategy"] * comparison_results["investment_strategy"]["score"] +
            weights["hedging_strategy"] * comparison_results["hedging_strategy"]["score"] +
            weights["gas_optimization"] * comparison_results["gas_optimization"]["score"] +
            weights["overall_confidence"] * comparison_results["overall_confidence"]["score"]
        )

        comparison_results["total_similarity_score"] = round(total_score, 4)

    except Exception as e:
        print(f"Error during comparison: {str(e)}")
        comparison_results["error"] = str(e)

    return comparison_results

def print_comparison_results(comparison_results: Dict[str, Any]):
    """Print formatted comparison results"""
    print("\n" + "="*50)
    print("RECOMMENDATION COMPARISON RESULTS")
    print("="*50)

    # Pool Selection
    print("\n1. Pool Selection:")
    print(f"Expected Pool: {comparison_results['pool_selection']['expected']}")
    print(f"Actual Pool: {comparison_results['pool_selection']['actual']}")
    print(f"Match: {'✓' if comparison_results['pool_selection']['match'] else '✗'}")
    print(f"Score: {comparison_results['pool_selection']['score']:.2f}")

    # Investment Strategy
    print("\n2. Investment Strategy:")
    print(f"Amount Difference: {comparison_results['investment_strategy']['amount_difference_percentage']*100:.1f}%")
    print(f"Risk Score Difference: {comparison_results['investment_strategy']['risk_score_difference']:.3f}")
    print(f"Score: {comparison_results['investment_strategy']['score']:.2f}")

    # Hedging Strategy
    print("\n3. Hedging Strategy:")
    print(f"Reserve Difference: {comparison_results['hedging_strategy']['reserve_difference_percentage']*100:.1f}%")
    print(f"Score: {comparison_results['hedging_strategy']['score']:.2f}")

    # Gas Optimization
    print("\n4. Gas Optimization:")
    print(f"Network Match: {'✓' if comparison_results['gas_optimization']['network_match'] else '✗'}")
    print(f"Score: {comparison_results['gas_optimization']['score']:.2f}")

    # Confidence
    print("\n5. Model Confidence:")
    print(f"Difference: {comparison_results['overall_confidence']['difference']:.2f}")
    print(f"Score: {comparison_results['overall_confidence']['score']:.2f}")

    # Overall Score
    print("\n" + "="*50)
    print(f"TOTAL SIMILARITY SCORE: {comparison_results['total_similarity_score']:.2%}")
    print("="*50)

def main():
    """Main execution function to compare both implementations"""
    # Test input data
    test_input = {
        "user_id": "12345",
        "portfolio_balance": 5000,
        "current_strategy": "Medium-Risk",
        "available_liquidity_pools": [
            {
                "name": "Aave USDC",
                "apy": 6.5,
                "liquidity": 10000000,
                "impermanent_loss_risk": 0.01
            },
            {
                "name": "Uniswap ETH/USDT",
                "apy": 15.2,
                "liquidity": 5000000,
                "impermanent_loss_risk": 0.08
            },
            {
                "name": "Curve stETH/ETH",
                "apy": 9.1,
                "liquidity": 7500000,
                "impermanent_loss_risk": 0.04
            }
        ],
        "market_volatility": 0.05,
        "gas_fees": {
            "NEAR": 0.001,
            "Ethereum": 20
        }
    }

    try:
        # Get recommendations from both implementations
        print("Generating recommendations from both implementations...")

        # Initialize optimizers
        multi_optimizer = MultiModelOptimizer(MODEL_PATHS)
        actual_recommendation = multi_optimizer.generate_best_strategy(test_input)

        # Generate expected recommendation
        expected_recommendation = json.loads(generate_recommendation(test_input, None))

        if actual_recommendation and expected_recommendation:
            # Compare recommendations
            comparison_results = compare_recommendations(actual_recommendation, expected_recommendation)

            # Print detailed results
            print_comparison_results(comparison_results)

            # Save results to file
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            results_file = f"comparison_results_{timestamp}.json"

            with open(results_file, 'w') as f:
                json.dump({
                    "test_input": test_input,
                    "actual_recommendation": actual_recommendation,
                    "expected_recommendation": expected_recommendation,
                    "comparison_results": comparison_results
                }, f, indent=2)

            print(f"\nDetailed results saved to: {results_file}")
        else:
            print("Error: Failed to generate recommendations from one or both implementations")

    except Exception as e:
        print(f"Error in main execution: {str(e)}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    main()

Generating recommendations from both implementations...

Initializing 2 models...
Initialized DefiYieldOptimizer with model path: /content/model1.zip
Model 1 initialized successfully from: /content/model1.zip
Initialized DefiYieldOptimizer with model path: /content/model2.zip
Model 2 initialized successfully from: /content/model2.zip

Generating recommendations from multiple models...

Generating recommendation from Model 1...
Model loaded successfully from /content/model1.zip
Observation shape: (46,)
Model prediction (action): [0.00862982 0.1       ]

Recommendation Evaluation:
APY: 6.50%
Risk Score: 0.22
Confidence: 0.71
Risk-Adjusted Return: 5.04
Allocation Score: -3194.29
Final Score: -956.06
Model 1 Score: -956.06

Generating recommendation from Model 2...
Model loaded successfully from /content/model2.zip
Observation shape: (46,)
Model prediction (action): [0.02892614 0.1       ]

Recommendation Evaluation:
APY: 6.50%
Risk Score: 0.22
Confidence: 0.71
Risk-Adjusted Return: 5.04
A

