In [1]:
from __future__ import annotations

import os
import time
import numpy as np
import tensorflow as tf


from system.mec_env_var import MecTermRL_IDE, MecSvrEnv

def _setup_tf():
    """Turn on TensorFlow memory growth for every visible GPU.

    Does nothing (and prints nothing) when TensorFlow reports no GPU.
    Any exception raised while configuring the devices is reported as a
    warning on stdout instead of being propagated, so a GPU setup problem
    never aborts the run.
    """
    detected = tf.config.list_physical_devices('GPU')
    if not detected:
        return

    try:
        for device in detected:
            tf.config.experimental.set_memory_growth(device, True)
        # Reached only when every device was configured without error.
        print(f"✓ Found {len(detected)} GPU(s). Memory growth enabled.")
    except Exception as err:
        print(f"Warning: GPU Setup Error: {err}")

def _safe_mean(values):
    """Return the mean of ``values`` as a float, or 0.0 when it is empty.

    Avoids NumPy's "mean of empty slice" warning / NaN result when a
    statistic is requested for a phase that has no recorded episodes.
    """
    arr = np.asarray(values)
    return float(arr.mean()) if arr.size else 0.0


def _build_users(user_settings, base_config, train_config, ide_config, num_r, model_prefix):
    """Create one ``MecTermRL_IDE`` terminal per entry of ``user_settings``.

    Each user's config is its settings entry extended with the shared
    terminal parameters, ``base_config`` and a per-user ``ckpt_dir``.
    Returns the users in the same order as ``user_settings``.
    """
    user_list = []
    for idx, sett in enumerate(user_settings):
        # Copy so the caller's settings dict is never mutated.
        u_cfg = sett.copy()
        u_cfg.update({
            'id': str(idx + 1),
            'model': 'AR',
            'num_r': num_r,
            'action_bound': 2,
            'data_buf_size': 100,
            't_factor': 0.5,
            'penalty': 1000,
            **base_config
        })
        u_cfg['ckpt_dir'] = f"{model_prefix}_user_{idx + 1}"

        u = MecTermRL_IDE(u_cfg, train_config, ide_config)
        user_list.append(u)
        print(f"✓ User {u.user_id} initialized (DDPG + IDE Deterministic)")
    return user_list


def _save_checkpoints(user_list, model_prefix):
    """Save every user's agent checkpoint under ``<model_prefix>_user_<id>``."""
    for u in user_list:
        u.agent.save_checkpoint(f"{model_prefix}_user_{u.user_id}")


def _print_progress_report(episode_idx, user_list, res_r, res_p, res_b,
                           ide_stats_history, ep_ide_counts, ep_ide_decisions,
                           max_episode_len, window=50):
    """Print averages over the last ``window`` episodes plus IDE usage per user.

    ``episode_idx`` is the zero-based index of the episode that just ended;
    ``ep_ide_counts`` / ``ep_ide_decisions`` describe that episode only.
    """
    window_size = min(window, len(res_r))

    # Averages over the most recent episodes (per user, axis 0 = episode).
    avg_reward = np.mean(np.array(res_r[-window_size:]), axis=0)
    avg_power = np.mean(np.array(res_p[-window_size:]), axis=0)
    avg_buffer = np.mean(np.array(res_b[-window_size:]), axis=0)
    avg_ide_counts = np.mean(np.array(ide_stats_history[-window_size:]), axis=0)

    print(f"\n{'━'*70}")
    print(f"Episode {episode_idx+1} | Avg of Last {window_size} Episodes")
    print(f"{'━'*70}")
    print(f"Reward:  {avg_reward}")
    print(f"Power:   {avg_power}")
    print(f"Buffer:  {avg_buffer}")

    print(f"\n{'─'*70}")
    print(f"IDE USAGE ANALYTICS")
    print(f"{'─'*70}")

    for u_idx, u in enumerate(user_list):
        stats = u.get_ide_stats()

        user_ide_count = avg_ide_counts[u_idx]
        user_usage_pct = (user_ide_count / max_episode_len) * 100

        print(f"\nUser {u.user_id}:")
        print(f"  ├─ IDE Calls (avg): {user_ide_count:.1f}/{max_episode_len} "
              f"({user_usage_pct:.2f}%)")
        print(f"  ├─ Current Episode: {ep_ide_counts[u_idx]}/{max_episode_len} "
              f"({(ep_ide_counts[u_idx]/max_episode_len)*100:.2f}%)")
        print(f"  ├─ Buffer Size:     {stats['replay_buffer_size']:,}")
        print(f"  ├─ Current Phase:   {stats['current_phase']}")
        print(f"  ├─ IDE Interval:    {stats['current_interval']} steps")
        print(f"  ├─ Blend Alpha:     {stats['current_blend_alpha']:.3f}")
        print(f"  └─ Total IDE Apps:  {stats['ide_applications']}")

        # Show at most the first five steps at which IDE was used.
        decisions = ep_ide_decisions[u_idx]
        if len(decisions) > 0:
            sample_steps = decisions[:5]
            suffix = '...' if len(decisions) > 5 else ''
            print(f"     └─ IDE @ steps: {sample_steps}{suffix}")

    total_possible = len(user_list) * max_episode_len
    avg_ide_pct = (np.sum(avg_ide_counts) / total_possible) * 100
    current_ep_ide_pct = (np.sum(ep_ide_counts) / total_possible) * 100

    # Per-user breakdowns are generated from the data, so any number of
    # users is supported (previously indices 0..2 were hard-coded).
    avg_breakdown = ", ".join(
        f"U{n + 1}: {(count / max_episode_len) * 100:.1f}%"
        for n, count in enumerate(avg_ide_counts)
    )
    current_breakdown = ", ".join(
        f"U{n + 1}: {count}" for n, count in enumerate(ep_ide_counts)
    )

    print(f"\n{'─'*70}")
    print(f" Overall IDE Usage:")
    print(f"  ├─ Avg (Last {window_size}): {avg_ide_pct:.2f}% [{avg_breakdown}]")
    print(f"  └─ Current Ep {episode_idx+1}:  {current_ep_ide_pct:.2f}% [{current_breakdown}]")

    # Cumulative usage over every recorded episode and user.
    if len(ide_stats_history) > 0:
        all_ide = np.array(ide_stats_history)
        cumulative_ide_pct = (np.mean(all_ide) / max_episode_len) * 100
        print(f"     └─ Cumulative:    {cumulative_ide_pct:.2f}%")

    print(f"{'━'*70}\n")


def _print_checkpoint_report(episode_idx, user_list, user_settings, res_r,
                             per_user_ide_history, ide_config, max_episode_len):
    """Print overall reward statistics and a per-user IDE analysis.

    ``user_settings[idx]`` must be the settings entry ``user_list[idx]`` was
    built from; it supplies the arrival rate shown next to each user.
    """
    print(f"\n{'═'*70}")
    print(f" CHECKPOINT SAVED AT EPISODE {episode_idx+1}")
    print(f"{'═'*70}")

    all_rewards = np.array(res_r)

    print(f"\n Overall Statistics (Episodes 0-{episode_idx+1}):")
    print(f"  Mean Reward: {np.mean(all_rewards, axis=0)}")
    print(f"  Std Reward:  {np.std(all_rewards, axis=0)}")

    print(f"\n{'─'*70}")
    print(f"DETAILED IDE ANALYSIS BY USER")
    print(f"{'─'*70}")

    warmup_end = ide_config['warmup_episodes']
    phase_transition = ide_config['phase_transition']

    for idx, u in enumerate(user_list):
        stats = u.get_ide_stats()
        user_ide_history = np.array(per_user_ide_history[idx])

        if len(user_ide_history) == 0:
            continue

        mean_calls = np.mean(user_ide_history)
        std_calls = np.std(user_ide_history)
        min_calls = np.min(user_ide_history)
        max_calls = np.max(user_ide_history)
        usage_pct = (mean_calls / max_episode_len) * 100

        # Use this user's own rate; the former code read a variable left over
        # from the construction loop and always showed the last user's rate.
        user_rate = user_settings[idx]['rate']

        print(f"\nUser {u.user_id} ({user_rate} Mbps):")
        print(f"  ├─ IDE Usage:        {usage_pct:.2f}%")
        print(f"  ├─ Avg Calls/Ep:     {mean_calls:.2f} ± {std_calls:.2f}")
        print(f"  ├─ Range:            [{min_calls}, {max_calls}]")
        print(f"  ├─ Total IDE Apps:   {stats['ide_applications']}")
        print(f"  ├─ Current Phase:    {stats['current_phase']}")
        print(f"  ├─ Current Interval: {stats['current_interval']} steps")
        print(f"  ├─ Blend Alpha:      {stats['current_blend_alpha']:.3f}")
        print(f"  └─ Buffer Size:      {stats['replay_buffer_size']:,}")

        # Expected usage is one IDE call per interval, once warmup is over.
        if episode_idx + 1 > warmup_end:
            if episode_idx + 1 <= phase_transition:
                expected_pct = 100 / ide_config['conservative_interval']
                print(f"     └─ Expected (Conservative): {expected_pct:.2f}%")
            else:
                expected_pct = 100 / ide_config['aggressive_interval']
                print(f"     └─ Expected (Aggressive):   {expected_pct:.2f}%")

    print(f"\n{'═'*70}\n")


def _print_final_report(trial_idx, elapsed_time, ide_stats_history,
                        per_user_ide_history, ide_config, max_episode_len):
    """Print the end-of-trial summary: training time and IDE usage per phase."""
    print(f"\n{'═'*70}")
    print(f"TRIAL {trial_idx} COMPLETED")
    print(f"{'═'*70}")
    print(f"Training Time: {elapsed_time/60:.2f} minutes ({elapsed_time/3600:.2f} hours)")

    overall_ide_pct = (_safe_mean(ide_stats_history) / max_episode_len) * 100

    print(f"\n{'─'*70}")
    print(f"FINAL IDE STATISTICS")
    print(f"{'─'*70}")
    print(f"Overall IDE Usage: {overall_ide_pct:.2f}%")
    print(f"\nPer-User Breakdown:")

    warmup_end = ide_config['warmup_episodes']
    phase_transition = ide_config['phase_transition']

    for idx, history in enumerate(per_user_ide_history):
        user_ide_history = np.array(history)
        mean_calls = _safe_mean(user_ide_history)
        user_ide_pct = (mean_calls / max_episode_len) * 100

        # Phase-wise analysis: average whatever episodes fall in each phase.
        # A phase with no recorded episodes reports 0 instead of NaN, and a
        # run shorter than a phase boundary still reports the partial phase.
        warmup_calls = _safe_mean(user_ide_history[:warmup_end])
        conservative_calls = _safe_mean(user_ide_history[warmup_end:phase_transition])
        aggressive_calls = _safe_mean(user_ide_history[phase_transition:])

        print(f"\n  User {idx+1}:")
        print(f"    ├─ Overall:      {user_ide_pct:.2f}% ({mean_calls:.2f} calls/ep)")
        print(f"    ├─ Warmup:       {(warmup_calls/max_episode_len)*100:.2f}% (Ep 0-{warmup_end})")
        print(f"    ├─ Conservative: {(conservative_calls/max_episode_len)*100:.2f}% (Ep {warmup_end}-{phase_transition})")
        print(f"    └─ Aggressive:   {(aggressive_calls/max_episode_len)*100:.2f}% (Ep {phase_transition}+)")

    print(f"\n{'═'*70}\n")


def main():
    """Train DDPG + IDE agents on the MEC environment for several trials.

    For each trial this builds one ``MecTermRL_IDE`` user per entry of the
    user settings, wraps them in a ``MecSvrEnv``, runs ``MAX_EPISODE``
    episodes of at most ``MAX_EPISODE_LEN`` steps, prints periodic progress
    and IDE-usage reports, and finally writes a ``.npz`` results file under
    ``results_<trial_name>/`` and agent checkpoints under
    ``models_<trial_name>/``.

    Returns None; all results are reported on stdout or written to disk.
    """
    _setup_tf()

    MAX_EPISODE = 1500
    MAX_EPISODE_LEN = 200
    NUM_R = 4
    SIGMA2 = 1e-9
    NUM_TRIALS = 7
    REPORT_INTERVAL = 50         # episodes between progress reports (also the averaging window)
    CHECKPOINT_INTERVAL = 1500   # episodes between intermediate checkpoints

    ide_config = {
        'pop_size': 15,
        'gen': 3,
        'pbest_rate': 0.25,
        'archive_size': 15,
        'memory_size': 3,
        'F_init': 0.2,
        'CR_init': 0.9,

        'warmup_episodes': 500,
        'phase_transition': 1000,
        'conservative_interval': 30,
        'aggressive_interval': 20,
        'min_buffer_size': 20000,
        'decay_episodes': 500,
    }

    config = {'state_dim': 3, 'action_dim': 2}

    train_config = {
        'agent_type': 'ddpg',
        'minibatch_size': 64,
        'actor_lr': 1e-4,
        'critic_lr': 1e-3,
        'tau': 0.001,
        'gamma': 0.99,
        'buffer_size': 250000,
        # NOTE(review): evaluated once, so every trial receives the same seed
        # value. It is stored in each results file so a run can be reproduced;
        # confirm whether distinct per-trial seeds are wanted.
        'random_seed': int(time.time()),
        'noise_sigma': 0.12,
        'sigma2': SIGMA2,
        'is_training': True,
    }

    # Paths
    trial_name = "ddpg_ide_deterministic"
    res_path = f'results_{trial_name}/'
    model_root = f'models_{trial_name}/'
    os.makedirs(res_path, exist_ok=True)
    os.makedirs(model_root, exist_ok=True)

    # User Settings
    user_settings = [
        {'rate': 1.0, 'dis': 100},
        {'rate': 2.0, 'dis': 100},
        {'rate': 3.0, 'dis': 100}
    ]

    # Expected IDE usage is one call per interval; derived from the config so
    # the banner cannot drift out of sync with the interval values.
    conservative_pct = 100 / ide_config['conservative_interval']
    aggressive_pct = 100 / ide_config['aggressive_interval']

    for k in range(NUM_TRIALS):
        print(f"\n{'='*70}")
        print(f"STARTING TRIAL {k} - DDPG+ IDE Deterministic")
        print(f"{'='*70}")
        print(f"Config: Warmup={ide_config['warmup_episodes']} episodes, "
              f"Conservative={ide_config['conservative_interval']} steps ({conservative_pct:.1f}%), "
              f"Aggressive={ide_config['aggressive_interval']} steps ({aggressive_pct:.1f}%)")
        print(f"{'='*70}\n")

        model_prefix = os.path.join(model_root, f"trial_{k}")

        user_list = _build_users(user_settings, config, train_config,
                                 ide_config, NUM_R, model_prefix)
        n_users = len(user_list)

        env = MecSvrEnv(user_list, NUM_R, SIGMA2, MAX_EPISODE_LEN)
        env.init_target_networks()
        print(f"✓ Environment initialized ({NUM_R} antennas)\n")

        # Metrics Storage: one entry per recorded episode.
        res_r, res_p, res_b, res_o, res_d = [], [], [], [], []
        ide_stats_history = []                              # per-episode IDE counts for all users
        per_user_ide_history = [[] for _ in range(n_users)]  # same data, grouped by user
        start_time = time.time()
        total_step_counter = 0

        print(f"{'='*70}")
        print("TRAINING STARTED")
        print(f"{'='*70}\n")

        for i in range(MAX_EPISODE):
            env.reset(is_train=True)

            # Episode metrics (per-user running sums).
            cur_r_ep = np.zeros(n_users, dtype=np.float32)
            cur_p_ep = np.zeros(n_users, dtype=np.float32)
            cur_ds_ep = np.zeros(n_users, dtype=np.float32)
            cur_of_ep = np.zeros(n_users, dtype=np.float32)

            ep_ide_counts = np.zeros(n_users, dtype=int)
            ep_ide_decisions = [[] for _ in range(n_users)]  # steps at which IDE was used

            for j in range(MAX_EPISODE_LEN):
                powers_list = []
                noises_list = []

                for u_idx, u in enumerate(user_list):
                    # Let each user know where training currently stands
                    # before it picks an action.
                    u.current_episode = i
                    u.total_steps = total_step_counter

                    power, noise = u.predict(is_random=True, step_in_episode=j)
                    powers_list.append(power)
                    noises_list.append(noise)

                    info = u.get_last_action_info()
                    if info['used_ide']:
                        ep_ide_counts[u_idx] += 1
                        ep_ide_decisions[u_idx].append(j)

                external_powers = np.stack(powers_list)
                external_noises = np.stack(noises_list)

                # Only rewards, done, powers, buffers and overflows are used.
                (rewards, done, powers, _wasted, _, _offload, _process, _arrive,
                 buffers, _channels, overflows) = env.step(
                     is_random=True,
                     external_powers=external_powers,
                     external_noises=external_noises
                )

                cur_r_ep += rewards
                cur_p_ep += powers
                cur_ds_ep += buffers
                cur_of_ep += overflows
                total_step_counter += 1

                if done:
                    break

            # Record the episode once, whether it ended through ``done`` or by
            # reaching MAX_EPISODE_LEN, so an episode is never silently lost.
            # Sums are normalised by MAX_EPISODE_LEN, as before.
            res_r.append(cur_r_ep / MAX_EPISODE_LEN)
            res_p.append(cur_p_ep / MAX_EPISODE_LEN)
            res_b.append(cur_ds_ep / MAX_EPISODE_LEN)
            res_o.append(cur_of_ep / MAX_EPISODE_LEN)
            res_d.append(buffers)  # buffers as returned by the last step
            ide_stats_history.append(ep_ide_counts.copy())

            for u_idx in range(n_users):
                per_user_ide_history[u_idx].append(ep_ide_counts[u_idx])

            if (i + 1) % REPORT_INTERVAL == 0 or i == 0:
                _print_progress_report(
                    i, user_list, res_r, res_p, res_b, ide_stats_history,
                    ep_ide_counts, ep_ide_decisions, MAX_EPISODE_LEN,
                    window=REPORT_INTERVAL,
                )

            if (i + 1) % CHECKPOINT_INTERVAL == 0:
                _save_checkpoints(user_list, model_prefix)
                # Print detailed stats at checkpoints
                _print_checkpoint_report(
                    i, user_list, user_settings, res_r,
                    per_user_ide_history, ide_config, MAX_EPISODE_LEN,
                )

        elapsed_time = time.time() - start_time

        _print_final_report(k, elapsed_time, ide_stats_history,
                            per_user_ide_history, ide_config, MAX_EPISODE_LEN)

        timestamp = time.strftime("%Y%m%d_%H%M%S")
        result_file = os.path.join(res_path, f'log_trial_{k}_{timestamp}.npz')

        # One ``user<N>_ide`` array per user (user1_ide, user2_ide, ...).
        per_user_arrays = {
            f'user{n + 1}_ide': np.array(history)
            for n, history in enumerate(per_user_ide_history)
        }

        # ``config`` is a dict, so NumPy stores it as a pickled object array:
        # reading it back requires np.load(..., allow_pickle=True).
        np.savez(result_file,
                 rewards=np.array(res_r),
                 powers=np.array(res_p),
                 buffers=np.array(res_b),
                 overflows=np.array(res_o),
                 final_buffers=np.array(res_d),
                 ide_counts=np.array(ide_stats_history),
                 config=ide_config,
                 training_time=elapsed_time,
                 random_seed=train_config['random_seed'],
                 **per_user_arrays)

        _save_checkpoints(user_list, model_prefix)

        print(f"✓ Results saved to: {result_file}")
        print(f"✓ Models saved to: {model_prefix}_user_*\n")

    print(f"\n{'═'*70}")
    print("ALL TRIALS COMPLETED!")
    print(f"{'═'*70}")
    print(f"Results directory: {res_path}")
    print(f"Models directory: {model_root}")
    print(f"\nTo analyze IDE usage, load the .npz files and check:")
    print(f"   - ide_counts: Overall IDE calls per episode")
    print(f"   - user1_ide, user2_ide, user3_ide: Per-user history")
    print(f"{'═'*70}\n")



In [None]:

main()
# 981 minutes for 7 models (author's runtime note, translated from Vietnamese)