In [None]:
def _calculate_reward(self):
        """Compute, record and return the shaped reward for the current frame.

        Terms applied in every training phase:
          * a fixed penalty when the player took damage this frame;
          * a quadratic penalty that grows as the player comes within
            300 units of the nearest window edge;
          * a fixed penalty when the game is over;
          * bonuses for moving more than 5 units and for rotating more
            than 5 degrees since the previous call (both capped).

        Phase-specific terms, selected by ``self.wave_manager.training_phase``
        (treated as 0 when the attribute is absent):
          0 - movement only: nothing extra.
          1 - meteor dodging: per-frame survival bonus plus near-miss bonuses.
          2 - laser dodging: per-frame survival bonus plus dodge/avoid bonuses.
          3 - aim training: kill bonus, aim/shoot bonuses, and a penalty for
              not aiming while enemies exist.
          4 - full combat: wave-completion bonus and kill bonus.

        Side effects: updates ``self.prev_player_pos``, ``self.prev_rotation``
        and ``self.prev_wave_active`` for the next call, sets
        ``self.current_reward``, appends to ``self.reward_history`` and adds
        to ``self.total_reward``.

        Returns:
            The reward for this frame.
        """
        BASE_SCALE = 0.05
        CLOSE_DISTANCE = 150          # Meteor near-miss radius
        DAMAGE_PENALTY = -5.0
        GAME_OVER_PENALTY = -7.5
        BOUNDARY_PENALTY_SCALE = -0.5
        BOUNDARY_MARGIN = 300.0       # Edge distance at which the penalty starts
        SURVIVAL_REWARD = 0.01        # Per-frame bonus in the dodging phases
        AIM_DOT_THRESHOLD = 0.7       # cos of roughly 45 degrees

        training_phase = getattr(self.wave_manager, "training_phase", 0)
        player = self.player
        enemy_count = len(self.enemies)
        reward = 0.0

        # ---- Terms common to all phases ----

        if self.player_took_damage_this_frame:
            reward += DAMAGE_PENALTY

        # Distance to the nearest window edge; negative if outside the window.
        edge_distance = min(
            player.center_pos.x,
            WINDOW_WIDTH - player.center_pos.x,
            player.center_pos.y,
            WINDOW_HEIGHT - player.center_pos.y,
        )
        if edge_distance < BOUNDARY_MARGIN:
            # Quadratic ramp: 0 at the margin, 3 * BOUNDARY_PENALTY_SCALE at the edge.
            edge_factor = 1.0 - edge_distance / BOUNDARY_MARGIN
            reward += BOUNDARY_PENALTY_SCALE * (edge_factor ** 2) * 3

        if self.game_over:
            reward += GAME_OVER_PENALTY

        # Movement / rotation since the previous call. On the very first call
        # the previous state is seeded with the current one, so both deltas are 0.
        # NOTE(review): the previous state is never cleared here - confirm the
        # caller resets it between episodes, otherwise a respawn counts as movement.
        current_pos = Vector2(player.center_pos.x, player.center_pos.y)
        if not hasattr(self, "prev_player_pos"):
            self.prev_player_pos = current_pos
        if not hasattr(self, "prev_rotation"):
            self.prev_rotation = player.rotation

        distance_moved = math.hypot(
            current_pos.x - self.prev_player_pos.x,
            current_pos.y - self.prev_player_pos.y,
        )

        # Smallest angle between the two headings. The modulo keeps the result
        # in [0, 180] even if the raw difference exceeds a full turn.
        rotation_change = abs(player.rotation - self.prev_rotation) % 360
        if rotation_change > 180:
            rotation_change = 360 - rotation_change

        if distance_moved > 5.0:  # Ignore jitter
            reward += 0.1 * min(distance_moved, 30.0)  # Capped at 30 units per frame

        if rotation_change > 5.0:  # Ignore jitter
            reward += 0.05 * min(rotation_change, 90.0)  # Capped at 90 degrees per frame

        self.prev_player_pos = current_pos
        self.prev_rotation = player.rotation

        # ---- Phase-specific terms ----

        if training_phase == 0:  # Movement Only
            pass  # Covered entirely by the common terms above

        elif training_phase == 1:  # Meteor Dodging
            reward += SURVIVAL_REWARD

            # Bonus for every meteor that is close but not overlapping the player.
            near_miss_reward = 0
            for meteor in getattr(self, "meteors", None) or ():
                distance = math.hypot(
                    meteor.pos.x - player.center_pos.x,
                    meteor.pos.y - player.center_pos.y,
                )
                if distance < CLOSE_DISTANCE and distance > meteor.collision_radius + player.collision_radius:
                    gap = distance - player.collision_radius - meteor.collision_radius
                    # Closer pass => larger factor; the max() caps the factor at 0.8.
                    proximity_factor = 1.0 - max(0.2, gap / CLOSE_DISTANCE)
                    near_miss_reward += BASE_SCALE * 20.0 * proximity_factor ** 2
            reward += near_miss_reward

        elif training_phase == 2:  # Laser Dodging
            reward += SURVIVAL_REWARD

            lasers = getattr(self, "lasers", None)
            if lasers is not None and len(lasers) > 0:
                dodged = getattr(self, "player_dodged_laser_this_frame", False)
                for laser in lasers:
                    # One bonus per active laser that targets the player while
                    # the dodge flag is set for this frame.
                    if (
                        getattr(laser, "active", False)
                        and getattr(laser, "is_targeting_player", False)
                        and dodged
                    ):
                        reward += BASE_SCALE * 20.0

                # Bonus per laser counted as avoided this frame (only evaluated
                # while at least one laser exists).
                reward += BASE_SCALE * 25.0 * getattr(self, "lasers_avoided_this_frame", 0)

        elif training_phase == 3:  # Aim Training
            is_shooting = getattr(player, "is_shooting", False) or getattr(player, "just_fired", False)

            if self.enemies_destroyed_this_frame > 0:
                reward += BASE_SCALE * 50.0 * self.enemies_destroyed_this_frame

            if enemy_count > 0:
                # Unit facing vector; rotation is in degrees and 0 maps to (0, -1).
                heading = math.radians(player.rotation)
                facing_x, facing_y = math.sin(heading), -math.cos(heading)

                # Best alignment (dot product of unit vectors) over all enemies.
                best_alignment = 0
                for enemy in self.enemies:
                    dx = enemy.center_pos.x - player.center_pos.x
                    dy = enemy.center_pos.y - player.center_pos.y
                    dist = math.hypot(dx, dy)
                    if dist > 0:  # Skip an enemy exactly on top of the player
                        best_alignment = max(
                            best_alignment,
                            facing_x * (dx / dist) + facing_y * (dy / dist),
                        )

                if best_alignment > AIM_DOT_THRESHOLD:
                    # Rescale (threshold, 1] to (0, 1] and square to favour precise aim.
                    aim_accuracy = (best_alignment - 0.7) / 0.3
                    reward += BASE_SCALE * 8.0 * (aim_accuracy ** 2)
                    if is_shooting:
                        reward += BASE_SCALE * 40.0 * (aim_accuracy ** 2)
                else:
                    # Not aiming at anything: penalty grows with the number of
                    # enemies (BASE_SCALE * -4.0, i.e. -0.2, at 5 enemies).
                    reward += BASE_SCALE * -4.0 * (enemy_count / 5.0)
                    # Extra penalty for rotating noticeably while still not aiming.
                    if rotation_change > 5.0:
                        reward += BASE_SCALE * -2.0

        elif training_phase == 4:  # Full Combat
            # Wave completion: wave was active on the previous call and is not now.
            if getattr(self, "prev_wave_active", False) and not self.wave_manager.wave_active:
                wave_num = self.wave_manager.current_wave
                reward += BASE_SCALE * 60.0 * (1.0 + 0.1 * wave_num)

            if self.enemies_destroyed_this_frame > 0:
                reward += BASE_SCALE * 50.0 * self.enemies_destroyed_this_frame

        # Remember the wave state so the next call can detect a completion edge.
        self.prev_wave_active = getattr(self.wave_manager, "wave_active", False)

        self.current_reward = reward
        self.reward_history.append(reward)
        self.total_reward += reward

        return reward

In [None]:
 def _calculate_reward(self):
        """Compute, record and return the shaped reward for the current frame.

        Terms applied in every training phase:
          * a fixed penalty when the player took damage this frame;
          * a quadratic penalty that grows as the player comes within
            300 units of the nearest window edge;
          * a fixed penalty when the game is over;
          * a small per-frame survival bonus;
          * bonuses for moving more than 5 units and for rotating more
            than 5 degrees since the previous call (both capped).

        Phase-specific terms, selected by ``self.wave_manager.training_phase``
        (treated as 0 when the attribute is absent):
          0 - movement only: nothing extra.
          1 - meteor dodging: near-miss bonuses.
          2 - laser dodging: dodge/avoid bonuses.
          3 - aim training: aim/shoot bonuses and kill bonus.
          4 - full combat: wave-completion and kill bonuses, a bonus for
              shooting while aimed, plus reduced meteor and laser bonuses.

        Side effects: updates ``self.prev_player_pos``, ``self.prev_rotation``
        and ``self.prev_wave_active`` for the next call, sets
        ``self.current_reward``, appends to ``self.reward_history`` and adds
        to ``self.total_reward``.

        NOTE(review): no anti-camping term is applied here - confirm whether
        one is still planned.

        Returns:
            The reward for this frame.
        """
        BASE_SCALE = 0.05
        CLOSE_DISTANCE = 150          # Meteor near-miss radius
        DAMAGE_PENALTY = -5.0
        GAME_OVER_PENALTY = -7.5
        BOUNDARY_PENALTY_SCALE = -0.5
        BOUNDARY_MARGIN = 300.0       # Edge distance at which the penalty starts
        SURVIVAL_REWARD = 0.01        # Per-frame bonus
        AIM_DOT_THRESHOLD = 0.7       # cos of roughly 45 degrees

        training_phase = getattr(self.wave_manager, "training_phase", 0)
        player = self.player
        enemy_count = len(self.enemies)
        reward = 0.0

        # ---- Local helpers shared by several phases ----

        def near_miss_total(per_meteor_scale):
            """Sum the near-miss bonus over meteors that are close but not touching."""
            total = 0
            for meteor in getattr(self, "meteors", None) or ():
                distance = math.hypot(
                    meteor.pos.x - player.center_pos.x,
                    meteor.pos.y - player.center_pos.y,
                )
                if distance < CLOSE_DISTANCE and distance > meteor.collision_radius + player.collision_radius:
                    gap = distance - player.collision_radius - meteor.collision_radius
                    # Closer pass => larger factor; the max() caps the factor at 0.8.
                    proximity_factor = 1.0 - max(0.2, gap / CLOSE_DISTANCE)
                    total += per_meteor_scale * proximity_factor ** 2
            return total

        def laser_dodge_total(dodge_bonus, avoid_bonus):
            """Sum the laser bonuses; 0 when there are no lasers at all."""
            lasers = getattr(self, "lasers", None)
            if lasers is None or len(lasers) == 0:
                return 0.0
            dodged = getattr(self, "player_dodged_laser_this_frame", False)
            total = 0.0
            for laser in lasers:
                # One bonus per active laser that targets the player while the
                # dodge flag is set for this frame.
                if (
                    getattr(laser, "active", False)
                    and getattr(laser, "is_targeting_player", False)
                    and dodged
                ):
                    total += dodge_bonus
            # Bonus per laser counted as avoided this frame.
            total += avoid_bonus * getattr(self, "lasers_avoided_this_frame", 0)
            return total

        def best_enemy_alignment():
            """Return the largest facing/to-enemy dot product (0 if none is positive)."""
            # Unit facing vector; rotation is in degrees and 0 maps to (0, -1).
            heading = math.radians(player.rotation)
            facing_x, facing_y = math.sin(heading), -math.cos(heading)
            best = 0
            for enemy in self.enemies:
                dx = enemy.center_pos.x - player.center_pos.x
                dy = enemy.center_pos.y - player.center_pos.y
                dist = math.hypot(dx, dy)
                if dist > 0:  # Skip an enemy exactly on top of the player
                    best = max(best, facing_x * (dx / dist) + facing_y * (dy / dist))
            return best

        # ---- Terms common to all phases ----

        if self.player_took_damage_this_frame:
            reward += DAMAGE_PENALTY

        # Distance to the nearest window edge; negative if outside the window.
        edge_distance = min(
            player.center_pos.x,
            WINDOW_WIDTH - player.center_pos.x,
            player.center_pos.y,
            WINDOW_HEIGHT - player.center_pos.y,
        )
        if edge_distance < BOUNDARY_MARGIN:
            # Quadratic ramp: 0 at the margin, 3 * BOUNDARY_PENALTY_SCALE at the edge.
            edge_factor = 1.0 - edge_distance / BOUNDARY_MARGIN
            reward += BOUNDARY_PENALTY_SCALE * (edge_factor ** 2) * 3

        if self.game_over:
            reward += GAME_OVER_PENALTY

        # Movement / rotation since the previous call. On the very first call
        # the previous state is seeded with the current one, so both deltas are 0.
        # NOTE(review): the previous state is never cleared here - confirm the
        # caller resets it between episodes, otherwise a respawn counts as movement.
        current_pos = Vector2(player.center_pos.x, player.center_pos.y)
        if not hasattr(self, "prev_player_pos"):
            self.prev_player_pos = current_pos
        if not hasattr(self, "prev_rotation"):
            self.prev_rotation = player.rotation

        distance_moved = math.hypot(
            current_pos.x - self.prev_player_pos.x,
            current_pos.y - self.prev_player_pos.y,
        )

        # Smallest angle between the two headings. The modulo keeps the result
        # in [0, 180] even if the raw difference exceeds a full turn.
        rotation_change = abs(player.rotation - self.prev_rotation) % 360
        if rotation_change > 180:
            rotation_change = 360 - rotation_change

        if distance_moved > 5.0:  # Ignore jitter
            reward += 0.1 * min(distance_moved, 30.0)  # Capped at 30 units per frame

        reward += SURVIVAL_REWARD

        if rotation_change > 5.0:  # Ignore jitter
            reward += 0.05 * min(rotation_change, 90.0)  # Capped at 90 degrees per frame

        self.prev_player_pos = current_pos
        self.prev_rotation = player.rotation

        # ---- Phase-specific terms ----

        if training_phase == 0:  # Movement Only
            pass  # Covered entirely by the common terms above

        elif training_phase == 1:  # Meteor Dodging
            reward += near_miss_total(BASE_SCALE * 20.0)

        elif training_phase == 2:  # Laser Dodging
            reward += laser_dodge_total(BASE_SCALE * 20.0, BASE_SCALE * 25.0)

        elif training_phase == 3:  # Aim Training
            is_shooting = getattr(player, "is_shooting", False) or getattr(player, "just_fired", False)

            if enemy_count > 0:
                alignment = best_enemy_alignment()
                if alignment > AIM_DOT_THRESHOLD:
                    # Rescale (threshold, 1] to (0, 1] and square to favour precise aim.
                    aim_accuracy = (alignment - 0.7) / 0.3
                    reward += BASE_SCALE * 8.0 * (aim_accuracy ** 2)
                    if is_shooting:
                        reward += BASE_SCALE * 40.0 * (aim_accuracy ** 2)

            if self.enemies_destroyed_this_frame > 0:
                reward += BASE_SCALE * 50.0 * self.enemies_destroyed_this_frame

        elif training_phase == 4:  # Full Combat
            # Wave completion: wave was active on the previous call and is not now.
            if getattr(self, "prev_wave_active", False) and not self.wave_manager.wave_active:
                wave_num = self.wave_manager.current_wave
                reward += BASE_SCALE * 60.0 * (1.0 + 0.1 * wave_num)

            if self.enemies_destroyed_this_frame > 0:
                reward += BASE_SCALE * 50.0 * self.enemies_destroyed_this_frame

            # Only shooting while aimed is rewarded here (no aim-only bonus).
            if enemy_count > 0:
                alignment = best_enemy_alignment()
                is_shooting = getattr(player, "is_shooting", False) or getattr(player, "just_fired", False)
                if is_shooting and alignment > AIM_DOT_THRESHOLD:
                    aim_accuracy = (alignment - 0.7) / 0.3
                    reward += BASE_SCALE * 15.0 * (aim_accuracy ** 2)

            # Dodging bonuses with smaller scales than phases 1 and 2 use.
            reward += near_miss_total(BASE_SCALE * 8.0)
            reward += laser_dodge_total(BASE_SCALE * 15.0, BASE_SCALE * 20.0)

        # Remember the wave state so the next call can detect a completion edge.
        self.prev_wave_active = getattr(self.wave_manager, "wave_active", False)

        self.current_reward = reward
        self.reward_history.append(reward)
        self.total_reward += reward

        return reward

    
