# Phase 3.5: HumanEval Temperature Robustness Results Inspection

This notebook inspects Phase 3.5 HumanEval results to verify:
- Dataset files created for each temperature
- metadata.json contains correct information
- Prompts formatted correctly
- Code generation working
- Evaluation results accurate
- Activations captured

In [14]:
import pandas as pd
import os
from pathlib import Path
import glob
import json

# Set pandas display options to show FULL content
pd.set_option('display.max_colwidth', None)  # Show full column content (NO LIMITS)
pd.set_option('display.max_columns', None)   # Show all columns
pd.set_option('display.width', None)         # Don't wrap to multiple lines
pd.set_option('display.max_rows', None)      # Show ALL rows (no limit)

print("‚úÖ Pandas display options set to show FULL content")

‚úÖ Pandas display options set to show FULL content


In [15]:
# Auto-discovery of Phase 3.5 HumanEval data
datasets_dir = "../data/phase3_5_humaneval/"
pattern = os.path.join(datasets_dir, "dataset_temp_*.parquet")
matching_files = glob.glob(pattern)

if matching_files:
    # Sort by temperature (filename)
    matching_files.sort()
    print("=" * 80)
    print("PHASE 3.5: HUMANEVAL TEMPERATURE ROBUSTNESS")
    print("=" * 80)
    print(f"\nüîç Found {len(matching_files)} temperature file(s)")
    for file in matching_files:
        file_size = os.path.getsize(file) / 1024  # KB
        print(f"  üìÅ {Path(file).name} ({file_size:.2f} KB)")
else:
    raise FileNotFoundError(f"‚ùå No temperature dataset files found in {datasets_dir}")

PHASE 3.5: HUMANEVAL TEMPERATURE ROBUSTNESS

üîç Found 1 temperature file(s)
  üìÅ dataset_temp_0_0.parquet (131.05 KB)


In [16]:
# Load and display metadata.json
metadata_path = Path(datasets_dir) / "metadata.json"

if metadata_path.exists():
    print("\n" + "=" * 80)
    print("METADATA.JSON")
    print("=" * 80)
    
    with open(metadata_path, 'r') as f:
        metadata = json.load(f)
    
    print("\nüìÑ Full metadata content:")
    print(json.dumps(metadata, indent=2))
    
    print("\nüìä Key information:")
    print(f"  - Creation timestamp: {metadata.get('creation_timestamp', 'N/A')}")
    print(f"  - Best layers: Correct={metadata.get('best_layers', {}).get('correct', 'N/A')}, Incorrect={metadata.get('best_layers', {}).get('incorrect', 'N/A')}")
    print(f"  - Features: Correct={metadata.get('best_layers', {}).get('correct_feature_idx', 'N/A')}, Incorrect={metadata.get('best_layers', {}).get('incorrect_feature_idx', 'N/A')}")
    print(f"  - Temperatures tested: {metadata.get('temperatures', [])}")
    print(f"  - Tasks attempted: {metadata.get('n_tasks_attempted', 'N/A')}")
    print(f"  - Tasks included: {metadata.get('n_tasks_included', 'N/A')}")
else:
    print(f"\n‚ö†Ô∏è  metadata.json not found at {metadata_path}")


METADATA.JSON

üìÑ Full metadata content:
{
  "creation_timestamp": "2025-11-21T07:38:09.383352",
  "best_layers": {
    "correct": 16,
    "incorrect": 19,
    "correct_feature_idx": 14439,
    "incorrect_feature_idx": 5441
  },
  "extraction_layers": [
    16,
    19
  ],
  "temperatures": [
    0.0
  ],
  "samples_per_temperature": 3,
  "validation_task_ids": [
    0,
    1,
    2,
    3,
    4,
    5,
    6,
    7,
    8,
    9,
    10,
    11,
    12,
    13,
    14,
    15,
    16,
    17,
    18,
    19,
    20,
    21,
    22,
    23,
    24,
    25,
    26,
    27,
    28,
    29,
    30,
    31,
    32,
    33,
    34,
    35,
    36,
    37,
    38,
    39,
    40,
    41,
    42,
    43,
    44,
    45,
    46,
    47,
    48,
    49,
    50,
    51,
    52,
    53,
    54,
    55,
    56,
    57,
    58,
    59,
    60,
    61,
    62,
    63,
    64,
    65,
    66,
    67,
    68,
    69,
    70,
    71,
    72,
    73,
    74,
    75,
    76,
    77,
    78,
    79,
 

In [17]:
# Check activations directory
activations_dir = Path(datasets_dir) / "activations"

print("\n" + "=" * 80)
print("ACTIVATIONS DIRECTORY")
print("=" * 80)

if activations_dir.exists():
    print(f"\n‚úÖ Activations directory exists: {activations_dir}")
    
    # Check subdirectories
    subdirs = [d for d in activations_dir.iterdir() if d.is_dir()]
    print(f"\nüìÅ Subdirectories ({len(subdirs)}):")
    for subdir in subdirs:
        files = list(subdir.glob("*.npz"))
        print(f"  - {subdir.name}/: {len(files)} activation files")
        if files:
            print(f"    Sample: {files[0].name}")
else:
    print(f"\n‚ö†Ô∏è  Activations directory not found: {activations_dir}")


ACTIVATIONS DIRECTORY

‚úÖ Activations directory exists: ../data/phase3_5_humaneval/activations

üìÅ Subdirectories (2):
  - task_activations/: 328 activation files
    Sample: 23_layer_16.npz
  - attention_patterns/: 328 activation files
    Sample: 158_layer_19_attention.npz


In [18]:
# Load and display each temperature dataset
for file_path in matching_files:
    file_name = Path(file_path).name
    temp = file_name.replace('dataset_temp_', '').replace('.parquet', '').replace('_', '.')
    
    print(f"\n{'=' * 80}")
    print(f"TEMPERATURE: {temp}")
    print(f"Dataset: {file_name}")
    print("=" * 80)
    
    df = pd.read_parquet(file_path)
    
    # Basic stats
    print(f"\nüìä Statistics:")
    print(f"  - Total records: {len(df)}")
    print(f"  - Columns: {list(df.columns)}")
    
    # Pass rate
    if 'test_passed' in df.columns:
        n_passed = df['test_passed'].sum()
        pass_rate = n_passed / len(df) * 100
        print(f"\n‚úÖ Pass Rate:")
        print(f"  - Passed: {n_passed}/{len(df)} ({pass_rate:.2f}%)")
        print(f"  - Failed: {len(df) - n_passed}/{len(df)} ({100 - pass_rate:.2f}%)")
    
    # Generation time stats
    if 'generation_time' in df.columns:
        print(f"\n‚è±Ô∏è  Generation Time:")
        print(f"  - Mean: {df['generation_time'].mean():.2f}s")
        print(f"  - Median: {df['generation_time'].median():.2f}s")
        print(f"  - Min: {df['generation_time'].min():.2f}s")
        print(f"  - Max: {df['generation_time'].max():.2f}s")
    
    print(f"\n{'=' * 80}")
    print(f"First 10 records:")
    print("=" * 80)
    display(df)


TEMPERATURE: 0.0
Dataset: dataset_temp_0_0.parquet

üìä Statistics:
  - Total records: 164
  - Columns: ['task_id', 'temperature', 'prompt', 'generated_code', 'test_passed', 'error_message', 'generation_time', 'cyclomatic_complexity', 'generation_idx', 'test_list']

‚úÖ Pass Rate:
  - Passed: 23/164 (14.02%)
  - Failed: 141/164 (85.98%)

‚è±Ô∏è  Generation Time:
  - Mean: 14.78s
  - Median: 10.21s
  - Min: 0.98s
  - Max: 45.00s

First 10 records:


Unnamed: 0,task_id,temperature,prompt,generated_code,test_passed,error_message,generation_time,cyclomatic_complexity,generation_idx,test_list
0,0,0.0,"from typing import List\n\n\ndef has_close_elements(numbers: List[float], threshold: float) -> bool:\n """""" Check if in given list of numbers, are any two numbers closer to each other than\n given threshold.\n >>> has_close_elements([1.0, 2.0, 3.0], 0.5)\n False\n >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)\n True\n """"""\n\n\nassert assert has_close_elements([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.3) == True\nassert assert has_close_elements([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.05) == False\nassert assert has_close_elements([1.0, 2.0, 5.9, 4.0, 5.0], 0.95) == True\nassert assert has_close_elements([1.0, 2.0, 5.9, 4.0, 5.0], 0.8) == False\nassert assert has_close_elements([1.0, 2.0, 3.0, 4.0, 5.0, 2.0], 0.1) == True\nassert assert has_close_elements([1.1, 2.2, 3.1, 4.1, 5.1], 1.0) == True\nassert assert has_close_elements([1.1, 2.2, 3.1, 4.1, 5.1], 0.5) == False\n\n# Solution:","def has_close_elements(numbers: List[float], threshold: float) -> bool:\n """""" Check if in given list of numbers, are any two numbers closer to each other than\n given threshold.\n >>> has_close_elements([1.0, 2.0, 3.0], 0.5)\n False\n >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)\n True\n """"""\n for i in range(len(numbers)):\n for j in range(i+1, len(numbers)):\n if abs(numbers[i] - numbers[j]) < threshold:\n return True\n return False",False,,11.130911,0,0,"[""assert has_close_elements([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.3) == True"", ""assert has_close_elements([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.05) == False"", ""assert has_close_elements([1.0, 2.0, 5.9, 4.0, 5.0], 0.95) == True"", ""assert has_close_elements([1.0, 2.0, 5.9, 4.0, 5.0], 0.8) == False"", ""assert has_close_elements([1.0, 2.0, 3.0, 4.0, 5.0, 2.0], 0.1) == True"", ""assert has_close_elements([1.1, 2.2, 3.1, 4.1, 5.1], 1.0) == True"", ""assert has_close_elements([1.1, 2.2, 3.1, 4.1, 5.1], 0.5) == False""]"
1,1,0.0,"from typing import List\n\n\ndef separate_paren_groups(paren_string: str) -> List[str]:\n """""" Input to this function is a string containing multiple groups of nested parentheses. Your goal is to\n separate those group into separate strings and return the list of those.\n Separate groups are balanced (each open brace is properly closed) and not nested within each other\n Ignore any spaces in the input string.\n >>> separate_paren_groups('( ) (( )) (( )( ))')\n ['()', '(())', '(()())']\n """"""\n\n\nassert assert separate_paren_groups('(()()) ((())) () ((())()())') == [\nassert assert separate_paren_groups('() (()) ((())) (((())))') == [\nassert assert separate_paren_groups('(()(())((())))') == [\nassert assert separate_paren_groups('( ) (( )) (( )( ))') == ['()', '(())', '(()())']\n\n# Solution:","def separate_paren_groups(paren_string: str) -> List[str]:\n """""" Input to this function is a string containing multiple groups of nested parentheses. Your goal is to\n separate those group into separate strings and return the list of those.\n Separate groups are balanced (each open brace is properly closed) and not nested within each other\n Ignore any spaces in the input string.\n >>> separate_paren_groups('( ) (( )) (( )( ))')\n ['()', '(())', '(()())']\n """"""\n paren_string = paren_string.replace(' ', '')\n paren_string = paren_string.replace('(', '(')\n paren_string = paren_string.replace(')', ')')\n paren_string = paren_string.replace('()', '')\n paren_string = paren_string.replace('(())', '')\n paren_string = paren_string.replace('(()())', '')\n paren_string = paren_string.replace('(()()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.replace('(()', '')\n paren_string = paren_string.",False,,43.849078,0,0,"[""assert separate_paren_groups('(()()) ((())) () ((())()())') == ["", ""assert separate_paren_groups('() (()) ((())) (((())))') == ["", ""assert separate_paren_groups('(()(())((())))') == ["", ""assert separate_paren_groups('( ) (( )) (( )( ))') == ['()', '(())', '(()())']""]"
2,2,0.0,"\n\ndef truncate_number(number: float) -> float:\n """""" Given a positive floating point number, it can be decomposed into\n and integer part (largest integer smaller than given number) and decimals\n (leftover part always smaller than 1).\n\n Return the decimal part of the number.\n >>> truncate_number(3.5)\n 0.5\n """"""\n\n\nassert assert truncate_number(3.5) == 0.5\nassert assert abs(truncate_number(1.33) - 0.33) < 1e-6\nassert assert abs(truncate_number(123.456) - 0.456) < 1e-6\n\n# Solution:","def truncate_number(number: float) -> float:\n """""" Given a positive floating point number, it can be decomposed into\n and integer part (largest integer smaller than given number) and decimals\n (leftover part always smaller than 1).\n\n Return the decimal part of the number.\n >>> truncate_number(3.5)\n 0.5\n """"""\n return number - int(number)",True,,5.307505,0,0,"[""assert truncate_number(3.5) == 0.5"", ""assert abs(truncate_number(1.33) - 0.33) < 1e-6"", ""assert abs(truncate_number(123.456) - 0.456) < 1e-6""]"
3,3,0.0,"from typing import List\n\n\ndef below_zero(operations: List[int]) -> bool:\n """""" You're given a list of deposit and withdrawal operations on a bank account that starts with\n zero balance. Your task is to detect if at any point the balance of account fallls below zero, and\n at that point function should return True. Otherwise it should return False.\n >>> below_zero([1, 2, 3])\n False\n >>> below_zero([1, 2, -4, 5])\n True\n """"""\n\n\nassert assert below_zero([]) == False\nassert assert below_zero([1, 2, -3, 1, 2, -3]) == False\nassert assert below_zero([1, 2, -4, 5, 6]) == True\nassert assert below_zero([1, -1, 2, -2, 5, -5, 4, -4]) == False\nassert assert below_zero([1, -1, 2, -2, 5, -5, 4, -5]) == True\nassert assert below_zero([1, -2, 2, -2, 5, -5, 4, -4]) == True\n\n# Solution:",# def below_zero(operations: List[int]) -> bool:,False,,9.72125,0,0,"[""assert below_zero([]) == False"", ""assert below_zero([1, 2, -3, 1, 2, -3]) == False"", ""assert below_zero([1, 2, -4, 5, 6]) == True"", ""assert below_zero([1, -1, 2, -2, 5, -5, 4, -4]) == False"", ""assert below_zero([1, -1, 2, -2, 5, -5, 4, -5]) == True"", ""assert below_zero([1, -2, 2, -2, 5, -5, 4, -4]) == True""]"
4,4,0.0,"from typing import List\n\n\ndef mean_absolute_deviation(numbers: List[float]) -> float:\n """""" For a given list of input numbers, calculate Mean Absolute Deviation\n around the mean of this dataset.\n Mean Absolute Deviation is the average absolute difference between each\n element and a centerpoint (mean in this case):\n MAD = average | x - x_mean |\n >>> mean_absolute_deviation([1.0, 2.0, 3.0, 4.0])\n 1.0\n """"""\n\n\nassert assert abs(mean_absolute_deviation([1.0, 2.0, 3.0]) - 2.0/3.0) < 1e-6\nassert assert abs(mean_absolute_deviation([1.0, 2.0, 3.0, 4.0]) - 1.0) < 1e-6\nassert assert abs(mean_absolute_deviation([1.0, 2.0, 3.0, 4.0, 5.0]) - 6.0/5.0) < 1e-6\n\n# Solution:","def mean_absolute_deviation(numbers: List[float]) -> float:\n """""" For a given list of input numbers, calculate Mean Absolute Deviation\n around the mean of this dataset.\n Mean Absolute Deviation is the average absolute difference between each\n element and a centerpoint (mean in this case):\n MAD = average | x - x_mean |\n >>> mean_absolute_deviation([1.0, 2.0, 3.0, 4.0])\n 1.0\n """"""\n return sum(abs(x - numbers.mean()) for x in numbers) / len(numbers)",False,,7.738408,0,0,"[""assert abs(mean_absolute_deviation([1.0, 2.0, 3.0]) - 2.0/3.0) < 1e-6"", ""assert abs(mean_absolute_deviation([1.0, 2.0, 3.0, 4.0]) - 1.0) < 1e-6"", ""assert abs(mean_absolute_deviation([1.0, 2.0, 3.0, 4.0, 5.0]) - 6.0/5.0) < 1e-6""]"
5,5,0.0,"from typing import List\n\n\ndef intersperse(numbers: List[int], delimeter: int) -> List[int]:\n """""" Insert a number 'delimeter' between every two consecutive elements of input list `numbers'\n >>> intersperse([], 4)\n []\n >>> intersperse([1, 2, 3], 4)\n [1, 4, 2, 4, 3]\n """"""\n\n\nassert assert intersperse([], 7) == []\nassert assert intersperse([5, 6, 3, 2], 8) == [5, 8, 6, 8, 3, 8, 2]\nassert assert intersperse([2, 2, 2], 2) == [2, 2, 2, 2, 2]\n\n# Solution:","def intersperse(numbers: List[int], delimeter: int) -> List[int]:\n """""" Insert a number 'delimeter' between every two consecutive elements of input list `numbers'\n >>> intersperse([], 4)\n []\n >>> intersperse([1, 2, 3], 4)\n [1, 4, 2, 4, 3]\n """"""\n return [x for x in numbers if x != delimeter] + [delimeter] + [x for x in numbers if x != delimeter]",False,,7.10123,0,0,"[""assert intersperse([], 7) == []"", ""assert intersperse([5, 6, 3, 2], 8) == [5, 8, 6, 8, 3, 8, 2]"", ""assert intersperse([2, 2, 2], 2) == [2, 2, 2, 2, 2]""]"
6,6,0.0,"from typing import List\n\n\ndef parse_nested_parens(paren_string: str) -> List[int]:\n """""" Input to this function is a string represented multiple groups for nested parentheses separated by spaces.\n For each of the group, output the deepest level of nesting of parentheses.\n E.g. (()()) has maximum two levels of nesting while ((())) has three.\n\n >>> parse_nested_parens('(()()) ((())) () ((())()())')\n [2, 3, 1, 3]\n """"""\n\n\nassert assert parse_nested_parens('(()()) ((())) () ((())()())') == [2, 3, 1, 3]\nassert assert parse_nested_parens('() (()) ((())) (((())))') == [1, 2, 3, 4]\nassert assert parse_nested_parens('(()(())((())))') == [4]\n\n# Solution:","def parse_nested_parens(paren_string: str) -> List[int]:\n """""" Input to this function is a string represented multiple groups for nested parentheses separated by spaces.\n For each of the group, output the deepest level of nesting of parentheses.\n E.g. (()()) has maximum two levels of nesting while ((())) has three.\n\n >>> parse_nested_parens('(()()) ((())) () ((())()())')\n [2, 3, 1, 3]\n """"""\n stack = []\n for paren in paren_string.split():\n if paren == '(':\n stack.append(0)\n elif paren == ')':\n stack.append(len(stack) - stack.pop())\n return [len(stack) - stack.pop()]",False,,10.012599,0,0,"[""assert parse_nested_parens('(()()) ((())) () ((())()())') == [2, 3, 1, 3]"", ""assert parse_nested_parens('() (()) ((())) (((())))') == [1, 2, 3, 4]"", ""assert parse_nested_parens('(()(())((())))') == [4]""]"
7,7,0.0,"from typing import List\n\n\ndef filter_by_substring(strings: List[str], substring: str) -> List[str]:\n """""" Filter an input list of strings only for ones that contain given substring\n >>> filter_by_substring([], 'a')\n []\n >>> filter_by_substring(['abc', 'bacd', 'cde', 'array'], 'a')\n ['abc', 'bacd', 'array']\n """"""\n\n\nassert assert filter_by_substring([], 'john') == []\nassert assert filter_by_substring(['xxx', 'asd', 'xxy', 'john doe', 'xxxAAA', 'xxx'], 'xxx') == ['xxx', 'xxxAAA', 'xxx']\nassert assert filter_by_substring(['xxx', 'asd', 'aaaxxy', 'john doe', 'xxxAAA', 'xxx'], 'xx') == ['xxx', 'aaaxxy', 'xxxAAA', 'xxx']\nassert assert filter_by_substring(['grunt', 'trumpet', 'prune', 'gruesome'], 'run') == ['grunt', 'prune']\n\n# Solution:","def filter_by_substring(strings: List[str], substring: str) -> List[str]:\n return [s for s in strings if s.find(substring) != -1]",False,,2.709349,0,0,"[""assert filter_by_substring([], 'john') == []"", ""assert filter_by_substring(['xxx', 'asd', 'xxy', 'john doe', 'xxxAAA', 'xxx'], 'xxx') == ['xxx', 'xxxAAA', 'xxx']"", ""assert filter_by_substring(['xxx', 'asd', 'aaaxxy', 'john doe', 'xxxAAA', 'xxx'], 'xx') == ['xxx', 'aaaxxy', 'xxxAAA', 'xxx']"", ""assert filter_by_substring(['grunt', 'trumpet', 'prune', 'gruesome'], 'run') == ['grunt', 'prune']""]"
8,8,0.0,"from typing import List, Tuple\n\n\ndef sum_product(numbers: List[int]) -> Tuple[int, int]:\n """""" For a given list of integers, return a tuple consisting of a sum and a product of all the integers in a list.\n Empty sum should be equal to 0 and empty product should be equal to 1.\n >>> sum_product([])\n (0, 1)\n >>> sum_product([1, 2, 3, 4])\n (10, 24)\n """"""\n\n\nassert assert sum_product([]) == (0, 1)\nassert assert sum_product([1, 1, 1]) == (3, 1)\nassert assert sum_product([100, 0]) == (100, 0)\nassert assert sum_product([3, 5, 7]) == (3 + 5 + 7, 3 * 5 * 7)\nassert assert sum_product([10]) == (10, 10)\n\n# Solution:","def sum_product(numbers: List[int]) -> Tuple[int, int]:\n """""" For a given list of integers, return a tuple consisting of a sum and a product of all the integers in a list.\n Empty sum should be equal to 0 and empty product should be equal to 1.\n >>> sum_product([])\n (0, 1)\n >>> sum_product([1, 2, 3, 4])\n (10, 24)\n """"""\n if not numbers:\n return (0, 1)\n return (sum(numbers), numbers[0] * sum(numbers))",False,,8.072402,0,0,"[""assert sum_product([]) == (0, 1)"", ""assert sum_product([1, 1, 1]) == (3, 1)"", ""assert sum_product([100, 0]) == (100, 0)"", ""assert sum_product([3, 5, 7]) == (3 + 5 + 7, 3 * 5 * 7)"", ""assert sum_product([10]) == (10, 10)""]"
9,9,0.0,"from typing import List, Tuple\n\n\ndef rolling_max(numbers: List[int]) -> List[int]:\n """""" From a given list of integers, generate a list of rolling maximum element found until given moment\n in the sequence.\n >>> rolling_max([1, 2, 3, 2, 3, 4, 2])\n [1, 2, 3, 3, 3, 4, 4]\n """"""\n\n\nassert assert rolling_max([]) == []\nassert assert rolling_max([1, 2, 3, 4]) == [1, 2, 3, 4]\nassert assert rolling_max([4, 3, 2, 1]) == [4, 4, 4, 4]\nassert assert rolling_max([3, 2, 3, 100, 3]) == [3, 3, 3, 100, 100]\n\n# Solution:","def rolling_max(numbers: List[int]) -> List[int]:\n """""" From a given list of integers, generate a list of rolling maximum element found until given moment\n in the sequence.\n >>> rolling_max([1, 2, 3, 2, 3, 4, 2])\n [1, 2, 3, 3, 3, 4, 4]\n """"""\n max_so_far = numbers[0]\n max_so_far_index = 0\n for i in range(1, len(numbers)):\n if numbers[i] > max_so_far:\n max_so_far = numbers[i]\n max_so_far_index = i\n return [max_so_far] + rolling_max(numbers[max_so_far_index:])",False,,17.05022,0,0,"[""assert rolling_max([]) == []"", ""assert rolling_max([1, 2, 3, 4]) == [1, 2, 3, 4]"", ""assert rolling_max([4, 3, 2, 1]) == [4, 4, 4, 4]"", ""assert rolling_max([3, 2, 3, 100, 3]) == [3, 3, 3, 100, 100]""]"


In [11]:
# Summary across all temperatures
print("\n" + "=" * 80)
print("OVERALL SUMMARY")
print("=" * 80)

print(f"\nüìä Files processed: {len(matching_files)}")
print(f"üìÅ Metadata exists: {'‚úÖ' if metadata_path.exists() else '‚ùå'}")
print(f"üìÅ Activations exist: {'‚úÖ' if activations_dir.exists() else '‚ùå'}")

print("\nüéâ Phase 3.5 HumanEval results inspection complete!")


OVERALL SUMMARY

üìä Files processed: 1
üìÅ Metadata exists: ‚úÖ
üìÅ Activations exist: ‚úÖ

üéâ Phase 3.5 HumanEval results inspection complete!
