# Phase 1: LLAMA + MBPP Dataset Results Inspection

This notebook inspects Phase 1 LLAMA results to verify:
- Dataset files created correctly
- Model used (LLAMA-3.1-8B)
- Prompts formatted correctly
- Code generation working
- Evaluation results (pass/fail)
- Activations captured (31 layers)

In [12]:
import pandas as pd
import os
from pathlib import Path
import glob
import json
import numpy as np

# Set pandas display options to show FULL content
pd.set_option('display.max_colwidth', None)  # Show full column content (NO LIMITS)
pd.set_option('display.max_columns', None)   # Show all columns
pd.set_option('display.width', None)         # Don't wrap to multiple lines
pd.set_option('display.max_rows', None)      # Show ALL rows (no limit)

print("✅ Pandas display options set to show FULL content")

✅ Pandas display options set to show FULL content


In [13]:
# Auto-discovery of Phase 1 LLAMA data
datasets_dir = "../data/phase1_0_llama/"
pattern = os.path.join(datasets_dir, "dataset_*.parquet")
matching_files = glob.glob(pattern)

if matching_files:
    # Sort by timestamp (filename)
    matching_files.sort(reverse=True)  # Most recent first
    print("=" * 80)
    print("PHASE 1: LLAMA + MBPP DATASET GENERATION")
    print("=" * 80)
    print(f"\n🔍 Found {len(matching_files)} dataset file(s)")
    for file in matching_files:
        file_size = os.path.getsize(file) / 1024  # KB
        print(f"  📁 {Path(file).name} ({file_size:.2f} KB)")
    
    # Use most recent file
    latest_file = matching_files[0]
    print(f"\n📄 Using most recent: {Path(latest_file).name}")
else:
    raise FileNotFoundError(f"❌ No dataset files found in {datasets_dir}")

PHASE 1: LLAMA + MBPP DATASET GENERATION

🔍 Found 1 dataset file(s)
  📁 dataset_sae_20251126_145021.parquet (281.28 KB)

📄 Using most recent: dataset_sae_20251126_145021.parquet


In [14]:
# Check activations directory
activations_dir = Path(datasets_dir) / "activations"

print("\n" + "=" * 80)
print("ACTIVATIONS DIRECTORY")
print("=" * 80)

if activations_dir.exists():
    print(f"\n✅ Activations directory exists: {activations_dir}")
    
    # Check subdirectories
    subdirs = [d for d in activations_dir.iterdir() if d.is_dir()]
    print(f"\n📁 Subdirectories ({len(subdirs)}):")
    for subdir in subdirs:
        files = list(subdir.glob("*.npz"))
        print(f"  - {subdir.name}/: {len(files)} activation files")
        if files:
            print(f"    Sample: {files[0].name}")
            
            # Check activation dimensions for LLAMA (should be 4096)
            sample_data = np.load(files[0])
            for key in sample_data.files:
                print(f"    Shape ({key}): {sample_data[key].shape}")
else:
    print(f"\n⚠️  Activations directory not found: {activations_dir}")


ACTIVATIONS DIRECTORY

✅ Activations directory exists: ../data/phase1_0_llama/activations

📁 Subdirectories (2):
  - incorrect/: 7626 activation files
    Sample: 926_layer_31.npz
    Shape (layer_31): (1, 4096)
  - correct/: 7533 activation files
    Sample: 811_layer_15.npz
    Shape (layer_15): (1, 4096)


In [15]:
# Load and display dataset
print(f"\n{'=' * 80}")
print(f"DATASET CONTENTS")
print("=" * 80)

df = pd.read_parquet(latest_file)

# Basic stats
print(f"\n📊 Statistics:")
print(f"  - Total records: {len(df)}")
print(f"  - Columns: {list(df.columns)}")

# Pass rate
if 'test_passed' in df.columns:
    n_passed = df['test_passed'].sum()
    pass_rate = n_passed / len(df) * 100
    print(f"\n✅ Pass Rate:")
    print(f"  - Passed: {n_passed}/{len(df)} ({pass_rate:.2f}%)")
    print(f"  - Failed: {len(df) - n_passed}/{len(df)} ({100 - pass_rate:.2f}%)")

# Generation time stats
if 'generation_time' in df.columns:
    print(f"\n⏱️  Generation Time:")
    print(f"  - Mean: {df['generation_time'].mean():.2f}s")
    print(f"  - Median: {df['generation_time'].median():.2f}s")
    print(f"  - Min: {df['generation_time'].min():.2f}s")
    print(f"  - Max: {df['generation_time'].max():.2f}s")


DATASET CONTENTS

📊 Statistics:
  - Total records: 489
  - Columns: ['task_id', 'text', 'code', 'test_list', 'cyclomatic_complexity', 'text_x', 'code_x', 'test_list_x', 'cyclomatic_complexity_x', 'text_y', 'code_y', 'test_list_y', 'cyclomatic_complexity_y', 'generated_code', 'raw_output', 'test_passed']

✅ Pass Rate:
  - Passed: 243/489 (49.69%)
  - Failed: 246/489 (50.31%)


In [16]:
# Display all records
print(f"\n{'=' * 80}")
print(f"ALL RECORDS:")
print("=" * 80)
display(df)


ALL RECORDS:


Unnamed: 0,task_id,text,code,test_list,cyclomatic_complexity,text_x,code_x,test_list_x,cyclomatic_complexity_x,text_y,code_y,test_list_y,cyclomatic_complexity_y,generated_code,raw_output,test_passed
0,2,Write a function to find the similar elements from the given two tuple lists.,"def similar_elements(test_tup1, test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res)","[assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5), assert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4), assert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14)]",1,Write a function to find the similar elements from the given two tuple lists.,"def similar_elements(test_tup1, test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res)","[assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5), assert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4), assert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14)]",1.0,Write a function to find the similar elements from the given two tuple lists.,"def similar_elements(test_tup1, test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res)","[assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5), assert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4), assert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14)]",1.0,"def similar_elements(t1, t2):\n return tuple(set(t1).intersection(t2))","Write a function to find the similar elements from the given two tuple lists.\n\nassert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\nassert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4)\nassert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14)\n\n# Solution: \ndef similar_elements(t1, t2):\n return tuple(set(t1).intersection(t2))\n",True
1,3,Write a python function to identify non-prime numbers.,"import math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result","[assert is_not_prime(2) == False, assert is_not_prime(10) == True, assert is_not_prime(35) == True]",3,Write a python function to identify non-prime numbers.,"import math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result","[assert is_not_prime(2) == False, assert is_not_prime(10) == True, assert is_not_prime(35) == True]",3.0,Write a python function to identify non-prime numbers.,"import math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result","[assert is_not_prime(2) == False, assert is_not_prime(10) == True, assert is_not_prime(35) == True]",3.0,"def is_not_prime(n):\n if n <= 1:\n return True\n for i in range(2, n):\n if n % i == 0:\n return True\n return False","Write a python function to identify non-prime numbers.\n\nassert is_not_prime(2) == False\nassert is_not_prime(10) == True\nassert is_not_prime(35) == True\n\n# Solution: \ndef is_not_prime(n):\n if n <= 1:\n return True\n for i in range(2, n):\n if n % i == 0:\n return True\n return False\n",True
2,4,Write a function to find the largest integers from a given list of numbers using heap queue algorithm.,"import heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums","[assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] , assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] , assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35]]",1,Write a function to find the largest integers from a given list of numbers using heap queue algorithm.,"import heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums","[assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] , assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] , assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35]]",1.0,Write a function to find the largest integers from a given list of numbers using heap queue algorithm.,"import heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums","[assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] , assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] , assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35]]",1.0,"def heap_queue_largest(lst, k):\n heap = []\n for i in range(k):\n heap.append(lst[i])\n heapify(heap)\n for i in range(k, len(lst)):\n if lst[i] > heap[0]:\n heap[0] = lst[i]\n heapify(heap)\n return heap","Write a function to find the largest integers from a given list of numbers using heap queue algorithm.\n\nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35]\n\n# Solution: \ndef heap_queue_largest(lst, k):\n heap = []\n for i in range(k):\n heap.append(lst[i])\n heapify(heap)\n for i in range(k, len(lst)):\n if lst[i] > heap[0]:\n heap[0] = lst[i]\n heapify(heap)\n return heap\n",False
3,6,Write a python function to check whether the two numbers differ at one bit position only or not.,"def is_Power_Of_Two (x): \r\n return x and (not(x & (x - 1))) \r\ndef differ_At_One_Bit_Pos(a,b): \r\n return is_Power_Of_Two(a ^ b)","[assert differ_At_One_Bit_Pos(13,9) == True, assert differ_At_One_Bit_Pos(15,8) == False, assert differ_At_One_Bit_Pos(2,4) == False]",2,Write a python function to check whether the two numbers differ at one bit position only or not.,"def is_Power_Of_Two (x): \r\n return x and (not(x & (x - 1))) \r\ndef differ_At_One_Bit_Pos(a,b): \r\n return is_Power_Of_Two(a ^ b)","[assert differ_At_One_Bit_Pos(13,9) == True, assert differ_At_One_Bit_Pos(15,8) == False, assert differ_At_One_Bit_Pos(2,4) == False]",2.0,Write a python function to check whether the two numbers differ at one bit position only or not.,"def is_Power_Of_Two (x): \r\n return x and (not(x & (x - 1))) \r\ndef differ_At_One_Bit_Pos(a,b): \r\n return is_Power_Of_Two(a ^ b)","[assert differ_At_One_Bit_Pos(13,9) == True, assert differ_At_One_Bit_Pos(15,8) == False, assert differ_At_One_Bit_Pos(2,4) == False]",2.0,"def differ_At_One_Bit_Pos(a,b):\n a_bin = bin(a)\n b_bin = bin(b)\n a_bin = a_bin[2:]\n b_bin = b_bin[2:]\n if len(a_bin) > len(b_bin):\n b_bin = '0'*(len(a_bin)-len(b_bin)) + b_bin\n elif len(a_bin) < len(b_bin):\n a_bin = '0'*(len(b_bin)-len(a_bin)) + a_bin\n for i in range(len(a_bin)):\n if a_bin[i]!= b_bin[i]:\n if a_bin[i] == '0' and b_bin[i] == '1':\n return True\n elif a_bin[i] == '1' and b_bin[i] == '0':\n return True\n return False","Write a python function to check whether the two numbers differ at one bit position only or not.\n\nassert differ_At_One_Bit_Pos(13,9) == True\nassert differ_At_One_Bit_Pos(15,8) == False\nassert differ_At_One_Bit_Pos(2,4) == False\n\n# Solution:-\n\ndef differ_At_One_Bit_Pos(a,b):\n a_bin = bin(a)\n b_bin = bin(b)\n a_bin = a_bin[2:]\n b_bin = b_bin[2:]\n if len(a_bin) > len(b_bin):\n b_bin = '0'*(len(a_bin)-len(b_bin)) + b_bin\n elif len(a_bin) < len(b_bin):\n a_bin = '0'*(len(b_bin)-len(a_bin)) + a_bin\n for i in range(len(a_bin)):\n if a_bin[i]!= b_bin[i]:\n if a_bin[i] == '0' and b_bin[i] == '1':\n return True\n elif a_bin[i] == '1' and b_bin[i] == '0':\n return True\n return False",False
4,7,Write a function to find all words which are at least 4 characters long in a string by using regex.,"import re\r\ndef find_char_long(text):\r\n return (re.findall(r""\b\w{4,}\b"", text))","[assert find_char_long('Please move back to stream') == ['Please', 'move', 'back', 'stream'], assert find_char_long('Jing Eco and Tech') == ['Jing', 'Tech'], assert find_char_long('Jhingai wulu road Zone 3') == ['Jhingai', 'wulu', 'road', 'Zone']]",1,Write a function to find all words which are at least 4 characters long in a string by using regex.,"import re\r\ndef find_char_long(text):\r\n return (re.findall(r""\b\w{4,}\b"", text))","[assert find_char_long('Please move back to stream') == ['Please', 'move', 'back', 'stream'], assert find_char_long('Jing Eco and Tech') == ['Jing', 'Tech'], assert find_char_long('Jhingai wulu road Zone 3') == ['Jhingai', 'wulu', 'road', 'Zone']]",1.0,Write a function to find all words which are at least 4 characters long in a string by using regex.,"import re\r\ndef find_char_long(text):\r\n return (re.findall(r""\b\w{4,}\b"", text))","[assert find_char_long('Please move back to stream') == ['Please', 'move', 'back', 'stream'], assert find_char_long('Jing Eco and Tech') == ['Jing', 'Tech'], assert find_char_long('Jhingai wulu road Zone 3') == ['Jhingai', 'wulu', 'road', 'Zone']]",1.0,"def find_char_long(string):\n return re.findall(r'\b\w{4,}\b', string)","Write a function to find all words which are at least 4 characters long in a string by using regex.\n\nassert find_char_long('Please move back to stream') == ['Please','move', 'back','stream']\nassert find_char_long('Jing Eco and Tech') == ['Jing', 'Tech']\nassert find_char_long('Jhingai wulu road Zone 3') == ['Jhingai', 'wulu', 'road', 'Zone']\n\n# Solution: \ndef find_char_long(string):\n return re.findall(r'\b\w{4,}\b', string)",True
5,9,Write a python function to find the minimum number of rotations required to get the same string.,"def find_Rotations(str): \r\n tmp = str + str\r\n n = len(str) \r\n for i in range(1,n + 1): \r\n substring = tmp[i: i+n] \r\n if (str == substring): \r\n return i \r\n return n","[assert find_Rotations(""aaaa"") == 1, assert find_Rotations(""ab"") == 2, assert find_Rotations(""abc"") == 3]",3,Write a python function to find the minimum number of rotations required to get the same string.,"def find_Rotations(str): \r\n tmp = str + str\r\n n = len(str) \r\n for i in range(1,n + 1): \r\n substring = tmp[i: i+n] \r\n if (str == substring): \r\n return i \r\n return n","[assert find_Rotations(""aaaa"") == 1, assert find_Rotations(""ab"") == 2, assert find_Rotations(""abc"") == 3]",3.0,Write a python function to find the minimum number of rotations required to get the same string.,"def find_Rotations(str): \r\n tmp = str + str\r\n n = len(str) \r\n for i in range(1,n + 1): \r\n substring = tmp[i: i+n] \r\n if (str == substring): \r\n return i \r\n return n","[assert find_Rotations(""aaaa"") == 1, assert find_Rotations(""ab"") == 2, assert find_Rotations(""abc"") == 3]",3.0,def find_Rotations(string):\n rotations = 0\n for i in range(len(string)):\n if string[i]!= string[0]:\n rotations += 1\n return rotations,"Write a python function to find the minimum number of rotations required to get the same string.\n\nassert find_Rotations(""aaaa"") == 1\nassert find_Rotations(""ab"") == 2\nassert find_Rotations(""abc"") == 3\n\n# Solution: \ndef find_Rotations(string):\n rotations = 0\n for i in range(len(string)):\n if string[i]!= string[0]:\n rotations += 1\n return rotations\n",False
6,11,Write a python function to remove first and last occurrence of a given character from the string.,"def remove_Occ(s,ch): \r\n for i in range(len(s)): \r\n if (s[i] == ch): \r\n s = s[0 : i] + s[i + 1:] \r\n break\r\n for i in range(len(s) - 1,-1,-1): \r\n if (s[i] == ch): \r\n s = s[0 : i] + s[i + 1:] \r\n break\r\n return s","[assert remove_Occ(""hello"",""l"") == ""heo"", assert remove_Occ(""abcda"",""a"") == ""bcd"", assert remove_Occ(""PHP"",""P"") == ""H""]",5,Write a python function to remove first and last occurrence of a given character from the string.,"def remove_Occ(s,ch): \r\n for i in range(len(s)): \r\n if (s[i] == ch): \r\n s = s[0 : i] + s[i + 1:] \r\n break\r\n for i in range(len(s) - 1,-1,-1): \r\n if (s[i] == ch): \r\n s = s[0 : i] + s[i + 1:] \r\n break\r\n return s","[assert remove_Occ(""hello"",""l"") == ""heo"", assert remove_Occ(""abcda"",""a"") == ""bcd"", assert remove_Occ(""PHP"",""P"") == ""H""]",5.0,Write a python function to remove first and last occurrence of a given character from the string.,"def remove_Occ(s,ch): \r\n for i in range(len(s)): \r\n if (s[i] == ch): \r\n s = s[0 : i] + s[i + 1:] \r\n break\r\n for i in range(len(s) - 1,-1,-1): \r\n if (s[i] == ch): \r\n s = s[0 : i] + s[i + 1:] \r\n break\r\n return s","[assert remove_Occ(""hello"",""l"") == ""heo"", assert remove_Occ(""abcda"",""a"") == ""bcd"", assert remove_Occ(""PHP"",""P"") == ""H""]",5.0,"def remove_Occ(string, char):\n if char not in string:\n return string\n else:\n return string.replace(char, """", 1).replace(char, """", 1)","Write a python function to remove first and last occurrence of a given character from the string.\n\nassert remove_Occ(""hello"",""l"") == ""heo""\nassert remove_Occ(""abcda"",""a"") == ""bcd""\nassert remove_Occ(""PHP"",""P"") == ""H""\n\n# Solution:-\n\ndef remove_Occ(string, char):\n if char not in string:\n return string\n else:\n return string.replace(char, """", 1).replace(char, """", 1)\n",True
7,12,Write a function to sort a given matrix in ascending order according to the sum of its rows.,"def sort_matrix(M):\r\n result = sorted(M, key=sum)\r\n return result","[assert sort_matrix([[1, 2, 3], [2, 4, 5], [1, 1, 1]])==[[1, 1, 1], [1, 2, 3], [2, 4, 5]], assert sort_matrix([[1, 2, 3], [-2, 4, -5], [1, -1, 1]])==[[-2, 4, -5], [1, -1, 1], [1, 2, 3]], assert sort_matrix([[5,8,9],[6,4,3],[2,1,4]])==[[2, 1, 4], [6, 4, 3], [5, 8, 9]]]",1,Write a function to sort a given matrix in ascending order according to the sum of its rows.,"def sort_matrix(M):\r\n result = sorted(M, key=sum)\r\n return result","[assert sort_matrix([[1, 2, 3], [2, 4, 5], [1, 1, 1]])==[[1, 1, 1], [1, 2, 3], [2, 4, 5]], assert sort_matrix([[1, 2, 3], [-2, 4, -5], [1, -1, 1]])==[[-2, 4, -5], [1, -1, 1], [1, 2, 3]], assert sort_matrix([[5,8,9],[6,4,3],[2,1,4]])==[[2, 1, 4], [6, 4, 3], [5, 8, 9]]]",1.0,Write a function to sort a given matrix in ascending order according to the sum of its rows.,"def sort_matrix(M):\r\n result = sorted(M, key=sum)\r\n return result","[assert sort_matrix([[1, 2, 3], [2, 4, 5], [1, 1, 1]])==[[1, 1, 1], [1, 2, 3], [2, 4, 5]], assert sort_matrix([[1, 2, 3], [-2, 4, -5], [1, -1, 1]])==[[-2, 4, -5], [1, -1, 1], [1, 2, 3]], assert sort_matrix([[5,8,9],[6,4,3],[2,1,4]])==[[2, 1, 4], [6, 4, 3], [5, 8, 9]]]",1.0,"def sort_matrix(matrix):\n return sorted(matrix, key=sum)","Write a function to sort a given matrix in ascending order according to the sum of its rows.\n\nassert sort_matrix([[1, 2, 3], [2, 4, 5], [1, 1, 1]])==[[1, 1, 1], [1, 2, 3], [2, 4, 5]]\nassert sort_matrix([[1, 2, 3], [-2, 4, -5], [1, -1, 1]])==[[-2, 4, -5], [1, -1, 1], [1, 2, 3]]\nassert sort_matrix([[5,8,9],[6,4,3],[2,1,4]])==[[2, 1, 4], [6, 4, 3], [5, 8, 9]]\n\n# Solution: O(n^2) time | O(n) space\ndef sort_matrix(matrix):\n return sorted(matrix, key=sum)\n\n# Solution: O(n^2) time | O(1) space\ndef sort_matrix(matrix):\n for i in range(len(matrix)):\n for j in range(i+1, len(matrix)):\n if sum(matrix[i]) > sum(matrix[j]):\n matrix[i], matrix[j] = matrix[j], matrix[i]\n return matrix",True
8,13,Write a function to count the most common words in a dictionary.,from collections import Counter\r\ndef count_common(words):\r\n word_counts = Counter(words)\r\n top_four = word_counts.most_common(4)\r\n return (top_four)\r\n,"[assert count_common(['red','green','black','pink','black','white','black','eyes','white','black','orange','pink','pink','red','red','white','orange','white',""black"",'pink','green','green','pink','green','pink','white','orange',""orange"",'red']) == [('pink', 6), ('black', 5), ('white', 5), ('red', 4)], assert count_common(['one', 'two', 'three', 'four', 'five', 'one', 'two', 'one', 'three', 'one']) == [('one', 4), ('two', 2), ('three', 2), ('four', 1)], assert count_common(['Facebook', 'Apple', 'Amazon', 'Netflix', 'Google', 'Apple', 'Netflix', 'Amazon']) == [('Apple', 2), ('Amazon', 2), ('Netflix', 2), ('Facebook', 1)]]",1,Write a function to count the most common words in a dictionary.,from collections import Counter\r\ndef count_common(words):\r\n word_counts = Counter(words)\r\n top_four = word_counts.most_common(4)\r\n return (top_four)\r\n,"[assert count_common(['red','green','black','pink','black','white','black','eyes','white','black','orange','pink','pink','red','red','white','orange','white',""black"",'pink','green','green','pink','green','pink','white','orange',""orange"",'red']) == [('pink', 6), ('black', 5), ('white', 5), ('red', 4)], assert count_common(['one', 'two', 'three', 'four', 'five', 'one', 'two', 'one', 'three', 'one']) == [('one', 4), ('two', 2), ('three', 2), ('four', 1)], assert count_common(['Facebook', 'Apple', 'Amazon', 'Netflix', 'Google', 'Apple', 'Netflix', 'Amazon']) == [('Apple', 2), ('Amazon', 2), ('Netflix', 2), ('Facebook', 1)]]",1.0,Write a function to count the most common words in a dictionary.,from collections import Counter\r\ndef count_common(words):\r\n word_counts = Counter(words)\r\n top_four = word_counts.most_common(4)\r\n return (top_four)\r\n,"[assert count_common(['red','green','black','pink','black','white','black','eyes','white','black','orange','pink','pink','red','red','white','orange','white',""black"",'pink','green','green','pink','green','pink','white','orange',""orange"",'red']) == [('pink', 6), ('black', 5), ('white', 5), ('red', 4)], assert count_common(['one', 'two', 'three', 'four', 'five', 'one', 'two', 'one', 'three', 'one']) == [('one', 4), ('two', 2), ('three', 2), ('four', 1)], assert count_common(['Facebook', 'Apple', 'Amazon', 'Netflix', 'Google', 'Apple', 'Netflix', 'Amazon']) == [('Apple', 2), ('Amazon', 2), ('Netflix', 2), ('Facebook', 1)]]",1.0,"def count_common(words):\n word_count = {}\n for word in words:\n if word in word_count:\n word_count[word] += 1\n else:\n word_count[word] = 1\n return sorted(word_count.items(), key=lambda x: x[1], reverse=True)","Write a function to count the most common words in a dictionary.\n\nassert count_common(['red','green','black','pink','black','white','black','eyes','white','black','orange','pink','pink','red','red','white','orange','white',""black"",'pink','green','green','pink','green','pink','white','orange',""orange"",'red']) == [('pink', 6), ('black', 5), ('white', 5), ('red', 4)]\nassert count_common(['one', 'two', 'three', 'four', 'five', 'one', 'two', 'one', 'three', 'one']) == [('one', 4), ('two', 2), ('three', 2), ('four', 1)]\nassert count_common(['Facebook', 'Apple', 'Amazon', 'Netflix', 'Google', 'Apple', 'Netflix', 'Amazon']) == [('Apple', 2), ('Amazon', 2), ('Netflix', 2), ('Facebook', 1)]\n\n# Solution: \ndef count_common(words):\n word_count = {}\n for word in words:\n if word in word_count:\n word_count[word] += 1\n else:\n word_count[word] = 1\n return sorted(word_count.items(), key=lambda x: x[1], reverse=True)\n",False
9,15,Write a function to split a string at lowercase letters.,"import re\r\ndef split_lowerstring(text):\r\n return (re.findall('[a-z][^a-z]*', text))","[assert split_lowerstring(""AbCd"")==['bC','d'], assert split_lowerstring(""Python"")==['y', 't', 'h', 'o', 'n'], assert split_lowerstring(""Programming"")==['r', 'o', 'g', 'r', 'a', 'm', 'm', 'i', 'n', 'g']]",1,Write a function to split a string at lowercase letters.,"import re\r\ndef split_lowerstring(text):\r\n return (re.findall('[a-z][^a-z]*', text))","[assert split_lowerstring(""AbCd"")==['bC','d'], assert split_lowerstring(""Python"")==['y', 't', 'h', 'o', 'n'], assert split_lowerstring(""Programming"")==['r', 'o', 'g', 'r', 'a', 'm', 'm', 'i', 'n', 'g']]",1.0,Write a function to split a string at lowercase letters.,"import re\r\ndef split_lowerstring(text):\r\n return (re.findall('[a-z][^a-z]*', text))","[assert split_lowerstring(""AbCd"")==['bC','d'], assert split_lowerstring(""Python"")==['y', 't', 'h', 'o', 'n'], assert split_lowerstring(""Programming"")==['r', 'o', 'g', 'r', 'a', 'm', 'm', 'i', 'n', 'g']]",1.0,def split_lowerstring(s):\n return [s[i] for i in range(len(s)) if s[i].islower()],"Write a function to split a string at lowercase letters.\n\nassert split_lowerstring(""AbCd"")==['bC','d']\nassert split_lowerstring(""Python"")==['y', 't', 'h', 'o', 'n']\nassert split_lowerstring(""Programming"")==['r', 'o', 'g', 'r', 'a','m','m', 'i', 'n', 'g']\n\n# Solution: \ndef split_lowerstring(s):\n return [s[i] for i in range(len(s)) if s[i].islower()]\n",False


In [None]:
# Show sample correct and incorrect generations
print("\n" + "=" * 80)
print("SAMPLE CORRECT GENERATION")
print("=" * 80)

if 'test_passed' in df.columns and df['test_passed'].any():
    correct_sample = df[df['test_passed'] == True].iloc[0]
    print(f"\n📋 Task ID: {correct_sample.get('task_id', 'N/A')}")
    print(f"\n📝 Prompt:")
    print(correct_sample.get('prompt', 'N/A')[:500] + "..." if len(str(correct_sample.get('prompt', ''))) > 500 else correct_sample.get('prompt', 'N/A'))
    print(f"\n💻 Generated Code:")
    print(correct_sample.get('generated_code', 'N/A'))
else:
    print("\n⚠️  No correct generations found")

In [None]:
# Show sample incorrect generation
print("\n" + "=" * 80)
print("SAMPLE INCORRECT GENERATION")
print("=" * 80)

if 'test_passed' in df.columns and (~df['test_passed']).any():
    incorrect_sample = df[df['test_passed'] == False].iloc[0]
    print(f"\n📋 Task ID: {incorrect_sample.get('task_id', 'N/A')}")
    print(f"\n📝 Prompt:")
    print(incorrect_sample.get('prompt', 'N/A')[:500] + "..." if len(str(incorrect_sample.get('prompt', ''))) > 500 else incorrect_sample.get('prompt', 'N/A'))
    print(f"\n💻 Generated Code:")
    print(incorrect_sample.get('generated_code', 'N/A'))
    if 'error_message' in incorrect_sample and incorrect_sample['error_message']:
        print(f"\n❌ Error:")
        print(incorrect_sample['error_message'])
else:
    print("\n⚠️  No incorrect generations found")

In [None]:
# Summary
print("\n" + "=" * 80)
print("OVERALL SUMMARY")
print("=" * 80)

print(f"\n📊 Dataset file: {Path(latest_file).name}")
print(f"📊 Total records: {len(df)}")
if 'test_passed' in df.columns:
    print(f"✅ Pass rate: {df['test_passed'].sum()}/{len(df)} ({df['test_passed'].mean()*100:.2f}%)")
print(f"📁 Activations exist: {'✅' if activations_dir.exists() else '❌'}")

# Check if activations have correct dimension for LLAMA (4096)
if activations_dir.exists():
    task_acts_dir = activations_dir / "task_activations"
    if task_acts_dir.exists():
        sample_files = list(task_acts_dir.glob("*.npz"))[:1]
        if sample_files:
            data = np.load(sample_files[0])
            for key in data.files:
                dim = data[key].shape[-1]
                expected = 4096  # LLAMA hidden size
                status = '✅' if dim == expected else '❌'
                print(f"{status} Activation dimension: {dim} (expected {expected} for LLAMA)")

print("\n🎉 Phase 1 LLAMA results inspection complete!")