In [1]:
import os
from dotenv import load_dotenv
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
os.environ["OPEN_AI_KEY"] = api_key

In [6]:
### FULLY GENERATED ANALYZER SCRIPT

from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

import random


# Step 1: Load the code files from the repository
loader = DirectoryLoader(
    path=r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\mobile_insight',
    glob='**/*.py',             # Only .py files
    exclude=['**/__pycache__/**', '**/*.pyc']  # Exclude cache and .pyc files
)
documents = loader.load()

# Step 2: Split the code files into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
    separators=['\n\n', '\n', ' ', '']
)
texts = text_splitter.split_documents(documents)

# Step 3: Create embeddings for the chunks
embeddings = OpenAIEmbeddings()

# Step 4: Store the embeddings in a vectorstore
vectorstore = Chroma.from_documents(texts, embeddings)

def get_random_context(k=5):
    # Randomly select k documents from the list of texts
    selected_docs = random.sample(texts, min(k, len(texts)))
    context = '\n\n'.join([doc.page_content for doc in selected_docs])
    return context

# Step 6: Read the two example files
with open(r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\testcases\attach_stats\attach_stats.py', 'r') as f:
    example1 = f.read()

with open(r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\testcases\interval_stats\interval_stats.py', 'r') as f:
    example2 = f.read()

# Step 7: Create the prompt template
prompt = PromptTemplate(
    input_variables=['example1', 'example2', 'context'],
    template="""
You are an AI assistant that generates code for analyzers using the given Python library. Here are two examples of analyzer code:

Example 1:
{example1}

Example 2:
{example2}

Using the above examples as a guide, please generate a new analyzer code that is different from the examples provided. 
The new analyzer should demonstrate a unique functionality or feature of the library.
NOTE: ONLY PROVIDE PYTHON CODE, DO NOT ADD ANY OTHER TEXT BEFORE OR AFTER AS THIS OUTPUT IS BEING SAVED DIRECTLY INTO A PY FILE.

You have access to the following relevant code snippets from the library:
{context}

Please make sure to properly use the library's functions and classes as per the context provided.

Generated Code:
"""
)

# Step 8: Initialize the LLM
llm = ChatOpenAI(model_name='gpt-4o', temperature=0.7)


# Step 9: Define the function to generate code examples
def generate_code_examples(num_examples):
    generated_examples = []
    chain = LLMChain(llm=llm, prompt=prompt)
    
    for _ in range(num_examples):
        # Retrieve random context documents
        context = get_random_context(k=5)
        
        # Prepare the inputs to the prompt
        inputs = {
            'example1': example1,
            'example2': example2,
            'context': context
        }
        
        # Run the chain to generate code
        result = chain.invoke(inputs)
        
        # Append the generated code to the list
        generated_code = result['text'].strip()
        
        # Append the generated code to the list
        generated_examples.append(generated_code)
        
    return generated_examples

In [16]:
# Step 10: Generate multiple code examples
num_examples_to_generate = 5  # Adjust the number as needed
generated_codes = generate_code_examples(num_examples_to_generate)

# Step 11: Save the generated codes to files or print them
for idx, code in enumerate(generated_codes):
    filename = f'generated_analyzer_{23+idx+1}.py'
    with open(filename, 'w') as f:
        f.write(code)
    print(f"Generated Code {3+idx+1} saved to {filename}")

Generated Code 4 saved to generated_analyzer_24.py
Generated Code 5 saved to generated_analyzer_25.py
Generated Code 6 saved to generated_analyzer_26.py
Generated Code 7 saved to generated_analyzer_27.py
Generated Code 8 saved to generated_analyzer_28.py


In [None]:
### GENERATED PROMPTS ANALYZER PAIRS SCRIPT

import json

# Step 1: Load real prompt-code pairs from folders
def load_prompt_code_pairs(directory):
    prompt_code_pairs = []
    
    # Each subdirectory contains a prompt (txt) and code (py) file
    for folder_name in os.listdir(directory):
        if folder_name == 'logs':
            continue
        folder_path = os.path.join(directory, folder_name)
        
        if os.path.isdir(folder_path):
            # Find the txt and py files
            prompt_file = next((f for f in os.listdir(folder_path) if f.endswith('.txt')), None)
            code_file = next((f for f in os.listdir(folder_path) if f.endswith('.py')), None)
            
            if prompt_file and code_file:
                # Load the prompt
                with open(os.path.join(folder_path, prompt_file), 'r') as f:
                    prompt = f.read().strip()
                
                # Load the code
                with open(os.path.join(folder_path, code_file), 'r') as f:
                    code = f.read().strip()
                
                # Append to the list as a dict
                prompt_code_pairs.append({
                    'prompt': prompt,
                    'code': code
                })
    
    return prompt_code_pairs

# Load real prompt-code pairs
real_examples_directory = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\testcases'
real_prompt_code_pairs = load_prompt_code_pairs(real_examples_directory)

# Step 2: Create the prompt template for generating new prompts based on Python code
prompt_template = PromptTemplate(
    input_variables=["example1_prompt", "example1_code", "example2_prompt", "example2_code", "generated_code"],
    template="""
Below are two examples of prompt-code pairs:

Example 1:
Prompt: {example1_prompt}
Code:
{example1_code}

Example 2:
Prompt: {example2_prompt}
Code:
{example2_code}

Given the code below, generate a prompt for it following the structure of the 2 examples:

Code:
{generated_code}

Generated Prompt:
"""
)

# Step 3: Initialize the LLM
llm = ChatOpenAI(model="gpt-4o", temperature=0.7)

# Step 4: Function to generate prompts based on Python code
def generate_prompts_for_code(generated_code_folder, real_examples):
    generated_prompts = {}
    chain = LLMChain(llm=llm, prompt=prompt_template)

    # Iterate over all .py files in the generated code folder
    for py_file in os.listdir(generated_code_folder):
        if py_file.endswith('.py'):
            file_path = os.path.join(generated_code_folder, py_file)

            # Load the generated Python code
            with open(file_path, 'r') as f:
                generated_code = f.read()

            # Use the first two real examples as in-context examples
            example1 = real_examples[0]
            example2 = real_examples[1]

            # Prepare the inputs for the LLM
            inputs = {
                "example1_prompt": example1['prompt'],
                "example1_code": example1['code'],
                "example2_prompt": example2['prompt'],
                "example2_code": example2['code'],
                "generated_code": generated_code
            }

            # Generate the prompt for the current Python file
            result = chain.invoke(inputs)
            generated_prompt = result['text'].strip()

            # Store the generated prompt and the corresponding Python code in the dictionary
            generated_prompts[generated_prompt] = generated_code

    return generated_prompts

# Step 5: Define the folder where all generated Python files are saved
generated_code_folder = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\generated_dataset'

# Step 6: Generate the prompts for the Python files
generated_dataset = generate_prompts_for_code(generated_code_folder, real_prompt_code_pairs)

# Step 7: Save the generated dataset (optional, saving as JSON)
with open('synthetic_dataset.json', 'w') as f:
    json.dump(generated_dataset, f, indent=4)

print("Synthetic dataset created and saved.")


Synthetic dataset created and saved.


In [None]:
### MODIFIED GENERATION ANALYZER SCRIPT

import os
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain

from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Step 1: Load and chunk the codebase, then create embeddings
def index_codebase(directory):
    # Load all .py files from the directory
    loader = DirectoryLoader(
        path=directory,
        glob="**/*.py",  # Only .py files
        exclude=["**/__pycache__/**", "**/*.pyc"]
    )
    documents = loader.load()
    
    # Split documents into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
        separators=['\n\n', '\n', ' ', '']
    )
    chunks = text_splitter.split_documents(documents)
    
    # Create embeddings for each chunk
    embeddings = OpenAIEmbeddings()
    vectorstore = Chroma.from_documents(chunks, embeddings)
    
    return vectorstore

# Index the codebase
codebase_directory = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\mobile_insight'
vectorstore = index_codebase(codebase_directory)

# Step 2: Load a real example analyzer code
def load_real_analyzer(file_path):
    with open(file_path, 'r') as f:
        real_analyzer_code = f.read()
    return real_analyzer_code

# Load one example real analyzer code
real_analyzer_path = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\testcases\attach_stats\attach_stats.py'  # Update with the actual path to your real analyzer file
real_analyzer_code = load_real_analyzer(real_analyzer_path)

# Step 3: Create the prompt template for modifying the real analyzer code
prompt_template = PromptTemplate(
    input_variables=["real_analyzer_code", "retrieved_context"],
    template="""
You are writing modified example python files that use an open-source project library called Mobileinsight.

Below are relevant parts of the Python codebase that provide useful context:

{retrieved_context}

Here is an example of an existing analyzer Python file from the codebase:

Real Analyzer Code:
{real_analyzer_code}

Using the codebase context and the real analyzer code as a reference, create a slightly modified version of the analyzer. 
    The new analyzer should perform a similar analysis but with some changes, such as adjusting metrics, altering data processing,
    or applying a different calculation. Ensure the modified analyzer remains functional and consistent with the codebase's style and structure.
    You do NOT need to make any drastic changes;  adding some slightly altered output through modified calculations should be enough.

NOTE: ONLY PROVIDE PYTHON CODE, DO NOT ADD ANY OTHER TEXT BEFORE OR AFTER AS THIS OUTPUT IS BEING SAVED DIRECTLY INTO A PY FILE.

Generated Modified Analyzer:
"""
)

# Step 4: Initialize the LLM
llm = ChatOpenAI(model_name='gpt-4o', temperature=0.7)

def generate_modified_analyzers(num_examples, vectorstore, real_analyzer_code):
    modified_analyzers = []
    chain = LLMChain(llm=llm, prompt=prompt_template)
    
    for _ in range(num_examples):
        # Retrieve relevant code snippets
        retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
        relevant_docs = retriever.get_relevant_documents(real_analyzer_code)
        retrieved_context = "\n\n".join([doc.page_content for doc in relevant_docs])
        
        # Prepare the inputs for the LLM
        inputs = {
            "real_analyzer_code": real_analyzer_code,
            "retrieved_context": retrieved_context
        }

        # Generate the modified analyzer code
        result = chain.invoke(inputs)
        modified_analyzer_code = result['text'].strip()

        # Append the modified analyzer code to the list
        modified_analyzers.append(modified_analyzer_code)

    return modified_analyzers

In [12]:
num_examples_to_generate = 2  # Adjust as needed
real_examples_folder = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\mobile_insight\examples'
modified_analyzers = []
i=0
for py_file in os.listdir(real_examples_folder):
    if py_file.endswith('.py'):
        file_path = os.path.join(real_examples_folder, py_file)
        with open(file_path, 'r') as f:
                real_analyzer_code = f.read()
        modified_analyzers += generate_modified_analyzers(num_examples_to_generate, vectorstore, real_analyzer_code)
        print(i)
        i +=1
        if i == 8:
             break

# Step 7: Save the modified analyzers to files (optional)
output_directory = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\modified_generated_dataset'
os.makedirs(output_directory, exist_ok=True)

for idx, code in enumerate(modified_analyzers):
    filename = os.path.join(output_directory, f'modified_analyzer_{idx + 84 + 1}.py')
    code = code.replace("```python", "", 1)
    code = code.replace("```", "", 1)
    with open(filename, 'w') as f:
        f.write(code)
    print(f"Modified analyzer {idx + 84 + 1} saved to {filename}")

print("All modified analyzers generated and saved.")


0
1
2
3
4
5
6
7
Modified analyzer 85 saved to C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\modified_generated_dataset\modified_analyzer_85.py
Modified analyzer 86 saved to C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\modified_generated_dataset\modified_analyzer_86.py
Modified analyzer 87 saved to C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\modified_generated_dataset\modified_analyzer_87.py
Modified analyzer 88 saved to C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\modified_generated_dataset\modified_analyzer_88.py
Modified analyzer 89 saved to C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\modified_generated_dataset\modified_analyzer_89.py
Modified analyzer 90 saved to C:\Users\bhull\Desktop\U

In [None]:
### MODIFIED GENERATION PROMPTS ANALYZER PAIRS SCRIPT

import json

# Step 1: Load real prompt-code pairs from folders
def load_prompt_code_pairs(directory):
    prompt_code_pairs = []
    
    # Each subdirectory contains a prompt (txt) and code (py) file
    for folder_name in os.listdir(directory):
        if folder_name == 'logs':
            continue
        folder_path = os.path.join(directory, folder_name)
        
        if os.path.isdir(folder_path):
            # Find the txt and py files
            prompt_file = next((f for f in os.listdir(folder_path) if f.endswith('.txt')), None)
            code_file = next((f for f in os.listdir(folder_path) if f.endswith('.py')), None)
            
            if prompt_file and code_file:
                # Load the prompt
                with open(os.path.join(folder_path, prompt_file), 'r') as f:
                    prompt = f.read().strip()
                
                # Load the code
                with open(os.path.join(folder_path, code_file), 'r') as f:
                    code = f.read().strip()
                
                # Append to the list as a dict
                prompt_code_pairs.append({
                    'prompt': prompt,
                    'code': code
                })
    
    return prompt_code_pairs

# Load real prompt-code pairs
real_examples_directory = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\testcases'
real_prompt_code_pairs = load_prompt_code_pairs(real_examples_directory)

# Step 2: Create the prompt template for generating new prompts based on Python code
prompt_template = PromptTemplate(
    input_variables=["example1_prompt", "example1_code", "example2_prompt", "example2_code", "generated_code"],
    template="""
Below are two examples of prompt-code pairs:

Example 1:
Prompt: {example1_prompt}
Code:
{example1_code}

Example 2:
Prompt: {example2_prompt}
Code:
{example2_code}

Given the code below, generate a prompt for it following the structure of the 2 examples:

Code:
{generated_code}

Generated Prompt:
"""
)

# Step 3: Initialize the LLM
llm = ChatOpenAI(model="gpt-4o", temperature=0.7)

# Step 4: Function to generate prompts based on Python code
def generate_prompts_for_code(generated_code_folder, real_examples):
    generated_prompts = {}
    chain = LLMChain(llm=llm, prompt=prompt_template)

    # Iterate over all .py files in the generated code folder
    for py_file in os.listdir(generated_code_folder):
        if py_file.endswith('.py'):
            file_path = os.path.join(generated_code_folder, py_file)

            # Load the generated Python code
            with open(file_path, 'r') as f:
                generated_code = f.read()

            # Use the first two real examples as in-context examples
            example1 = real_examples[0]
            example2 = real_examples[1]

            # Prepare the inputs for the LLM
            inputs = {
                "example1_prompt": example1['prompt'],
                "example1_code": example1['code'],
                "example2_prompt": example2['prompt'],
                "example2_code": example2['code'],
                "generated_code": generated_code
            }

            # Generate the prompt for the current Python file
            result = chain.invoke(inputs)
            generated_prompt = result['text'].strip()

            # Store the generated prompt and the corresponding Python code in the dictionary
            generated_prompts[generated_prompt] = generated_code

    return generated_prompts

# Step 5: Define the folder where all generated Python files are saved
generated_code_folder = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\modified_generated_dataset'

# Step 6: Generate the prompts for the Python files
generated_dataset = generate_prompts_for_code(generated_code_folder, real_prompt_code_pairs)

# Step 7: Save the generated dataset (optional, saving as JSON)
with open('modified_synthetic_dataset.json', 'w') as f:
    json.dump(generated_dataset, f, indent=4)

print("Modified synthetic dataset created and saved.")


Modified synthetic dataset created and saved.


In [4]:
### MODIFIED GENERATION OUTER ANALYZER SCRIPT

import os
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain

from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Step 1: Load and chunk the codebase, then create embeddings
def index_codebase(directory):
    # Load all .py files from the directory
    loader = DirectoryLoader(
        path=directory,
        glob="**/*.py",  # Only .py files
        exclude=["**/__pycache__/**", "**/*.pyc"]
    )
    documents = loader.load()
    
    # Split documents into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
        separators=['\n\n', '\n', ' ', '']
    )
    chunks = text_splitter.split_documents(documents)
    
    # Create embeddings for each chunk
    embeddings = OpenAIEmbeddings()
    vectorstore = Chroma.from_documents(chunks, embeddings)
    
    return vectorstore

# Index the codebase
codebase_directory = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\mobile_insight'
vectorstore = index_codebase(codebase_directory)

# Step 2: Load a real example analyzer code
def load_real_analyzer(file_path):
    with open(file_path, 'r') as f:
        real_analyzer_code = f.read()
    return real_analyzer_code



# Step 3: Create the prompt template for modifying the real analyzer code
prompt_template = PromptTemplate(
    input_variables=["real_outer_analyzer_code", "real_inner_analyzer_code", "retrieved_context"],
    template="""
You are writing modified example python files that use an open-source project library called Mobileinsight.
There are 2 analyzer files: an outer analyzer file and inner analyzer file. The inner analyzer file uses the Mobileinsight library and Analyzer class definitions to create \
a custom Analyzer class, and the outer analyzer file uses this custom Analyzer class contained in the inner analyzer file to write a script that will evaluate some metrics.
You will be given both the outer analyzer file and the inner analyzer file being used by that outer analyzer file, and you will need to use the same inner analyzer file to \
create a slightly modified outer analyzer file. The new analyzer should perform a similar analysis but with some changes, such as adjusting metrics, altering data processing,
or applying a different calculation. Ensure the modified analyzer remains functional and consistent with the codebase's style and structure.
You do NOT need to make any drastic changes;  adding some slightly altered output through modified calculations should be enough.

Below are relevant parts of the Python codebase that provide useful context:

{retrieved_context}

Here is the example of an existing outer analyzer file from the codebase:

Real Outer Analyzer Code:
{real_outer_analyzer_code}

Here is the example of an existing inner analyzer Python from the codebase:

Real Inner Analyzer Code:
{real_inner_analyzer_code}

NOTE: ONLY PROVIDE PYTHON CODE, DO NOT ADD ANY OTHER TEXT BEFORE OR AFTER AS THIS OUTPUT IS BEING SAVED DIRECTLY INTO A PY FILE.

Generated Modified Outer Analyzer:
"""
)

# Step 4: Initialize the LLM
llm = ChatOpenAI(model_name='gpt-4o', temperature=0.7)

def generate_modified_outer_analyzers(num_examples, vectorstore, real_outer_analyzer_code, real_inner_analyzer_code):
    modified_analyzers = []
    chain = LLMChain(llm=llm, prompt=prompt_template)
    
    for _ in range(num_examples):
        # Retrieve relevant code snippets
        retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
        relevant_docs = retriever.get_relevant_documents(real_outer_analyzer_code+real_inner_analyzer_code)
        retrieved_context = "\n\n".join([doc.page_content for doc in relevant_docs])
        
        # Prepare the inputs for the LLM
        inputs = {
            "real_outer_analyzer_code": real_outer_analyzer_code,
            "real_inner_analyzer_code": real_inner_analyzer_code,
            "retrieved_context": retrieved_context
        }

        # Generate the modified analyzer code
        result = chain.invoke(inputs)
        modified_analyzer_code = result['text'].strip()

        # Append the modified analyzer code to the list
        modified_analyzers.append(modified_analyzer_code)

    return modified_analyzers

In [6]:
## GENERATE ALL OUTER ANALYZERS

analyzer_files = {
    "lte_dl_retx_analyzer.py": ["dl_retx_analyzer_test.py"],
    "kpi/kpi_manager.py": ["kpi-manager-test-experimental.py", "kpi-manager-test.py"],
    "lte_measurement_analyzer.py": ["lte-measurement-example.py"],
    "lte_nas_analyzer.py": ["lte-nas-layer-example.py"],
    "msg_logger.py": ["monitor-example.py"],
    "msg_statistics.py": ["msg-statistics-example.py"],
    "uplink_latency_analyzer.py": ["offline-latency-analysis-ul.py"],
    "nr_rrc_analyzer.py": ["online-analysis-example.py"]
}

# Define paths for analyzers
real_examples_folder = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\mobile_insight\examples'
real_inner_analyzer_folder = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\mobile_insight\analyzer'
output_directory = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\genevouter_generated_dataset'
os.makedirs(output_directory, exist_ok=True)

# Step 6: Loop through the dictionary and generate modified outer analyzers
for inner_analyzer, outer_analyzers in analyzer_files.items():
    # Load the inner analyzer code
    inner_analyzer_path = os.path.join(real_inner_analyzer_folder, inner_analyzer)
    real_inner_analyzer_code = load_real_analyzer(inner_analyzer_path)

    # Generate modified outer analyzers for each outer analyzer associated with this inner analyzer
    for outer_analyzer in outer_analyzers:
        outer_analyzer_path = os.path.join(real_examples_folder, outer_analyzer)
        real_outer_analyzer_code = load_real_analyzer(outer_analyzer_path)

        # Generate modified code
        modified_codes = generate_modified_outer_analyzers(2, vectorstore, real_outer_analyzer_code, real_inner_analyzer_code)

        # Save each modified analyzer
        for idx, modified_code in enumerate(modified_codes):
            filename = f"modified_{os.path.splitext(outer_analyzer)[0]}_{idx + 1}.py"
            output_path = os.path.join(output_directory, filename)
            code = modified_code.replace("```python", "", 1)
            code = code.replace("```", "", 1)
            with open(output_path, 'w') as f:
                f.write(code)
            print(f"Modified analyzer saved to {output_path}")

print("All modified analyzers generated and saved.")

Modified analyzer saved to C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\genevouter_generated_dataset\modified_dl_retx_analyzer_test_1.py
Modified analyzer saved to C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\genevouter_generated_dataset\modified_dl_retx_analyzer_test_2.py
Modified analyzer saved to C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\genevouter_generated_dataset\modified_kpi-manager-test-experimental_1.py
Modified analyzer saved to C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\genevouter_generated_dataset\modified_kpi-manager-test-experimental_2.py
Modified analyzer saved to C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\genevouter_generated_dataset\modified_kpi-manager-test_1.py
Modi

In [37]:
# Load one example real analyzer code
real_outer_analyzer_path = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\mobile_insight\examples\offline-latency-analysis-ul.py'
real_outer_analyzer_code = load_real_analyzer(real_outer_analyzer_path)

real_inner_analyzer_path = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\mobile_insight\analyzer\uplink_latency_analyzer.py'
real_inner_analyzer_code = load_real_analyzer(real_inner_analyzer_path)

msg_stat_ex = generate_modified_outer_analyzers(1, vectorstore, real_outer_analyzer_code, real_inner_analyzer_code)


In [None]:
output_directory = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\generated_datasets\outer_generated_dataset'
filename = os.path.join(output_directory, f'outer_analyzer_{2}.py')
code = msg_stat_ex[0].replace("```python", "", 1)
code = code.replace("```", "", 1)
with open(filename, 'w') as f:
    f.write(code)
print(f"Modified analyzer {1} saved to {filename}")

In [None]:
### SINGLE MODIFIED OUTER GENERATION PROMPTS ANALYZER PAIRS SCRIPT

import json

# Step 1: Load real prompt-code pairs from folders
def load_prompt_code_pairs(directory):
    prompt_code_pairs = []
    
    # Each subdirectory contains a prompt (txt) and code (py) file
    for folder_name in os.listdir(directory):
        if folder_name == 'logs':
            continue
        folder_path = os.path.join(directory, folder_name)
        
        if os.path.isdir(folder_path):
            # Find the txt and py files
            prompt_file = next((f for f in os.listdir(folder_path) if f.endswith('.txt')), None)
            code_file = next((f for f in os.listdir(folder_path) if f.endswith('.py')), None)
            
            if prompt_file and code_file:
                # Load the prompt
                with open(os.path.join(folder_path, prompt_file), 'r') as f:
                    prompt = f.read().strip()
                
                # Load the code
                with open(os.path.join(folder_path, code_file), 'r') as f:
                    code = f.read().strip()
                
                # Append to the list as a dict
                prompt_code_pairs.append({
                    'prompt': prompt,
                    'code': code
                })
    
    return prompt_code_pairs

# Load real prompt-code pairs
real_examples_directory = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\testcases'
real_prompt_code_pairs = load_prompt_code_pairs(real_examples_directory)

# Step 2: Create the prompt template for generating new prompts based on Python code
prompt_template = PromptTemplate(
    input_variables=["example1_prompt", "example1_code", "example2_prompt", "example2_code", "inner_analyzer_code", "generated_code"],
    template="""
Below are two examples of prompt-code pairs:

Example 1:
Prompt: {example1_prompt}
Code:
{example1_code}

Example 2:
Prompt: {example2_prompt}
Code:
{example2_code}

The prompts are used by an LLM to generate the code. However, I have a different need. 
There are 2 analyzer files: an outer analyzer file and inner analyzer file. The inner analyzer file uses the Mobileinsight library and Analyzer class definitions to create \
a custom Analyzer class, and the outer analyzer file uses this custom Analyzer class contained in the inner analyzer file to write a script that will evaluate some metrics.
Instead of having a prompt like the above 2 example prompts that don't give any analyzer file (inner or outer) and only provides instructions on the new analyzer file, \
I need a prompt that can give the code for an inner analyzer file, and instructions on what is required for an outer analyzer file. This prompt will then be used to generate \
a new outer analyzer file.

Your task is the generate a prompt for the given outer analyzer file below.\
I will be giving you the outer analyzer file, as well as the inner analyzer file that was used to make it, and your task is to create a prompt with instructions on what the \
outer analyzer file should do, noting that it should be using the inner analyzer file and following a similar tyle style to the 2 examples above. \
You don't need to provide the code for the inner analyzer file; I will be taking your output and appending the inner analyzer file code to it.

Inner analyzer code:
{inner_analyzer_code}

Outer analyzer code:
{generated_code}

Generated Prompt:
"""
)

# Step 3: Initialize the LLM
llm = ChatOpenAI(model="gpt-4o", temperature=0.7)

# Step 4: Function to generate prompts based on Python code
def generate_prompts_for_outer_code(generated_code_folder, real_examples):
    generated_prompts = {}
    chain = LLMChain(llm=llm, prompt=prompt_template)

    # Iterate over all .py files in the generated code folder
    for py_file in os.listdir(generated_code_folder):
        if py_file.endswith('.py'):
            file_path = os.path.join(generated_code_folder, py_file)

            # Load the generated Python code
            with open(file_path, 'r') as f:
                generated_code = f.read()

            # Use the first two real examples as in-context examples
            example1 = real_examples[0]
            example2 = real_examples[1]

            # Prepare the inputs for the LLM
            inputs = {
                "example1_prompt": example1['prompt'],
                "example1_code": example1['code'],
                "example2_prompt": example2['prompt'],
                "example2_code": example2['code'],
                "generated_code": generated_code,
                "inner_analyzer_code": real_inner_analyzer_code
            }

            # Generate the prompt for the current Python file
            result = chain.invoke(inputs)
            generated_prompt = result['text'].strip()

            # Store the generated prompt and the corresponding Python code in the dictionary
            generated_prompts[generated_prompt] = generated_code

    return generated_prompt

generated_code_folder = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\generated_datasets\outer_generated_dataset'

# Step 6: Generate the prompts for the Python files
generated_dataset = generate_prompts_for_outer_code(generated_code_folder, real_prompt_code_pairs)
generated_dataset += '\n' + real_inner_analyzer_code

output_directory = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\generated_datasets\outer_generated_dataset'
filename = os.path.join(output_directory, f'outer_analyzer_prompt_{2}.txt')

with open(filename, 'w') as f:
    f.write(generated_dataset)
print(f"Modified analyzer {2} saved to {filename}")

In [None]:
### FULL MODIFIED OUTER GENERATION PROMPTS ANALYZER PAIRS SCRIPT
import os
import json
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain

# Define a dictionary mapping inner analyzers to their corresponding outer analyzers
analyzer_mapping = {
    "lte_dl_retx_analyzer.py": ["dl_retx_analyzer_test.py"],
    "kpi/kpi_manager.py": ["kpi-manager-test-experimental.py", "kpi-manager-test.py"],
    "lte_measurement_analyzer.py": ["lte-measurement-example.py"],
    "lte_nas_analyzer.py": ["lte-nas-layer-example.py"],
    "msg_logger.py": ["monitor-example.py"],
    "msg_statistics.py": ["msg-statistics-example.py"],
    "uplink_latency_analyzer.py": ["offline-latency-analysis-ul.py"],
    "nr_rrc_analyzer.py": ["online-analysis-example.py"]
}

# Load prompt-code pairs for real examples
def load_prompt_code_pairs(directory):
    prompt_code_pairs = []
    for folder_name in os.listdir(directory):
        if folder_name == 'logs':
            continue
        folder_path = os.path.join(directory, folder_name)
        if os.path.isdir(folder_path):
            prompt_file = next((f for f in os.listdir(folder_path) if f.endswith('.txt')), None)
            code_file = next((f for f in os.listdir(folder_path) if f.endswith('.py')), None)
            if prompt_file and code_file:
                with open(os.path.join(folder_path, prompt_file), 'r') as f:
                    prompt = f.read().strip()
                with open(os.path.join(folder_path, code_file), 'r') as f:
                    code = f.read().strip()
                prompt_code_pairs.append({
                    'prompt': prompt,
                    'code': code
                })
    return prompt_code_pairs

# Load real prompt-code pairs
real_examples_directory = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\testcases'
real_prompt_code_pairs = load_prompt_code_pairs(real_examples_directory)

# Define the prompt template for generating new prompts
prompt_template = PromptTemplate(
    input_variables=["example1_prompt", "example1_code", "example2_prompt", "example2_code", "inner_analyzer_code", "generated_code"],
    template="""
Below are two examples of prompt-code pairs:

Example 1:
Prompt: {example1_prompt}
Code:
{example1_code}

Example 2:
Prompt: {example2_prompt}
Code:
{example2_code}

The prompts are used by an LLM to generate the code. However, I have a different need. 
There are 2 analyzer files: an outer analyzer file and inner analyzer file. The inner analyzer file uses the Mobileinsight library and Analyzer class definitions to create \
a custom Analyzer class, and the outer analyzer file uses this custom Analyzer class contained in the inner analyzer file to write a script that will evaluate some metrics.
Instead of having a prompt like the above 2 example prompts that don't give any analyzer file (inner or outer) and only provides instructions on the new analyzer file, \
I need a prompt that can give the code for an inner analyzer file, and instructions on what is required for an outer analyzer file. This prompt will then be used to generate \
a new outer analyzer file.

Your task is the generate a prompt for the given outer analyzer file below.\
I will be giving you the outer analyzer file, as well as the inner analyzer file that was used to make it, and your task is to create a prompt with instructions on what the \
outer analyzer file should do, noting that it should be using the inner analyzer file and following a similar style to the 2 examples above. \
You don't need to provide the code for the inner analyzer file; I will be taking your output and appending the inner analyzer file code to it.

Inner analyzer code:
{inner_analyzer_code}

Outer analyzer code:
{generated_code}

Generated Prompt:
"""
)

# Initialize the LLM
llm = ChatOpenAI(model="gpt-4o", temperature=0.7)

# Function to determine the corresponding inner analyzer file based on outer analyzer filename
def find_inner_analyzer_filename(outer_filename_base):
    for inner_analyzer, outer_analyzers in analyzer_mapping.items():
        for outer_analyzer in outer_analyzers:
            if outer_filename_base.startswith(os.path.splitext(outer_analyzer)[0]):
                return inner_analyzer
    return None

# Generate prompts for the outer code files
def generate_prompts_for_outer_code(inner_analyzer_folder, generated_code_folder, real_examples):
    generated_prompts = {}
    chain = LLMChain(llm=llm, prompt=prompt_template)

    # Iterate over all .py files in the generated code folder
    for py_file in os.listdir(generated_code_folder):
        if py_file.endswith('.py') and py_file.startswith("modified_"):
            file_path = os.path.join(generated_code_folder, py_file)
            outer_filename_base = "_".join(py_file.split("_")[1:]).split(".")[0]

            # Find the inner analyzer filename that corresponds to the base name of the outer analyzer
            inner_analyzer = find_inner_analyzer_filename(outer_filename_base)
            if inner_analyzer is None:
                print(f"No matching inner analyzer found for outer analyzer: {outer_filename_base}")
                continue

            # Load the inner analyzer code
            inner_analyzer_path = os.path.join(inner_analyzer_folder, inner_analyzer)
            if not os.path.exists(inner_analyzer_path):
                print(f"Inner analyzer file not found: {inner_analyzer}")
                continue
            with open(inner_analyzer_path, 'r') as f:
                inner_analyzer_code = f.read()

            # Load the generated outer analyzer code
            with open(file_path, 'r') as f:
                generated_code = f.read()

            # Use the first two real examples as in-context examples
            example1 = real_examples[0]
            example2 = real_examples[1]

            # Prepare inputs for the LLM
            inputs = {
                "example1_prompt": example1['prompt'],
                "example1_code": example1['code'],
                "example2_prompt": example2['prompt'],
                "example2_code": example2['code'],
                "inner_analyzer_code": inner_analyzer_code,
                "generated_code": generated_code
            }

            # Generate the prompt
            result = chain.invoke(inputs)
            generated_prompt = result['text'].strip() + '\n' + inner_analyzer_code

            # Store the generated prompt
            generated_prompts[generated_prompt] = generated_code

            # Save the generated prompt to a file
            output_directory = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\genevouter_generated_dataset'
            filename = os.path.join(output_directory, f'prompt_{outer_filename_base}.txt')
            with open(filename, 'w') as f:
                f.write(generated_prompt)
            print(f"Prompt for {py_file} saved to {filename}")

    return generated_prompts

real_inner_analyzer_folder = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\mobile_insight\analyzer'
# Define the folder containing generated outer analyzer code files
generated_code_folder = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\genevouter_generated_dataset'

# Generate prompts for each outer analyzer file
generated_dataset = generate_prompts_for_outer_code(real_inner_analyzer_folder, generated_code_folder, real_prompt_code_pairs)
print("All prompts generated and saved.")


Prompt for modified_dl_retx_analyzer_test_1.py saved to C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\genevouter_generated_dataset\prompt_dl_retx_analyzer_test_1.txt
Prompt for modified_dl_retx_analyzer_test_2.py saved to C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\genevouter_generated_dataset\prompt_dl_retx_analyzer_test_2.txt
Prompt for modified_kpi-manager-test-experimental_1.py saved to C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\genevouter_generated_dataset\prompt_kpi-manager-test-experimental_1.txt
Prompt for modified_kpi-manager-test-experimental_2.py saved to C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\genevouter_generated_dataset\prompt_kpi-manager-test-experimental_2.txt
Prompt for modified_kpi-manager-test_1.py saved to C:\Users\bhull\De

In [16]:
### MODIFIED GENERATION INNER ANALYZER SCRIPT

import os
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain

from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Step 1: Load and chunk the codebase, then create embeddings
def index_codebase(directory):
    # Load all .py files from the directory
    loader = DirectoryLoader(
        path=directory,
        glob="**/*.py",  # Only .py files
        exclude=["**/__pycache__/**", "**/*.pyc"]
    )
    documents = loader.load()
    
    # Split documents into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
        separators=['\n\n', '\n', ' ', '']
    )
    chunks = text_splitter.split_documents(documents)
    
    # Create embeddings for each chunk
    embeddings = OpenAIEmbeddings()
    vectorstore = Chroma.from_documents(chunks, embeddings)
    
    return vectorstore

# Index the codebase
codebase_directory = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\mobile_insight'
vectorstore = index_codebase(codebase_directory)

# Step 2: Load a real example analyzer code
def load_real_analyzer(file_path):
    with open(file_path, 'r') as f:
        real_analyzer_code = f.read()
    return real_analyzer_code



# Step 3: Create the prompt template for modifying the real analyzer code
prompt_template = PromptTemplate(
    input_variables=["real_outer_analyzer_code", "real_inner_analyzer_code", "retrieved_context"],
    template="""
You are writing modified example python files that use an open-source project library called Mobileinsight.
There are 2 analyzer files: an outer analyzer file and inner analyzer file. The inner analyzer file uses the Mobileinsight library and Analyzer class definitions to create \
a custom Analyzer class, and the outer analyzer file uses this custom Analyzer class contained in the inner analyzer file to write a script that will evaluate some metrics.
You will be given both the outer analyzer file and the inner analyzer file being used by that outer analyzer file, and you will need to use the same outer analyzer file to \
create a slightly modified inner analyzer file. The new inner analyzer should perform a similar analysis but with some changes, such as adjusting metrics, altering data processing,
or applying a different calculation. Ensure the modified analyzer remains functional and consistent with the codebase's style and structure.
You do NOT need to make any drastic changes; adding some slightly altered output through modified calculations should be enough.

Below are relevant parts of the Python codebase that provide useful context:

{retrieved_context}

Here is the example of an existing outer analyzer file from the codebase:

Real Outer Analyzer Code:
{real_outer_analyzer_code}

Here is the example of an existing inner analyzer Python from the codebase:

Real Inner Analyzer Code:
{real_inner_analyzer_code}

NOTE: ONLY PROVIDE PYTHON CODE, DO NOT ADD ANY OTHER TEXT BEFORE OR AFTER AS THIS OUTPUT IS BEING SAVED DIRECTLY INTO A PY FILE.

Generated Modified Inner Analyzer:
"""
)

# Step 4: Initialize the LLM
llm = ChatOpenAI(model_name='gpt-4o', temperature=0.7)

def generate_modified_inner_analyzers(num_examples, vectorstore, real_outer_analyzer_code, real_inner_analyzer_code):
    modified_analyzers = []
    chain = LLMChain(llm=llm, prompt=prompt_template)
    
    for _ in range(num_examples):
        # Retrieve relevant code snippets
        retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
        relevant_docs = retriever.get_relevant_documents(real_outer_analyzer_code+real_inner_analyzer_code)
        retrieved_context = "\n\n".join([doc.page_content for doc in relevant_docs])
        
        # Prepare the inputs for the LLM
        inputs = {
            "real_outer_analyzer_code": real_outer_analyzer_code,
            "real_inner_analyzer_code": real_inner_analyzer_code,
            "retrieved_context": retrieved_context
        }

        # Generate the modified analyzer code
        result = chain.invoke(inputs)
        modified_analyzer_code = result['text'].strip()

        # Append the modified analyzer code to the list
        modified_analyzers.append(modified_analyzer_code)

    return modified_analyzers

In [None]:
## GENERATE ALL OUTER ANALYZERS

analyzer_files = {
    "lte_dl_retx_analyzer.py": ["dl_retx_analyzer_test.py"],
    "kpi\\kpi_manager.py": ["kpi-manager-test-experimental.py", "kpi-manager-test.py"],
    "lte_measurement_analyzer.py": ["lte-measurement-example.py"],
    "lte_nas_analyzer.py": ["lte-nas-layer-example.py"],
    "msg_logger.py": ["monitor-example.py"],
    "msg_statistics.py": ["msg-statistics-example.py"],
    "uplink_latency_analyzer.py": ["offline-latency-analysis-ul.py"],
    "nr_rrc_analyzer.py": ["online-analysis-example.py"]
}

# Define paths for analyzers
real_examples_folder = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\mobile_insight\examples'
real_inner_analyzer_folder = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\mobile_insight\analyzer'
output_directory = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\geneinner_generated_dataset'
os.makedirs(output_directory, exist_ok=True)

# Step 6: Loop through the dictionary and generate modified outer analyzers
for inner_analyzer, outer_analyzers in analyzer_files.items():
    i = 0
    # Load the inner analyzer code
    inner_analyzer_path = os.path.join(real_inner_analyzer_folder, inner_analyzer)
    real_inner_analyzer_code = load_real_analyzer(inner_analyzer_path)

    # Generate modified outer analyzers for each outer analyzer associated with this inner analyzer
    for outer_analyzer in outer_analyzers:
        outer_analyzer_path = os.path.join(real_examples_folder, outer_analyzer)
        real_outer_analyzer_code = load_real_analyzer(outer_analyzer_path)

        # Generate modified code
        modified_codes = generate_modified_inner_analyzers(2, vectorstore, real_outer_analyzer_code, real_inner_analyzer_code)

        # Save each modified analyzer
        for idx, modified_code in enumerate(modified_codes):
            if i == 1:
                filename = f"modified_{os.path.splitext(inner_analyzer)[0]}_2_{idx + 1}.py"
            else:
                filename = f"modified_{os.path.splitext(inner_analyzer)[0]}_{idx + 1}.py"
            output_path = os.path.join(output_directory, filename)
            code = modified_code.replace("```python", "", 1)
            code = code.replace("```", "", 1)
            with open(output_path, 'w') as f:
                f.write(code)
            print(f"Modified analyzer saved to {output_path}")
        i += 1

print("All modified analyzers generated and saved.")

Modified analyzer saved to C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\geneinner_generated_dataset\modified_lte_dl_retx_analyzer_1.py
Modified analyzer saved to C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\geneinner_generated_dataset\modified_lte_dl_retx_analyzer_2.py
Modified analyzer saved to C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\geneinner_generated_dataset\modified_kpi\kpi_manager_1.py
Modified analyzer saved to C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\geneinner_generated_dataset\modified_kpi\kpi_manager_2.py
Modified analyzer saved to C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\geneinner_generated_dataset\modified_kpi\kpi_manager_2_1.py
Modified analyzer saved to C:\Users\bh

In [42]:
# Load one example real analyzer code
real_outer_analyzer_path = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\mobile_insight\examples\offline-latency-analysis-ul.py'
real_outer_analyzer_code = load_real_analyzer(real_outer_analyzer_path)

real_inner_analyzer_path = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\mobile_insight\analyzer\uplink_latency_analyzer.py'
real_inner_analyzer_code = load_real_analyzer(real_inner_analyzer_path)

msg_stat_ex = generate_modified_outer_analyzers(1, vectorstore, real_outer_analyzer_code, real_inner_analyzer_code)


In [43]:
output_directory = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\generated_datasets\inner_generated_dataset'
filename = os.path.join(output_directory, f'inner_analyzer_{2}.py')
code = msg_stat_ex[0].replace("```python", "", 1)
code = code.replace("```", "", 1)
with open(filename, 'w') as f:
    f.write(code)
print(f"Modified analyzer {2} saved to {filename}")

Modified analyzer 2 saved to C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\generated_datasets\inner_generated_dataset\inner_analyzer_2.py


In [None]:
### SINGLE MODIFIED OUTER GENERATION PROMPTS ANALYZER PAIRS SCRIPT

import json

# Step 1: Load real prompt-code pairs from folders
def load_prompt_code_pairs(directory):
    prompt_code_pairs = []
    
    # Each subdirectory contains a prompt (txt) and code (py) file
    for folder_name in os.listdir(directory):
        if folder_name == 'logs':
            continue
        folder_path = os.path.join(directory, folder_name)
        
        if os.path.isdir(folder_path):
            # Find the txt and py files
            prompt_file = next((f for f in os.listdir(folder_path) if f.endswith('.txt')), None)
            code_file = next((f for f in os.listdir(folder_path) if f.endswith('.py')), None)
            
            if prompt_file and code_file:
                # Load the prompt
                with open(os.path.join(folder_path, prompt_file), 'r') as f:
                    prompt = f.read().strip()
                
                # Load the code
                with open(os.path.join(folder_path, code_file), 'r') as f:
                    code = f.read().strip()
                
                # Append to the list as a dict
                prompt_code_pairs.append({
                    'prompt': prompt,
                    'code': code
                })
    
    return prompt_code_pairs

# Load real prompt-code pairs
real_examples_directory = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\testcases'
real_prompt_code_pairs = load_prompt_code_pairs(real_examples_directory)

# Step 2: Create the prompt template for generating new prompts based on Python code
prompt_template = PromptTemplate(
    input_variables=["example1_prompt", "example1_code", "example2_prompt", "example2_code", "outer_analyzer_code", "generated_code"],
    template="""
Below are two examples of prompt-code pairs:

Example 1:
Prompt: {example1_prompt}
Code:
{example1_code}

Example 2:
Prompt: {example2_prompt}
Code:
{example2_code}

The prompts are used by an LLM to generate the code. However, I have a different need. 
There are 2 analyzer files: an outer analyzer file and inner analyzer file. The inner analyzer file uses the Mobileinsight library and Analyzer class definitions to create \
a custom Analyzer class, and the outer analyzer file uses this custom Analyzer class contained in the inner analyzer file to write a script that will evaluate some metrics.
Instead of having a prompt like the above 2 example prompts that don't give any analyzer file (inner or outer) and only provides instructions on the new analyzer file, \
I need a prompt that can give the code for an outer analyzer file, and instructions on what is required for an inner analyzer file. This prompt will then be used to generate \
a new inner analyzer file.

Your task is the generate a prompt for the given inner analyzer file below.\
I will be giving you the outer analyzer file, as well as the inner analyzer file that was used to make it, and your task is to create a prompt with instructions on what the \
inner analyzer file should do, noting that it will be used to run the outer analyzer file and following a similar style to the 2 examples above. \
You don't need to provide the code for the outer analyzer file; I will be taking your output and appending the outer analyzer file code to it.

Outer analyzer code:
{outer_analyzer_code}

Inner analyzer code:
{generated_code}

Generated Prompt:
"""
)

# Step 3: Initialize the LLM
llm = ChatOpenAI(model="gpt-4o", temperature=0.7)

# Step 4: Function to generate prompts based on Python code
def generate_prompts_for_inner_code(generated_code_folder, real_examples):
    generated_prompts = {}
    chain = LLMChain(llm=llm, prompt=prompt_template)

    # Iterate over all .py files in the generated code folder
    for py_file in os.listdir(generated_code_folder):
        if py_file.endswith('.py'):
            file_path = os.path.join(generated_code_folder, py_file)

            # Load the generated Python code
            with open(file_path, 'r') as f:
                generated_code = f.read()

            # Use the first two real examples as in-context examples
            example1 = real_examples[0]
            example2 = real_examples[1]

            # Prepare the inputs for the LLM
            inputs = {
                "example1_prompt": example1['prompt'],
                "example1_code": example1['code'],
                "example2_prompt": example2['prompt'],
                "example2_code": example2['code'],
                "generated_code": generated_code,
                "outer_analyzer_code": real_outer_analyzer_code
            }

            # Generate the prompt for the current Python file
            result = chain.invoke(inputs)
            generated_prompt = result['text'].strip()

            # Store the generated prompt and the corresponding Python code in the dictionary
            generated_prompts[generated_prompt] = generated_code

    return generated_prompt

generated_code_folder = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\generated_datasets\inner_generated_dataset'

# Step 6: Generate the prompts for the Python files
generated_dataset = generate_prompts_for_inner_code(generated_code_folder, real_prompt_code_pairs)
generated_dataset += '\n' + real_outer_analyzer_code

output_directory = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\generated_datasets\inner_generated_dataset'
filename = os.path.join(output_directory, f'inner_analyzer_prompt_{2}.txt')

with open(filename, 'w') as f:
    f.write(generated_dataset)
print(f"Modified analyzer {2} saved to {filename}")

Modified analyzer 2 saved to C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\generated_datasets\inner_generated_dataset\inner_analyzer_prompt_2.txt


In [None]:
import os
import json
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain

# Define a dictionary mapping outer analyzers to their corresponding inner analyzers
analyzer_mapping = {
    "dl_retx_analyzer_test.py": ["lte_dl_retx_analyzer.py"],
    "kpi-manager-test-experimental.py": ["kpi/kpi_manager.py"],
    "kpi-manager-test.py": ["kpi/kpi_manager.py"],
    "lte-measurement-example.py": ["lte_measurement_analyzer.py"],
    "lte-nas-layer-example.py": ["lte_nas_analyzer.py"],
    "monitor-example.py": ["msg_logger.py"],
    "msg-statistics-example.py": ["msg_statistics.py"],
    "offline-latency-analysis-ul.py": ["uplink_latency_analyzer.py"],
    "online-analysis-example.py": ["nr_rrc_analyzer.py"]
}

# Load prompt-code pairs for real examples
def load_prompt_code_pairs(directory):
    prompt_code_pairs = []
    for folder_name in os.listdir(directory):
        if folder_name == 'logs':
            continue
        folder_path = os.path.join(directory, folder_name)
        if os.path.isdir(folder_path):
            prompt_file = next((f for f in os.listdir(folder_path) if f.endswith('.txt')), None)
            code_file = next((f for f in os.listdir(folder_path) if f.endswith('.py')), None)
            if prompt_file and code_file:
                with open(os.path.join(folder_path, prompt_file), 'r') as f:
                    prompt = f.read().strip()
                with open(os.path.join(folder_path, code_file), 'r') as f:
                    code = f.read().strip()
                prompt_code_pairs.append({
                    'prompt': prompt,
                    'code': code
                })
    return prompt_code_pairs

# Load real prompt-code pairs
real_examples_directory = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\testcases'
real_prompt_code_pairs = load_prompt_code_pairs(real_examples_directory)

# Define the prompt template for generating new prompts
prompt_template = PromptTemplate(
    input_variables=["example1_prompt", "example1_code", "example2_prompt", "example2_code", "outer_analyzer_code", "generated_code"],
    template="""
Below are two examples of prompt-code pairs:

Example 1:
Prompt: {example1_prompt}
Code:
{example1_code}

Example 2:
Prompt: {example2_prompt}
Code:
{example2_code}

The prompts are used by an LLM to generate the code. However, I have a different need. 
There are 2 analyzer files: an outer analyzer file and inner analyzer file. The inner analyzer file uses the Mobileinsight library and Analyzer class definitions to create \
a custom Analyzer class, and the outer analyzer file uses this custom Analyzer class contained in the inner analyzer file to write a script that will evaluate some metrics.
Instead of having a prompt like the above 2 example prompts that don't give any analyzer file (inner or outer) and only provides instructions on the new analyzer file, \
I need a prompt that can give the code for an outer analyzer file, and instructions on what is required for an inner analyzer file. This prompt will then be used to generate \
a new inner analyzer file.

Your task is to generate a prompt for the given inner analyzer file below.\
I will be giving you the outer analyzer file, as well as the inner analyzer file that was used to make it, and your task is to create a prompt with instructions on what the \
inner analyzer file should do, noting that it will be used to run the outer analyzer file and following a similar style to the 2 examples above. \
You don't need to provide the code for the outer analyzer file; I will be taking your output and appending the outer analyzer file code to it.

Outer analyzer code:
{outer_analyzer_code}

Inner analyzer code:
{generated_code}

Generated Prompt:
"""
)

# Initialize the LLM
llm = ChatOpenAI(model="gpt-4o", temperature=0.7)

# Function to determine the corresponding outer analyzer file based on inner analyzer filename
def find_outer_analyzer_filename(inner_filename_base):
    for outer_analyzer, inner_analyzers in analyzer_mapping.items():
        for inner_analyzer in inner_analyzers:
            if inner_filename_base.startswith(os.path.splitext(inner_analyzer)[0]):
                return outer_analyzer
    return None

# Generate prompts for the inner code files
def generate_prompts_for_inner_code(outer_analyzer_folder, generated_code_folder, real_examples):
    generated_prompts = {}
    chain = LLMChain(llm=llm, prompt=prompt_template)

    # Iterate over all .py files in the generated code folder
    for py_file in os.listdir(generated_code_folder):
        if py_file.endswith('.py') and py_file.startswith("modified_"):
            file_path = os.path.join(generated_code_folder, py_file)
            inner_filename_base = "_".join(py_file.split("_")[1:]).split(".")[0]

            # Find the outer analyzer filename that corresponds to the base name of the inner analyzer
            outer_analyzer = find_outer_analyzer_filename(inner_filename_base)
            if outer_analyzer is None:
                print(f"No matching outer analyzer found for inner analyzer: {inner_filename_base}")
                continue

            # Load the outer analyzer code
            outer_analyzer_path = os.path.join(outer_analyzer_folder, outer_analyzer)
            if not os.path.exists(outer_analyzer_path):
                print(f"Outer analyzer file not found: {outer_analyzer}")
                continue
            with open(outer_analyzer_path, 'r') as f:
                outer_analyzer_code = f.read()

            # Load the generated inner analyzer code
            with open(file_path, 'r') as f:
                generated_code = f.read()

            # Use the first two real examples as in-context examples
            example1 = real_examples[0]
            example2 = real_examples[1]

            # Prepare inputs for the LLM
            inputs = {
                "example1_prompt": example1['prompt'],
                "example1_code": example1['code'],
                "example2_prompt": example2['prompt'],
                "example2_code": example2['code'],
                "outer_analyzer_code": outer_analyzer_code,
                "generated_code": generated_code
            }

            # Generate the prompt
            result = chain.invoke(inputs)
            generated_prompt = result['text'].strip()

            # Store the generated prompt
            generated_prompts[generated_prompt] = generated_code

            # Save the generated prompt to a file
            output_directory = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\generated_datasets\geneinner_generated_dataset'
            filename = os.path.join(output_directory, f'prompt_{inner_filename_base}.txt')
            with open(filename, 'w') as f:
                f.write(generated_prompt)
            print(f"Prompt for {py_file} saved to {filename}")

    return generated_prompts

real_outer_analyzer_folder = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\mobile_insight\examples'
# Define the folder containing generated inner analyzer code files
generated_code_folder = r'C:\Users\bhull\Desktop\UCLA Grad\Spring 2024\CS 219\219_final_project\LLM-assisted_mobile_trace_analysis\generated_datasets\geneinner_generated_dataset'

# Generate prompts for each inner analyzer file
generated_dataset = generate_prompts_for_inner_code(real_outer_analyzer_folder, real_prompt_code_pairs)
print("All prompts generated and saved.")
