In [1]:
#@title Install required libraries
!pip install -q openai

In [9]:
#@title Import libraries and setup
import json
from openai import OpenAI
from google.colab import userdata

# Shared OpenAI client used by the query helpers below. The API key is pulled
# from Colab's secret storage at run time, so it never appears in the notebook.
client = OpenAI(api_key=userdata.get("OPENAI_API_KEY"))

In [3]:
#@title Data Loading Functions
def load_kg_data(file_path):
    """Load a knowledge-graph JSON file and return it re-serialized as a string.

    The file is parsed with ``json.load`` (so malformed JSON is reported as an
    error instead of being passed on) and then dumped back with ``json.dumps``.

    Args:
        file_path: Path to the JSON file.

    Returns:
        A ``(data, error)`` tuple: ``(json_string, None)`` on success, or
        ``(None, "Error loading KG file: ...")`` if reading or parsing fails.
        This function does not raise for ordinary I/O or parse failures.
    """
    try:
        # Read bytes and let json.load detect the encoding (UTF-8/16/32, with or
        # without BOM) instead of depending on the platform's default text encoding.
        with open(file_path, 'rb') as f:
            return json.dumps(json.load(f)), None
    except Exception as e:
        return None, f"Error loading KG file: {str(e)}"

def load_raw_data(file_path):
    """Read a raw text file and return its full contents.

    Args:
        file_path: Path to the text file.

    Returns:
        A ``(data, error)`` tuple: ``(text, None)`` on success, or
        ``(None, "Error loading raw file: ...")`` if the file cannot be read
        or decoded.
    """
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # locale's default encoding of the machine running the notebook.
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read(), None
    except Exception as e:
        return None, f"Error loading raw file: {str(e)}"

In [4]:
#@title Prompt Templates
KG_PROMPT = """Analyze this Trace Compass state system knowledge graph JSON to answer the question.
Focus on node relationships, temporal patterns, and resource utilization.

Question: {question}

Knowledge Graph Context:
{context}

Provide a detailed technical analysis using only the provided graph structure and properties."""

RAW_PROMPT = """Analyze these raw system trace measurements to answer the question.
Identify patterns, anomalies, and performance characteristics.

Question: {question}

Raw Data Context:
{context}

Provide a detailed technical analysis using only the provided raw measurements."""

In [8]:
#@title Analysis Functions
def query_gpt4(prompt, model="gpt-4", temperature=0.3, max_tokens=1200):
    """Send a single-turn prompt to the chat-completions API and return the reply.

    Args:
        prompt: Full prompt text, sent as one ``user`` message.
        model: Chat model name. Defaults to ``"gpt-4"``.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Cap on generated tokens forwarded to the API.

    Returns:
        The reply text with surrounding whitespace stripped. On any failure
        (request error, empty ``choices``, reply without text) a string that
        starts with ``"API Error: "`` is returned instead of raising, so
        callers can print the result unconditionally.
    """
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature,
            max_tokens=max_tokens,
        )
        content = response.choices[0].message.content
        # The API types message content as optional; report a missing reply
        # explicitly rather than as an AttributeError on None.strip().
        if content is None:
            return "API Error: model returned no text content"
        return content.strip()
    except Exception as e:
        return f"API Error: {str(e)}"

def analyze_kg(question, file_path, max_context_chars=15000):
    """Answer a question using the knowledge-graph JSON file as context.

    Loads the file with ``load_kg_data``, truncates the serialized graph,
    fills ``KG_PROMPT`` and sends it through ``query_gpt4``.

    Args:
        question: Natural-language question to ask.
        file_path: Path to the knowledge-graph JSON file.
        max_context_chars: Maximum number of characters of context placed in
            the prompt (plain character slice, so the JSON may be cut
            mid-structure). ``None`` disables truncation.

    Returns:
        The model's answer, or the loader's error message if the file could
        not be loaded (in which case no API call is made).
    """
    context, error = load_kg_data(file_path)
    if error is not None:
        return error

    # Character-based cap to keep the prompt within the model's context window.
    prompt = KG_PROMPT.format(
        question=question,
        context=context[:max_context_chars],
    )
    return query_gpt4(prompt)

def analyze_raw(question, file_path, max_context_chars=15000):
    """Answer a question using the raw trace text file as context.

    Loads the file with ``load_raw_data``, truncates the text, fills
    ``RAW_PROMPT`` and sends it through ``query_gpt4``.

    Args:
        question: Natural-language question to ask.
        file_path: Path to the raw text file.
        max_context_chars: Maximum number of characters of context placed in
            the prompt (plain character slice from the start of the file).
            ``None`` disables truncation.

    Returns:
        The model's answer, or the loader's error message if the file could
        not be loaded (in which case no API call is made).
    """
    context, error = load_raw_data(file_path)
    if error is not None:
        return error

    # Character-based cap to keep the prompt within the model's context window.
    prompt = RAW_PROMPT.format(
        question=question,
        context=context[:max_context_chars],
    )
    return query_gpt4(prompt)

In [11]:
#@title Main Execution
def full_analysis(question, kg_path="data/cpu_usage_graph.json", raw_path="data/cpu_usage_input.txt"):
    """Print the answers from both pipelines for a single question.

    Writes a header with the question, then the knowledge-graph answer
    followed by the raw-data answer. Each section heading is printed before
    its analysis runs. Nothing is returned.
    """
    banner = f"\n🔍 Question: {question}"
    rule = "=" * 60
    print(banner, rule, sep="\n")

    print("\n📊 Knowledge Graph Analysis:")
    kg_report = analyze_kg(question, kg_path)
    print(kg_report)

    print("\n\n📈 Raw Data Analysis:")
    raw_report = analyze_raw(question, raw_path)
    print(raw_report)

# Example usage: run both analyses for one sample question using the default
# data paths of full_analysis.
# NOTE(review): the trailing `#@param` comment looks like a Colab form
# annotation; keep it on the same line as the assignment.
if __name__ == "__main__":
    sample_question = "What is the total accumulated CPU time for thread 5130 on CPU 1?"  #@param {type:"string"}
    full_analysis(sample_question)


🔍 Question: What is the total accumulated CPU time for thread 5130 on CPU 1?

📊 Knowledge Graph Analysis:
The provided knowledge graph JSON represents a state system of a computing system, specifically focusing on the CPU and thread entities. The graph is directed and not multigraph. 

The graph contains information about four CPUs, identified by their "cpu_id" (0, 1, 2, 3), and multiple threads, identified by their "thread_id". Each CPU node contains information about its busy time in nanoseconds ("busy_time_ns"), the number of unique threads it has processed ("num_unique_threads"), the average busy time per thread in nanoseconds ("avg_busy_time_per_thread_ns"), the total number of edge occurrences ("total_edge_occurrences"), and the maximum average edge time in nanoseconds ("max_avg_edge_time_ns").

Each thread node contains the latest accumulated time in nanoseconds ("latest_accumulated_time_ns").

The question asks for the total accumulated CPU time for thread 5130 on CPU 1. Howev