In [65]:
from dotenv import load_dotenv

# Load key/value pairs from a local .env file into the process environment
# before any client object is constructed.
# NOTE(review): presumably this supplies the OpenAI/Azure credentials used by
# the clients created further down - confirm.
load_dotenv()

True

In [66]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell executes, so edits to local modules such as `tools` and
# `prompt_templates` take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [67]:
# Resolve `BaseModel` / `Field` from whichever Pydantic entry point is available.
try:
    # Prefer Pydantic v2
    from pydantic import BaseModel, Field
except ImportError:
    # Fallback for older stacks (not recommended for new code).
    # Only a failed import triggers the fallback; any other error raised while
    # importing pydantic is a real problem and is allowed to surface instead of
    # being silently masked.
    from langchain_core.pydantic_v1 import BaseModel, Field

In [68]:
import os
import re
import json
import pickle
from glob import glob

from typing import Dict, List, Any, Optional
from dataclasses import dataclass

from langchain_openai import OpenAIEmbeddings, AzureChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

from sklearn.metrics.pairwise import cosine_similarity
from IPython.display import Markdown, display

In [69]:
from prompt_templates.narratives import create_narrative_angles_prompt_template
from prompt_templates.Future_plan import create_future_plan_prompt_template
from prompt_templates.Activity import create_activity_list_generator_prompt_template
from prompt_templates.Main_essay import create_main_essay_ideas_prompt_template

In [70]:
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

from tools import (
    generate_narrative_angles,
    generate_future_plan,
    generate_activity_list,
    generate_main_essay_ideas,
    route_tool_call,
)

In [71]:
# Embeddings client built with library defaults. In the visible part of this
# notebook it is only referenced from commented-out cells.
embeddings_model = OpenAIEmbeddings()

# Chat model bound to the "gpt-4o" Azure deployment.
# NOTE(review): endpoint, key and API version are not passed here, so they are
# presumably read from the environment - confirm.
llm = AzureChatOpenAI(deployment_name="gpt-4o")

In [72]:
# folder_path = "clab_data"
# applications_data = []

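# # Get all JSON files in the folder
# # NOTE(review): each iteration below rebinds `applications_data`, so only the
# # last file's contents are kept; merge per-file results if this is re-enabled.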
# for file_path in glob(os.path.join(folder_path, "*.json")):
#     with open(file_path, "r", encoding="utf-8") as f:
#         applications_data = json.load(f)
# print(f"Loaded {len(applications_data)} applications from {len(glob(os.path.join(folder_path, '*.json')))} files")

In [73]:
# with open("clab_data/all_data.json", "r", encoding="utf-8") as f:
#     applications_data = json.load(f)

# print(f"Loaded {len(applications_data)} applications from all_data.json")

In [74]:
# @dataclass
# class Application:  # This will store accepted applications as well as their embeddings
#     filename: str
#     content: Dict[str, Any]
#     embedding: List[float] = None

In [75]:
# applications = []

# for key, application in applications_data.items():
#     embedding = embeddings_model.embed_query(json.dumps(application))

#     application = Application(
#         filename=key,
#         content=application,
#         embedding=embedding,
#     )

#     applications.append(application)

# with open("application_embeddings.pkl", "wb") as f:
#     pickle.dump(applications, f)

In [76]:
# applications = pickle.load(open("application_embeddings.pkl", "rb"))

In [77]:
# Sample applicant profile used as the `user_profile` input when exercising the agent.
# All values are plain strings, lists, or dicts, so the structure is JSON-serialisable.
DUMMY_USER_DATA = {
    # --- identity -----------------------------------------------------------
    "name": "Sarah Johnson",
    "gender": "Female",
    "birth_country": "United States",
    "cultural_background": [
        "American",
        "first-generation college student",
        "rural upbringing",
        "STEM-focused",
        "environmental advocate",
    ],
    # --- academics (scores are kept as strings) -----------------------------
    "academic": {
        "gpa": "3.95",
        "school_type": "Public High School",
        "test_scores": {
            "SAT": "1450",
            "ACT": "32",
        },
    },
    # --- extracurricular activities -----------------------------------------
    "activities": [
        {
            "position": "President",
            "category": "Academic",
            "description": (
                "Led environmental science club, "
                "organized community clean-up events, "
                "coordinated with local government on sustainability initiatives"
            ),
        },
        {
            "position": "Team Captain",
            "category": "Athletics",
            "description": (
                "Varsity soccer team captain, "
                "led team to state championship, "
                "organized team community service projects"
            ),
        },
        {
            "position": "Volunteer",
            "category": "Community Service",
            "description": (
                "Tutored underprivileged students in math and science, "
                "developed curriculum for after-school program"
            ),
        },
        {
            "position": "Research Assistant",
            "category": "Research",
            "description": (
                "Assisted professor in environmental impact study, "
                "analyzed data on renewable energy adoption in rural communities"
            ),
        },
    ],
    # --- stated goals -------------------------------------------------------
    "future_plans": [
        "Study environmental engineering",
        "Work on renewable energy solutions",
        "Masters degree",
        "Start a non-profit focused on rural sustainability",
    ],
}

In [78]:
# user_embedding = embeddings_model.embed_query(json.dumps(DUMMY_USER_DATA))

In [79]:
# similarities = []

# for app in applications:
#     similarity = cosine_similarity([user_embedding], [app.embedding])[0][0]
#     similarities.append((app, similarity))

# top_3_similar = sorted(similarities, key=lambda x: x[1], reverse=True)[:3]

# print("Top 3 Accepted Applications:")
# print("\n".join([each[0].filename for each in top_3_similar]))

In [80]:
# application_narratives = []

# for each in top_3_similar:
#     temp = each[0].content

#     if "application_narrative" in temp:
#         application_narratives.append(temp["application_narrative"])

In [81]:
from langchain.agents import create_openai_tools_agent, AgentExecutor
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Prompt for the routing agent. Messages are rendered in this order:
#   1. system       - restricts the model to a single `route_tool_call` invocation
#                     and tells it to return the router's output.
#   2. chat_history - earlier turns, when a memory publishes them under the
#                     "chat_history" key. Marked optional so the template still
#                     renders when no history is supplied.
#   3. user         - the applicant profile plus the task text.
#   4. agent_scratchpad - tool calls/results accumulated during the current run.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an elite admissions strategist. "
            "ONLY call the router tool `route_tool_call(user_request, user_profile)` once per user request. "
            "Never call other tools directly. "
            "Pass the user's request verbatim as `user_request`, and always include `user_profile`. "
            "The router will selectively call the appropriate tool and handle dependencies (e.g., auto-generate narrative before future plan). "
            "Return the router's output as the final answer."
        ),
        # Without this placeholder, history stored under "chat_history" is
        # collected but never shown to the model.
        MessagesPlaceholder("chat_history", optional=True),
        ("user", "USER_PROFILE(JSON): {user_profile}\n\nTASK: {input}"),
        MessagesPlaceholder("agent_scratchpad"),
    ]
)

# The agent is given the router as its sole tool, which prevents it from
# invoking the individual generator tools directly.
tools = [route_tool_call]

In [82]:
# prompt = ChatPromptTemplate.from_messages(
#     [
#         (
#             "system",
#             "You are an elite admissions strategist. "
#             "When calling any tool, ALWAYS include the `user_profile` argument. "
#             "Follow this strict sequence: "
#             "1) generate_narrative_angles(user_profile) "
#             "2) generate_future_plan(user_profile, narrative=<first angle>) "
#             "3) generate_activity_list(user_profile, narrative=<first angle>, future_plan) "
#             "4) generate_main_essay_ideas(user_profile, narrative=<first angle>, future_plan, activity_result). "
#             "Return the final JSON from the last tool.",
#         ),
#         ("user", "USER_PROFILE(JSON): {user_profile}\n\nTASK: {input}"),
#         MessagesPlaceholder("agent_scratchpad"),
#     ]
# )

# tools = [
#     generate_narrative_angles,
#     generate_future_plan,
#     generate_activity_list,
#     generate_main_essay_ideas,
# ]


In [83]:
from langchain.memory import ConversationBufferMemory

# Tool-calling agent assembled from the chat model, the exposed tools and the prompt.
agent = create_openai_tools_agent(llm=llm, tools=tools, prompt=prompt)

# Conversation buffer published under "chat_history" as message objects.
# `input_key` names which field of the invoke payload is recorded as the human
# turn; the payload used in this notebook carries both "input" and "user_profile".
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="chat_history",
    input_key="input",
)

# Executor that runs the agent loop with the memory attached; verbose=True
# prints the chain trace for each call.
executor = AgentExecutor(agent=agent, tools=tools, memory=memory, verbose=True)


In [85]:
# Ask the agent for a future plan for the sample applicant.
# The prompt labels the profile as "USER_PROFILE(JSON)", so it is serialised
# with json.dumps here. Passing the raw dict would interpolate its Python repr
# (single quotes, Python-style literals), which is not valid JSON.
resp = executor.invoke(
    {
        "input": "Generate future plan for this student",
        "user_profile": json.dumps(DUMMY_USER_DATA),
    }
)
print(resp["output"])



[1m> Entering new AgentExecutor chain...[0m


INFO:httpx:HTTP Request: POST https://clabai3366454354.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-01 "HTTP/1.1 200 OK"


[32;1m[1;3m
Invoking: `route_tool_call` with `{'user_request': 'Generate future plan for this student', 'user_profile': {'name': 'Sarah Johnson', 'gender': 'Female', 'birth_country': 'United States', 'cultural_background': ['American', 'first-generation college student', 'rural upbringing', 'STEM-focused', 'environmental advocate'], 'academic': {'gpa': '3.95', 'school_type': 'Public High School', 'test_scores': {'SAT': '1450', 'ACT': '32'}}, 'activities': [{'position': 'President', 'category': 'Academic', 'description': 'Led environmental science club, organized community clean-up events, coordinated with local government on sustainability initiatives'}, {'position': 'Team Captain', 'category': 'Athletics', 'description': 'Varsity soccer team captain, led team to state championship, organized team community service projects'}, {'position': 'Volunteer', 'category': 'Community Service', 'description': 'Tutored underprivileged students in math and science, developed curriculum for after

INFO:httpx:HTTP Request: POST https://clabai3366454354.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-01 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://clabai3366454354.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-01 "HTTP/1.1 200 OK"


[36;1m[1;3mEnvironmental engineer designing renewable energy systems to power underserved rural communities.[0m
[32;1m[1;3m[0m

[1m> Finished chain.[0m
Environmental engineer designing renewable energy systems to power underserved rural communities.


In [None]:
# resp = executor.invoke(
#     {
#         "input": "Generate future plan for this student",
#         "user_profile": DUMMY_USER_DATA,
#     }
# )
# print(resp["output"])

In [None]:
# from workflow_orchestrator import generate_application_strategy

# # Use the orchestrator instead of individual chains
# result = generate_application_strategy(DUMMY_USER_DATA)

# # Or use the agent with the new tools
# from tools import generate_complete_application_strategy

# # Add this tool to your agent
# tools = [generate_complete_application_strategy]

In [None]:
# import logging

# from workflow_orchestrator import WorkflowOrchestrator, generate_application_strategy

# # Configure logging to see the workflow progress
# logging.basicConfig(level=logging.INFO)