In [1]:
from dotenv import load_dotenv

load_dotenv()

True

In [2]:
import os
import requests

In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
try:
    # Prefer Pydantic v2
    from pydantic import BaseModel, Field
except Exception:
    # Fallback for older stacks (not recommended for new code)
    from langchain_core.pydantic_v1 import BaseModel, Field

In [5]:
import os
import re
import json
import pickle
from glob import glob

from typing import Dict, List, Any, Optional
from dataclasses import dataclass

from langchain_openai import OpenAIEmbeddings, AzureChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

from sklearn.metrics.pairwise import cosine_similarity
from IPython.display import Markdown, display

In [6]:
from prompt_templates.narratives import create_narrative_angles_prompt_template
from prompt_templates.Future_plan import create_future_plan_prompt_template
from prompt_templates.Activity import create_activity_list_generator_prompt_template
from prompt_templates.Main_essay import create_main_essay_ideas_prompt_template

In [7]:
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

from tools import (
    generate_narrative_angles,
    generate_future_plan,
    generate_activity_list,
    generate_main_essay_ideas,
    route_tool_call,
    json_to_markdown_llm,
    web_search,
    rewrite_search_query,

)

In [8]:
from complete_strategy_tools import (generate_complete_application_strategy, route_tool_call_complete)

In [9]:
embeddings_model = OpenAIEmbeddings()

llm = AzureChatOpenAI(
    deployment_name="gpt-4o",
)

In [10]:
# folder_path = "clab_data"
# applications_data = []

# # Get all JSON files in the folder
# for file_path in glob(os.path.join(folder_path, "*.json")):
#     with open(file_path, "r", encoding="utf-8") as f:
#         applications_data = json.load(f)
# print(f"Loaded {len(applications_data)} applications from {len(glob(os.path.join(folder_path, '*.json')))} files")

In [11]:
# with open("clab_data/all_data.json", "r", encoding="utf-8") as f:
#     applications_data = json.load(f)

# print(f"Loaded {len(applications_data)} applications from all_data.json")

In [12]:
# @dataclass
# class Application:  # This will store accepted applications as well as their embeddings
#     filename: str
#     content: Dict[str, Any]
#     embedding: List[float] = None

In [13]:
# applications = []

# for key, application in applications_data.items():
#     embedding = embeddings_model.embed_query(json.dumps(application))

#     application = Application(
#         filename=key,
#         content=application,
#         embedding=embedding,
#     )

#     applications.append(application)

# with open("application_embeddings.pkl", "wb") as f:
#     pickle.dump(applications, f)

In [14]:
# applications = pickle.load(open("application_embeddings.pkl", "rb"))

In [15]:
DUMMY_USER_DATA = {
    "name": "Sarah Johnson",
    "gender": "Female",
    "birth_country": "United States",
    "cultural_background": [
        "American",
        "first-generation college student",
        "rural upbringing",
        "STEM-focused",
        "environmental advocate",
    ],
    "academic": {
        "gpa": "3.95",
        "school_type": "Public High School",
        "test_scores": {"SAT": "1450", "ACT": "32"},
    },
    "activities": [
        {
            "position": "President",
            "category": "Academic",
            "description": "Led environmental science club, organized community clean-up events, coordinated with local government on sustainability initiatives",
        },
        {
            "position": "Team Captain",
            "category": "Athletics",
            "description": "Varsity soccer team captain, led team to state championship, organized team community service projects",
        },
        {
            "position": "Volunteer",
            "category": "Community Service",
            "description": "Tutored underprivileged students in math and science, developed curriculum for after-school program",
        },
        {
            "position": "Research Assistant",
            "category": "Research",
            "description": "Assisted professor in environmental impact study, analyzed data on renewable energy adoption in rural communities",
        },
    ],
    "future_plans": [
        "Study environmental engineering",
        "Work on renewable energy solutions",
        "Masters degree",
        "Start a non-profit focused on rural sustainability",
    ],
}

In [16]:
# user_embedding = embeddings_model.embed_query(json.dumps(DUMMY_USER_DATA))

In [17]:
# similarities = []

# for app in applications:
#     similarity = cosine_similarity([user_embedding], [app.embedding])[0][0]
#     similarities.append((app, similarity))

# top_3_similar = sorted(similarities, key=lambda x: x[1], reverse=True)[:3]

# print("Top 3 Accepted Applications:")
# print("\n".join([each[0].filename for each in top_3_similar]))

In [18]:
# application_narratives = []

# for each in top_3_similar:
#     temp = each[0].content

#     if "application_narrative" in temp:
#         application_narratives.append(temp["application_narrative"])

In [None]:
from langchain.agents import create_openai_tools_agent, AgentExecutor
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an elite admissions strategist. "
            "ONLY call the router tool `route_tool_call(user_request, user_profile)` once per user request. "
            "Never call other tools directly. "
            "Pass the user's request verbatim as `user_request`, and always include `user_profile`. "
            "The router will selectively call the appropriate tool and handle dependencies (e.g., auto-generate narrative before future plan). "
            "Return the router's output as the final answer."
        ),
        ("user", "USER_PROFILE(JSON): {user_profile}\n\nTASK: {input}"),
        MessagesPlaceholder("agent_scratchpad"),
    ]
)
# make a tools list 
# Only expose the router tool to the agent to prevent direct multi-tool calls
tools = [
    route_tool_call,route_tool_call_complete,json_to_markdown_llm,web_search,rewrite_search_query
]

#make the llm tool aware 
llm_with_tools = llm.bind_tools(tools)

In [20]:
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
    input_key="input",
)

agent = create_openai_tools_agent(llm, tools, prompt)

executor = AgentExecutor(agent=agent, tools=tools, verbose=True)


  memory = ConversationBufferMemory(


In [21]:
resp = executor.invoke(
    {
        "input": "Generate main essay ideas for this student",
        "user_profile": DUMMY_USER_DATA,
    }
)
print(resp["output"])



[1m> Entering new AgentExecutor chain...[0m


INFO:httpx:HTTP Request: POST https://clabai3366454354.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-01 "HTTP/1.1 200 OK"


[32;1m[1;3m
Invoking: `route_tool_call` with `{'user_request': 'Generate main essay ideas for this student', 'user_profile': "{'name': 'Sarah Johnson', 'gender': 'Female', 'birth_country': 'United States', 'cultural_background': ['American', 'first-generation college student', 'rural upbringing', 'STEM-focused', 'environmental advocate'], 'academic': {'gpa': '3.95', 'school_type': 'Public High School', 'test_scores': {'SAT': '1450', 'ACT': '32'}}, 'activities': [{'position': 'President', 'category': 'Academic', 'description': 'Led environmental science club, organized community clean-up events, coordinated with local government on sustainability initiatives'}, {'position': 'Team Captain', 'category': 'Athletics', 'description': 'Varsity soccer team captain, led team to state championship, organized team community service projects'}, {'position': 'Volunteer', 'category': 'Community Service', 'description': 'Tutored underprivileged students in math and science, developed curriculum for

INFO:httpx:HTTP Request: POST https://clabai3366454354.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-01 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://clabai3366454354.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-01 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://clabai3366454354.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-01 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://clabai3366454354.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-01 "HTTP/1.1 200 OK"


[36;1m[1;3m{'type': 'essay', 'result': {'main_essay_ideas': [{'title': "Turning Trash into Power: A First-Gen Leader's Mission", 'theme': 'Innovation and leadership in sustainability', 'hook': 'The smell of landfill waste was sharp as I stood with my environmental science club in a field too full of discarded energy potential.', 'challenge': "Convincing my rural community that a 'waste-to-energy' policy was more than just a high school project.", 'journey': 'My club and I researched, developed, and tested ways to convert waste into reusable energy. After weeks of rejection, I presented a polished proposal before the city council, showing how it could reduce landfill overflow and produce renewable power.', 'growth': 'I discovered the courage to speak up—especially in spaces where I felt underestimated as a young, first-generation woman—and learned that practical, data-driven arguments could drive systemic change.', 'impact': 'Our policy reduced landfill use by 20% and benefitted 5,000

In [22]:
resp_1 = executor.invoke(
    {
        "input": "Generate complete strategy for this student",
        "user_profile": DUMMY_USER_DATA,
    }
)
print(resp_1["output"])



[1m> Entering new AgentExecutor chain...[0m


INFO:httpx:HTTP Request: POST https://clabai3366454354.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-01 "HTTP/1.1 200 OK"


[32;1m[1;3m
Invoking: `route_tool_call_complete` with `{'user_request': 'Generate complete strategy for this student', 'user_profile': "{'name': 'Sarah Johnson', 'gender': 'Female', 'birth_country': 'United States', 'cultural_background': ['American', 'first-generation college student', 'rural upbringing', 'STEM-focused', 'environmental advocate'], 'academic': {'gpa': '3.95', 'school_type': 'Public High School', 'test_scores': {'SAT': '1450', 'ACT': '32'}}, 'activities': [{'position': 'President', 'category': 'Academic', 'description': 'Led environmental science club, organized community clean-up events, coordinated with local government on sustainability initiatives'}, {'position': 'Team Captain', 'category': 'Athletics', 'description': 'Varsity soccer team captain, led team to state championship, organized team community service projects'}, {'position': 'Volunteer', 'category': 'Community Service', 'description': 'Tutored underprivileged students in math and science, developed curr

INFO:httpx:HTTP Request: POST https://clabai3366454354.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-01 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://clabai3366454354.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-01 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://clabai3366454354.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-01 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://clabai3366454354.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-01 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://clabai3366454354.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-01 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://clabai3366454354.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-01 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://clabai3366454354.openai.az

[33;1m[1;3m{
  "complete_strategy": [
    {
      "narrative": {
        "title": "From Rural Soccer Fields to Renewable Energy Innovations",
        "positioning": "What connects a varsity soccer captain and a budding environmental engineer? Leadership rooted in community impact.",
        "essay_concept": "Opening scene of Sarah strategizing plays on the soccer field → realization that teamwork mirrors sustainable problem-solving → understanding the importance of rural communities investing in renewable energy → organizing local initiatives → plans to scale rural sustainability nationwide.",
        "anchor_scene": "Under the glare of stadium lights in a critical championship match, Sarah watches her teammates huddle up. She calls for a defensive strategy that emphasizes trust and precision, a leadership style she would later use to rally her environmental science peers for rural clean-up campaigns.",
        "unexpected_twist": "Combining athletic leadership and STEM precision to 

In [23]:
from tools import json_to_markdown_llm

md = json_to_markdown_llm.invoke({"data": resp_1["output"]})

print(md)

INFO:httpx:HTTP Request: POST https://clabai3366454354.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-01 "HTTP/1.1 200 OK"


# Complete Strategy Overview

## Narrative: "From Rural Soccer Fields to Renewable Energy Innovations"
Sarah's journey blends leadership on and off the field with her passion for sustainability, culminating in initiatives that merge community impact with renewable energy solutions.

### Key Elements
- **Title:** From Rural Soccer Fields to Renewable Energy Innovations
- **Positioning:** "What connects a varsity soccer captain and a budding environmental engineer? Leadership rooted in community impact."
- **Essay Concept:**  
  - Sarah strategizes plays during a soccer game, applying teamwork principles to sustainable problem-solving.  
  - Realizes the importance of rural communities investing in renewable energy.  
  - Organizes local initiatives with plans to scale them nationally.  
- **Anchor Scene:**  
  Under stadium lights during a championship match, Sarah uses trust and precision to lead her team—a strategy she later echoes in environmental clean-up campaigns.  
- **Unexpected