# Imports


In [6]:
import os
import json
from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain_openai import ChatOpenAI
from datetime import datetime, timedelta


In [2]:
# Load variables from a local .env file into the process environment.
load_dotenv()
# Re-export the Groq key only when it is actually set: os.environ values must
# be strings, so assigning a missing key (None) raises TypeError, which would
# break runs that use a different provider and have no Groq key configured.
if os.getenv("GROQ_API_KEY") is not None:
    os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

# Initialize LLM


In [3]:
def initialize_llm():
    """Build the chat model selected by the ``MODEL_PROVIDER`` environment variable.

    ``MODEL_PROVIDER`` must be ``"GROQ"`` or ``"OPENAI"``. The model name and
    API key are read from ``GROQ_MODEL``/``GROQ_API_KEY`` or
    ``OPENAI_MODEL``/``OPENAI_API_KEY`` respectively. Both providers receive
    the same sampling settings.

    Returns:
        A ``ChatGroq`` or ``ChatOpenAI`` instance.

    Raises:
        ValueError: If ``MODEL_PROVIDER`` is unset or names an unsupported
            provider (previously this surfaced as an UnboundLocalError).
    """
    provider = os.getenv("MODEL_PROVIDER")

    # Sampling settings shared by both providers.
    shared_settings = dict(
        temperature=0.1,
        max_tokens=13000,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
    )

    if provider == "GROQ":
        return ChatGroq(
            model=os.getenv("GROQ_MODEL"),
            api_key=os.getenv("GROQ_API_KEY"),
            **shared_settings,
        )
    if provider == "OPENAI":
        return ChatOpenAI(
            model=os.getenv("OPENAI_MODEL"),
            openai_api_key=os.getenv("OPENAI_API_KEY"),
            **shared_settings,
        )
    raise ValueError(
        f"Unsupported MODEL_PROVIDER {provider!r}; expected 'GROQ' or 'OPENAI'."
    )

In [4]:
# Build the LLM client once; the driver loop at the end of the notebook passes
# this same object to every invoke_and_save_response call.
llm = initialize_llm()

In [5]:
# Print the client's repr to confirm which model and settings were loaded.
print(llm)

client=<openai.resources.chat.completions.completions.Completions object at 0x000002B86EB6A300> async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000002B86EE4FA40> root_client=<openai.OpenAI object at 0x000002B86B887F80> root_async_client=<openai.AsyncOpenAI object at 0x000002B86EE4D100> model_name='gpt-4.1-mini' temperature=0.1 model_kwargs={} openai_api_key=SecretStr('**********') presence_penalty=0.0 frequency_penalty=0.0 top_p=0.95 max_tokens=13000


# Logic


In [7]:
def format_range(start, end):
    """Render two dates as ``"YYYY-MM-DD - YYYY-MM-DD"``.

    Both arguments must provide ``strftime`` (e.g. ``date`` or ``datetime``).
    """
    iso_day = "%Y-%m-%d"
    return " - ".join(boundary.strftime(iso_day) for boundary in (start, end))

In [8]:
def get_three_month_study_phases(start_date_str: str):
    """Lay out the default three-phase plan beginning on ``start_date_str``.

    The phases are consecutive and non-overlapping: foundation covers 31
    calendar days, elevation and peak cover 30 days each.

    Returns:
        A 4-tuple ``(foundation_range, elevation_range, peak_range, end_date)``
        where each range comes from ``format_range`` and ``end_date`` is the
        last peak day as ``YYYY-MM-DD``; or an error message string when
        ``start_date_str`` is not in ``YYYY-MM-DD`` format.
    """
    try:
        first_day = datetime.strptime(start_date_str, "%Y-%m-%d").date()
    except ValueError:
        return "Invalid date format. Use YYYY-MM-DD."

    one_day = timedelta(days=1)
    phase_ranges = []
    phase_end = None
    # Inclusive phase lengths in days: foundation, elevation, peak.
    for length in (31, 30, 30):
        # Each phase starts the day after the previous one ends.
        phase_start = first_day if phase_end is None else phase_end + one_day
        phase_end = phase_start + timedelta(days=length - 1)
        phase_ranges.append(format_range(phase_start, phase_end))

    return (*phase_ranges, phase_end.strftime('%Y-%m-%d'))

In [9]:
def get_custom_study_phases(today_str: str, planned_str: str):
    """Split the span from ``today_str`` to ``planned_str`` into three phases.

    The day difference is divided by three. Foundation gets one extra day when
    the remainder is at least 1, elevation gets one extra day when the
    remainder is 2, and peak runs from the day after elevation through the
    planned date.

    Returns:
        A 4-tuple ``(foundation_range, elevation_range, peak_range, end_date)``
        where each range comes from ``format_range`` and ``end_date`` is the
        planned date as ``YYYY-MM-DD``; or an error message string when a date
        is malformed or the planned date is not after today.
    """
    try:
        start_day, exam_day = (
            datetime.strptime(text, "%Y-%m-%d").date()
            for text in (today_str, planned_str)
        )
    except ValueError:
        return "Invalid date format. Use YYYY-MM-DD."

    if not exam_day > start_day:
        return "Planned date must be after today's date."

    base_length, leftover = divmod((exam_day - start_day).days, 3)
    foundation_length = base_length + (1 if leftover > 0 else 0)
    elevation_length = base_length + (1 if leftover > 1 else 0)

    one_day = timedelta(days=1)
    foundation_last = start_day + timedelta(days=foundation_length - 1)
    elevation_first = foundation_last + one_day
    elevation_last = elevation_first + timedelta(days=elevation_length - 1)
    peak_first = elevation_last + one_day

    phase_spans = (
        (start_day, foundation_last),
        (elevation_first, elevation_last),
        (peak_first, exam_day),
    )
    formatted = [format_range(first, last) for first, last in phase_spans]
    return (*formatted, exam_day.strftime('%Y-%m-%d'))

In [10]:
def get_study_plan(current_score: int, goal_score: int, today_date_str: str, planned_date_str: str) -> "str | tuple":
    """Choose phase dates, session durations and prompt tone for a student.

    The decision depends on the number of days until the planned test
    (``days_gap``) and the score improvement needed (``score_diff``):

    * ``days_gap < 8``: default three-month plan starting today, "confident"
      profile, and the contact-us conclusion sentence.
    * ``8 <= days_gap <= 31``, ``32..60`` and ``61..90``: a custom plan ending
      on the planned date. The profile is picked from the score gap using the
      band limits in ``score_bands_by_gap``. A gap above the last limit of the
      band falls back to the default three-month plan with the conclusion
      sentence.
    * ``days_gap > 90``: custom plan with the "confident" profile.

    Args:
        current_score: The student's current total score.
        goal_score: The target total score.
        today_date_str: Today's date as ``YYYY-MM-DD``.
        planned_date_str: Planned test date as ``YYYY-MM-DD``.

    Returns:
        On success an 8-tuple ``(foundation, elevation, peak, end_date,
        duration, flex_duration, tone, fifth_sentence)`` of strings. On invalid
        input an error message string; callers distinguish the two with
        ``isinstance(result, str)``.
    """
    try:
        today_date = datetime.strptime(today_date_str, "%Y-%m-%d").date()
        planned_date = datetime.strptime(planned_date_str, "%Y-%m-%d").date()
    except ValueError:
        return "Invalid date format. Use YYYY-MM-DD."

    # NOTE: a goal equal to the current score is accepted (only < 0 is rejected).
    score_diff = goal_score - current_score
    if score_diff < 0:
        return "Invalid input: Goal score must be higher than current score."

    # NOTE: a planned date equal to today is accepted (only < 0 is rejected).
    days_gap = (planned_date - today_date).days
    if days_gap < 0:
        return "Invalid input: Planned date must be in the future."

    # Each profile is (duration, flex_duration, tone).
    confident = (
        "60",
        "30",
        "Use a confident and encouraging tone. Focus on fine-tuning and light improvements.",
    )
    ambitious = (
        "90",
        "60",
        "Use a motivational tone with urgency. Emphasize structured habits and sustained effort.",
    )
    aggressive = (
        "120",
        "90",
        "Use serious, honest, and constructive tone. Highlight foundational review and disciplined rebuilding.",
    )

    # Extra instruction appended only when the default three-month plan is used.
    conclusion = "Conclude the summary by adding these exact sentences: Considering your upcoming SAT exam date and target score, please write an email to info@sherpalai.com so we can assist you in tailoring your study plan. The default study plan is for three months of total preparation time."

    # (largest days_gap in the band,
    #  (largest score_diff for confident, for ambitious, for aggressive))
    score_bands_by_gap = (
        (31, (100, 250, 400)),
        (60, (500, 600, 700)),
        (90, (800, 900, 1000)),
    )

    profile = confident
    use_default_plan = days_gap < 8
    if not use_default_plan:
        for max_gap, (confident_max, ambitious_max, aggressive_max) in score_bands_by_gap:
            if days_gap > max_gap:
                continue
            # Upper-bound comparisons leave no holes between bands, so a
            # non-integer score gap can no longer slip through unmatched.
            if score_diff <= confident_max:
                profile = confident
            elif score_diff <= ambitious_max:
                profile = ambitious
            elif score_diff <= aggressive_max:
                profile = aggressive
            else:
                # Gap too large for the time available: default plan.
                use_default_plan = True
            break
        # No band matched (days_gap > 90): custom plan, confident profile.

    if use_default_plan:
        result = get_three_month_study_phases(today_date_str)
        fifth_sentence = conclusion
    else:
        result = get_custom_study_phases(today_date_str, planned_date_str)
        fifth_sentence = ""

    # The phase helpers signal failure by returning an error string.
    if isinstance(result, str):
        return result

    foundation, elevation, peak, end_date = result
    duration, flex_duration, tone = profile
    return foundation, elevation, peak, end_date, duration, flex_duration, tone, fifth_sentence

In [11]:
def get_domain_rankings(rw_domains, math_domains):
    """Rank domains by how much attention they need.

    Each input entry is a mapping with ``"domain"`` and ``"accuracy"`` keys.
    A domain's priority score is ``(1 - accuracy) * weight``; domains missing
    from the weight tables get weight 0. Rankings are ordered by highest
    priority score, then highest weight, then name.

    Returns:
        ``(combined_ranking, rw_ranking, math_ranking)`` - three lists of dicts
        with keys ``name``, ``accuracy``, ``weightage`` and ``priority_score``.
    """
    reading_writing_weights = {
        "Craft and Structure": 0.28,
        "Information and Ideas": 0.26,
        "Standard English Conventions": 0.26,
        "Expression of Ideas": 0.20,
    }
    math_weights = {
        "Algebra": 0.35,
        "Advanced Math": 0.35,
        "Problem-Solving and Data Analysis": 0.15,
        "Geometry and Trigonometry": 0.15,
    }

    def describe(entry, weights):
        # Build the per-domain record, including its priority score.
        label = entry["domain"]
        accuracy = entry["accuracy"]
        weight = weights.get(label, 0)
        return {
            "name": label,
            "accuracy": accuracy,
            "weightage": weight,
            "priority_score": (1 - accuracy) * weight,
        }

    def ranking_key(info):
        # Negate the numeric fields so an ascending sort puts the largest first.
        return (-info['priority_score'], -info['weightage'], info['name'])

    def rank(infos):
        return sorted(infos, key=ranking_key)

    rw_scored = [describe(entry, reading_writing_weights) for entry in rw_domains]
    math_scored = [describe(entry, math_weights) for entry in math_domains]

    return rank(rw_scored + math_scored), rank(rw_scored), rank(math_scored)
 

# Prompt


In [19]:
# Prompt text sent to the LLM for every student report.
# NOTE(review): generate_template_from_folder calls
# EXECUTIVE_SUMMARY_PROMPT.format(**template_data), but this text contains no
# {placeholder} fields, so none of the computed student data is substituted
# into it - confirm whether placeholders are meant to be added here.
EXECUTIVE_SUMMARY_PROMPT = """
Instructions:
You are generating a comprehensive SAT readiness report for a Sherpal student. You will be provided with two documents per student:
Their Sherpal Persona Profile. Call it Sherpal Persona
Their SAT Readiness Dashboard Report
Based on these documents, produce a formatted student + parent–friendly report that includes the following:
1. Executive Summary (150 words max)
Introduce the student's persona (name, type, and strengths/traits).
Mention their current SAT score, accuracy, and time management.
Summarize 3 key strengths and 3 weaknesses in both Math and Reading & Writing. Make sure to add the right Khan academy links for areas of weakness in a tabulated format for the bottom 3 skills for both reading and writing, and math in a clear bulleted points.
Set realistic growth targets for the June and/or August SAT.
2. Detailed Study Plan (500 words max)
Tailor the strategy to their persona traits (e.g., Steady Climber = structured, needs flexibility drills).
Break it into two phases:
May Study Plan (Foundation for June SAT)
June-July Study Plan (Elevation for August SAT)
Use paragraph + bullet format.
Include:
Weekly goals
Skill areas to target
Specific practice methods
Time-based structure (e.g., 4 sessions/week per subject)
One "Flex Day" per week to build adaptability
Include a weekly schedule table with Day, Focus Area, and Duration.
3. Tips to Make It Work Best
Offer 4–5 practical tips aligned to the student's persona (e.g., reflection, switching study methods, logging confidence, simulating real test day).
4. Words of Encouragement
End with 100–150 words of personalized motivation that speaks to the student’s persona, acknowledging their effort and growth potential. Use empowering, positive language.
5. Output Format
Generate the report in a professionally formatted Word , and no icons, document with:
Clean headings
Structured sections
A formatted study schedule table
Clear, readable fonts
 Add a quote from their persona profile
add inspirational story snippet .
add this to the footer in tiny grey font:
Safe harbor
The scores, assessments, and recommendations provided in this report are for informational purposes only and do not constitute professional educational, psychological, or legal advice. Current performance is not necessarily indicative of actual or future results, as test-day conditions and individual circumstances may vary. While care has been taken in the preparation of this material, we make no representations or warranties of any kind, express or implied, about the completeness, accuracy, or reliability of the information presented. Any reliance you place on such information is strictly at your own risk. We accept no liability for any loss or damage arising from the use of this report.
© 2025 Sthirah Inc. | Confidential & Proprietary - For internal Use Only
"""

# Utils


In [None]:
def list_to_numbered_string(items):
    """Join ``items`` into one string, one ``"N. item"`` line each, numbered from 1."""
    numbered_lines = (
        f"{position}. {entry}" for position, entry in enumerate(items, start=1)
    )
    return "\n".join(numbered_lines)


# Template


In [15]:
# Create the prompt
def generate_template_from_folder(folder_path: str) -> str:
    """Read ``Input_data.json`` from ``folder_path`` and return the formatted SAT prompt.

    The JSON must contain ``user``, ``persona`` and ``sat_report`` sections.
    ``sat_report["accuracy"][0]`` is used as the Reading & Writing section and
    ``[1]`` as the Math section.

    Args:
        folder_path: Directory holding the student's ``Input_data.json``.

    Returns:
        ``EXECUTIVE_SUMMARY_PROMPT`` formatted with the student's data.

    Raises:
        ValueError: If ``get_study_plan`` rejects the inputs (bad dates, goal
            below current score, planned date in the past), or if
            ``planned_sat_date`` is not ``YYYY-MM-DD``.
        KeyError / IndexError: If the JSON lacks an expected field, or fewer
            domains are ranked than the template fields require.
    """
    input_file = os.path.join(folder_path, "Input_data.json")

    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    student = data["user"]
    persona = data["persona"]
    report = data["sat_report"]

    # Take one timestamp so the date and the weekday always agree.
    now = datetime.today()
    todays_date = now.strftime('%Y-%m-%d')
    todays_day = now.strftime('%A')

    planned_sat_day = datetime.strptime(student["planned_sat_date"], '%Y-%m-%d').strftime('%A')

    reading_section = report["accuracy"][0]
    math_section = report["accuracy"][1]

    result = get_study_plan(report["scores"]["total"], student["goal_score"], todays_date, student["planned_sat_date"])
    if isinstance(result, str):
        # get_study_plan reports problems as a message string. Fail with that
        # message instead of continuing into undefined plan variables.
        raise ValueError(f"Cannot build study plan: {result}")
    foundation, elevation, peak, end_date, duration, flex_duration, tone, fifth_sentence = result

    combined_ranks, rw_ranks, math_ranks = get_domain_rankings(
        reading_section["domain_accuracy"], math_section["domain_accuracy"]
    )

    template_data = {
        "student_name": f"{student['first_name']} {student['last_name']}",
        "planned_sat_date": student["planned_sat_date"],
        "planned_sat_day": planned_sat_day,
        "today_date": todays_date,
        "todays_day": todays_day,
        "persona_name": persona["persona_name"],
        "persona_strengths": list_to_numbered_string(persona["key_strengths"]),
        "persona_challenges": list_to_numbered_string(persona["areas_of_improvement"]),
        "growth_strategies": persona["growth_strategies"],
        "rw_score": report["scores"]["reading_writing"],
        "math_score": report["scores"]["math"],
        "total_score": report["scores"]["total"],
        # round(), not int(): 0.57 * 100 is 56.99999999999999 in floating
        # point, which int() would truncate to 56.
        "reading_accuracy": f"{round(reading_section['accuracy'] * 100)}%",
        "math_accuracy": f"{round(math_section['accuracy'] * 100)}%",
        "goal_score": student["goal_score"],
        "reading_domain_accuracy": reading_section["domain_accuracy"],
        "math_domain_accuracy": math_section["domain_accuracy"],
        "foundation_date_range": foundation,
        "elevation_date_range": elevation,
        "peak_date_range": peak,
        "fifth_sentence": fifth_sentence,
        "duration": duration,
        "flex_duration": flex_duration,
        "tone": tone,
        "end_date": end_date,
    }

    # overall_rank1..5, rw_rank1..4, math_rank1..4 (indexing keeps the
    # IndexError when fewer domains are available than expected).
    for field_prefix, ranking, count in (
        ("overall", combined_ranks, 5),
        ("rw", rw_ranks, 4),
        ("math", math_ranks, 4),
    ):
        for position in range(count):
            template_data[f"{field_prefix}_rank{position + 1}"] = ranking[position]["name"]

    return EXECUTIVE_SUMMARY_PROMPT.format(**template_data)

In [16]:
def invoke_and_save_response(folder_path: str, prefix: str, llm) -> None:
    """Generate the prompt for one student, call the LLM and save its reply.

    The reply is printed and written to ``<first_name>_<prefix>.json`` inside
    ``folder_path``, overwriting any existing file of that name. The prompt is
    printed last.

    Args:
        folder_path: Directory containing the student's ``Input_data.json``.
        prefix: Label for this run; used in the banner and the output filename.
        llm: Object exposing ``invoke(prompt)``.
    """
    prompt = generate_template_from_folder(folder_path)

    response = llm.invoke(prompt)
    # Guard usage_metadata the same way content is guarded below, so a
    # response without these attributes does not raise AttributeError.
    print(getattr(response, "usage_metadata", None))
    content = response.content if hasattr(response, "content") else response

    print("************************************",prefix,"************************************")
    print(content)

    # Read the student's first name to build the output filename. The explicit
    # encoding matches how generate_template_from_folder reads the same file.
    input_file_path = os.path.join(folder_path, "Input_data.json")
    with open(input_file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    name = data["user"]["first_name"]
    output_file_path = os.path.join(folder_path, f"{name}_{prefix}.json")

    # Mode "w" truncates an existing file, so no separate delete is needed.
    with open(output_file_path, "w", encoding="utf-8") as out_f:
        out_f.write(content)
    print(prompt)

# Users


In [17]:
# Student folder names to process; the other names are kept in the trailing
# comment and are currently disabled.
folders = ["Aarthi"]#, "Govind", "Ishan", "Jevinn", "Ronit", "Sumedh", "Toni", "Zoha"]

In [18]:
# Generate four responses (labelled "1" to "4") for every listed student folder.
for folder in folders:
    folder_path = rf"C:\Users\Manideep S\Downloads\L@\SAT Readiness Report\Users_data\{folder}"
    for run_number in range(1, 5):
        invoke_and_save_response(folder_path, str(run_number), llm)

NameError: name 'list_to_numbered_string' is not defined