## load libraries

In [6]:
import os
import csv 
import sys
import json
import re
import pandas as pd
import numpy as np
import ollama


### Prompt format 

In [7]:
import pandas as pd
import csv
def generate_prompt(question):
    """Return the chemistry prompt for ``question``.

    The prompt asks the model for a detailed explanation followed by a
    final answer wrapped in a LaTeX ``boxed`` command.
    """
    # Each entry is one line of the prompt; the doubled braces survive
    # str.format as literal braces around the placeholder answer.
    template_lines = [
        "",
        "    You are a Chemistry expert tasked with answering questions.",
        "    Answer the following question with a detailed explanation:",
        "    Question: {question}",
        "    provide the final answer in a boxed format like this:",
        "    \n\n\\boxed{{Your final answer here}}\n\n",
        "    ",
    ]
    return "\n".join(template_lines).format(question=question)

### ollama API call 

In [8]:
from together import Together
# Load variables from the .env file into the process environment
from dotenv import load_dotenv
load_dotenv()
# The Together client setup below is disabled (left commented out);
# ollama_api_call sends its requests through ollama.chat instead.
# TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")
# client = Together(api_key=TOGETHER_API_KEY)
def extract_boxed_answer(text):
    r"""Return the contents of the first ``\boxed{...}`` in ``text``.

    Braces are matched by depth, so nested LaTeX such as
    ``\boxed{\frac{1}{2}}`` and answers spanning several lines are captured
    whole, and text after the closing brace is never swallowed.

    Returns ``None`` when there is no ``\boxed{``, when its braces never
    close (for example in truncated output), or when the box is empty.
    """
    marker = "\\boxed{"
    start = text.find(marker)
    if start == -1:
        return None

    begin = start + len(marker)
    depth = 1  # the opening brace of \boxed{ is already consumed
    for pos in range(begin, len(text)):
        char = text[pos]
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return text[begin:pos].strip() or None
    return None


def ollama_api_call(prompt, model="gemma3:1b"):
    """Send ``prompt`` to an ollama chat model and pull out the boxed answer.

    Args:
        prompt: User message to send to the model.
        model: Name of the ollama model to query.

    Returns:
        A tuple ``(explaination, answer)``. ``explaination`` is the model
        reply with everything up to the last ``</think>`` tag removed;
        ``answer`` is the content of the first ``\\boxed{...}`` in that
        reply, or ``None`` if there is none.

    The cleaned reply is also printed.
    """
    response = ollama.chat(
        model=model,
        messages=[
            {"role": "system", "content": """You are a Chemistry expert tasked with answering questions. Answer the following question with a detailed explanation. Provide the final answer in a boxed format like this: \n\n\\boxed{Your final answer here}\n\n"""},
            {"role": "user", "content": prompt},
        ],
    )

    output = response["message"]["content"]
    # Keep only what follows the model's reasoning section, if it has one.
    if "</think>" in output:
        output = output.split("</think>")[-1].strip()

    print(output)
    explaination = output
    # Depth-based matching replaces the greedy single-line regex, which
    # over-captured on lines with later braces and missed multi-line boxes.
    answer = extract_boxed_answer(output)

    return explaination, answer

### read from csv file 

In [10]:
import os
import pandas as pd

def preprocess_csv(file_path, question_col=4, skip_rows=2):
    """Answer every question in a CSV file and save the results to a new CSV.

    Each processed row's question is turned into a prompt with
    ``generate_prompt`` and sent to the model with ``ollama_api_call``.
    The results are stored in two new columns, ``explaination`` and
    ``answer`` (the misspelled column name is kept on purpose so existing
    output files and readers stay compatible). The augmented table is
    written next to the input as ``<input name>_with_explanations.csv``.

    Args:
        file_path: Path of the comma-separated, UTF-8 encoded input file.
        question_col: Zero-based position of the column holding the question.
        skip_rows: Number of leading data rows that are not sent to the model.

    Rows whose question cell is missing or blank are skipped and keep empty
    result cells; previously such cells were sent to the model as the
    literal text "nan".
    """
    df = pd.read_csv(file_path, sep=',', encoding='utf-8')

    # Initialize new columns (empty strings by default)
    df['explaination'] = ''
    df['answer'] = ''

    for position, (idx, row) in enumerate(df.iterrows()):
        if position < skip_rows:
            continue

        raw_question = row.iloc[question_col]
        if pd.isna(raw_question) or not str(raw_question).strip():
            # Nothing to ask for this row.
            continue
        question = str(raw_question).strip()
        prompt = generate_prompt(question)

        # Get explanation and answer from the ollama model
        explaination, answer = ollama_api_call(prompt)

        # Store results in the new columns
        df.at[idx, 'explaination'] = explaination
        df.at[idx, 'answer'] = answer

        # Print to console
        print(f"Question: {question}")
        print(f"Explanation: {explaination}")
        print(f"Answer: {answer}\n")

    # Construct a new filename by appending "_with_explanations.csv"
    base, _ = os.path.splitext(file_path)
    output_path = f"{base}_with_explanations.csv"
    # Write the updated DataFrame to the new CSV
    df.to_csv(output_path, index=False)
    print(f"✅  Output written to: {output_path}")

# Run the pipeline on the chemistry evaluation sheet; this queries the model
# once for every processed row and prints each result.
preprocess_csv("Synthetic Data Evaluation  - Chemistry (1).csv")  


Okay, let's break down the reaction and arrive at the final answer.

**Detailed Explanation**

The reaction you've described is a Base-Promoted Elimination (BPE) reaction, specifically an E2 elimination. Here’s a step-by-step explanation of what happens and the resulting product:

1.  **Starting Material:** We begin with (R)-3-bromo-4-methylhexane. This molecule possesses a secondary alkyl halide (bromo group) and a methyl group positioned strategically on a hexane chain. The chiral center (the carbon with the bromine) is crucial because it will determine the stereochemistry of the alkene product.

2.  **Base-Promoted Elimination (BPE):** The key to this reaction is the presence of a strong base, typically potassium tert-butoxide (KOtBu). The base deprotonates the α-carbon of the alkyl group, generating an enolate intermediate.

3.  **Mechanism:** The enolate intermediate then attacks the carbon atom bearing the bromine, causing the bromine to leave as a bromide ion and forming a double

### process from json file

In [None]:
import json

def process_questions(input_json_path: str, output_json_path: str):
    """
    Answer a built-in set of chemistry questions with the model.

    For every question in the hard-coded list below, a prompt is built with
    generate_prompt and sent through ollama_api_call; the question, the
    generated explanation and the extracted answer are printed as they
    become available.

    NOTE: input_json_path and output_json_path are accepted but not used.
    The JSON loading/dumping this function was written for is disabled, so
    nothing is read from or written to disk here.

    Returns a list of dicts with the keys 'question', 'explanation' and
    'answer', in the same order as the question list.
    """
    # Raw strings keep the LaTeX backslashes intact.
    question_bank = [
r"""Thermodynamics of a Carbonate Decomposition  
$$\text{CoCO}_3(s) \rightarrow \text{CoO}(s) + \text{CO}_2(g)$$  
$\Delta H^\circ = +115.4\ \text{kJ mol}^{-1}$, $\Delta S^\circ = +147\ \text{J mol}^{-1} \text{K}^{-1}$ (298 K)  
At what temperature (in °C) does $\Delta G^\circ = 0$?""",

r"""Nernst Equation for a Mixed-Redox Cell  
At 25 °C, the cell is:  
Pt | V³⁺ (0.100 M), V²⁺ (0.0150 M) ‖ Ag⁺ (0.0500 M) | Ag  
Standard potentials:  
V²⁺ ⇌ V³⁺ + e E° = +0.255 V  
Ag⁺ + e⁻ ⇌ Ag E° = +0.799 V  
Calculate the cell potential $E_{\text{cell}}$ (in V).""",

r"""Calculate the pH at the equivalence point when 50.0 mL of 0.100 M NH₃ is titrated with 0.100 M HCl, given $K_b(\text{NH}_3)=1.8×10^{-5}$ and $K_w=1.0×10^{-14}$.""",

r"""Electronic solvation free energy  
A DFT calculation gives electronic energies for methyl α-D-glucopyranoside of  
$E_{\text{gas}} = -362.314$ hartree and $E_{\text{soln}} = -362.503$ hartree.  
(1 hartree = 2625.5 kJ mol⁻¹.)  
Calculate the electronic component of the solvation free energy  
$\Delta G_{\text{solv}} = E_{\text{soln}} - E_{\text{gas}}$ in kJ mol⁻¹. Report to three decimal places.""",

r"""Calculate the initial rate (mol$\cdot$L$^{-1}\cdot$s$^{-1}$) of cis-to-trans isomerization for a photoactivatable azobenzene derivative with quantum yield $\phi = 0.42$ under $650$ nm illumination with photon flux $1.5 \times 10^{16}$ photons$\cdot$s$^{-1}\cdot$cm$^{-2}$ in a $1$ mL solution contained in a $1$ cm$^{2}$ cuvette (pathlength $1$ cm) at initial concentration $5~\mu$M. Assume that 100% of the incident photons are absorbed by the solution (i.e., no transmission). Use Avogadro’s number $N_A = 6.022 \times 10^{23}$ mol$^{-1}$.""",

r"""A dendritic polymer bearing eight peripheral photo-cleavable o-nitrobenzyl ester groups is dissolved at $0.5$ mM in $1$ mL solution (pathlength $1$ cm), which is continuously illuminated at $365$ nm with intensity $10$ mW$\cdot$cm$^{-2}$ for $5$ minutes.  
Given molar extinction coefficient $\varepsilon = 4.5 \times 10^{3}$ L$\cdot$mol$^{-1}\cdot$cm$^{-1}$ and quantum yield of cleavage $\phi = 0.25$, calculate the concentration (mol$\cdot$L$^{-1}$) of cleaved ester groups produced, assuming all photons absorbed contribute to cleavage and uniform illumination across the cuvette cross-section. The polymer contains eight cleavable groups per molecule.""",

r"""A supramolecular intracellular assembly shows sigmoidal kinetics characterized by two activation free energy barriers at $310$ K:  
nucleation barrier $\Delta G^\ddagger_{\mathrm{nucleation}} = 75$ kJ$\cdot$mol$^{-1}$ and  
elongation barrier $\Delta G^\ddagger_{\mathrm{elongation}} = 40$ kJ$\cdot$mol$^{-1}$.  
Given the Arrhenius pre-exponential factor for nucleation $A_{\mathrm{nucleation}}$ is 100 times larger than that for elongation $A_{\mathrm{elongation}}$,  
calculate the ratio of their rate constants $k_{\mathrm{nucleation}}/k_{\mathrm{elongation}}$ at $310$ K.  
Use the gas constant $R = 8.314$ J$\cdot$mol$^{-1}\cdot$K$^{-1}$.""",

r"""A synthetic intracellular assembly formed from peptide amphiphiles has a critical micelle concentration (CMC) of $15~\mu$M at $37^\circ$C.  
After covalent attachment of a photo-cleavable group, the CMC increases to $45~\mu$M.  
Define the change in standard Gibbs free energy of micellization as  
$\Delta \Delta G^\circ = \Delta G^\circ_{\mathrm{modified}} - \Delta G^\circ_{\mathrm{original}}$.  
Calculate $\Delta \Delta G^\circ$ (kJ$\cdot$mol$^{-1}$) at $310$ K, assuming ideal solution behavior and using $R = 8.314$ J$\cdot$mol$^{-1}\cdot$K$^{-1}$.""",

r"""During photochemical control of intracellular assembly, the photoinduced rate constant $k_{\mathrm{photo}}$ follows first-order kinetics and depends on photon flux $I$ (photons$\cdot$cm$^{-2}\cdot$s$^{-1}$) as $k_{\mathrm{photo}} = \sigma I$, where $\sigma$ is the absorption cross-section.  
Given $\sigma = 2.5 \times 10^{-17}$ cm$^{2}$ and a target $k_{\mathrm{photo}} = 0.01$ s$^{-1}$ under $700$ nm illumination, calculate the required light intensity $I_{\mathrm{W}}$ in W$\cdot$cm$^{-2}$.  
Use Planck’s constant $h = 6.626 \times 10^{-34}$ J$\cdot$s and speed of light $c = 3.00 \times 10^{8}$ m$\cdot$s$^{-1}$.  
Show all steps starting from determining the photon flux $I$.""",

r"""Upon NIR irradiation at 800 nm, a photoactivatable supramolecular monomer is illuminated in a $1$ cm pathlength cuvette with monomer concentration $10~\mu$M and molar absorptivity $\varepsilon = 5{,}000$ L$\cdot$mol$^{-1}\cdot$cm$^{-1}$.  
Incident light intensity is $2 \times 10^{15}$ photons$\cdot$s$^{-1}\cdot$cm$^{-2}$ over a $0.5$ cm$^{2}$ cross-sectional area.  
Calculate the fraction of incident photons absorbed by the solution, assuming Beer–Lambert law applies and uniform illumination.""",

r"""Activation free energy $\Delta G^\ddagger$  
Data:  
• NMR coalescence at $T_c = 268$ K; $\Delta\nu = 120$ Hz  
• Rate at coalescence $k_c = \pi \cdot \Delta\nu/\sqrt{2}$  
• Eyring: $k_c = (k_B T_c/h)\cdot e^{–\Delta G^\ddagger/(R T_c)}$  
• $R = 8.314$ J$\cdot$K⁻¹$\cdot$mol⁻¹  
You may calculate $(k_B T_c/h)$ in one line.  
Report $\Delta G^\ddagger$ in kJ$\cdot$mol⁻¹ to three significant figures.""",

r"""**By given Spectroscopic Data Analysis, predict the name of the structure:**

**Molecular Formula:** $C_{20}H_{16}FN_{5}O_{2}S_{2}$

**IR (KBr, cm$^{-1}$):**

* 3352: N–H stretching (secondary amine)  
* 3220: N–H stretching (secondary amide)  
* 2891: C–H stretching ($\mathsf{-CH_3}$ group)  
* 2982: C–H stretching (aromatic ring)  
* 2772: C–H stretching ($\mathsf{-CH_2}$ group)  
* 1704: C=O stretching (secondary amide)  
* 1552: C=C stretching (aromatic ring)  
* 1290: C–H bending  
* 1194, 1088: C–O–C stretching  
* 1118: C–F stretching  
* 775: 1,2-disubstituted benzene ring  

**$^{1}$H NMR (300 MHz, DMSO-d$_{6}$, $\delta$ ppm):**

* 9.17 (s, 1H, Het–NH–CO–Ar)  
* 8.03–6.58 (m, 9H, Ar–H)  
* 4.44 (s, 2H, Het–CH$_{2}$)  
* 4.02 (s, 1H, Ar–NH–CH$_{2}$)  
* 2.47 (s, 3H, –N–C(CH$_{3}$)–C–)  

**$^{13}$C NMR (100 MHz, DMSO-d$_{6}$, $\delta$ ppm):**

$$
177.8,\ 166.3,\ 163.2,\ 156.2,\ 155.1,\ 154.2,\ 133.9,\ 132.5,\ 130.9\ (\times2),\ 129.2,\ 128.0,\ 125.7,\ 120.4,\ 116.3,\ 115.1,\ 104.1,\ 70.2,\ 17.4
$$

**LC-MS:**  
$$
m/z = 441.07\ (\mathrm{[M]^+})
$$"""
    ]

    # One model call per question; results keep the order of question_bank.
    results = []
    for q_text in question_bank:
        prompt = generate_prompt(q_text)
        print(f"Processing question: {q_text}")
        explanation, answer = ollama_api_call(prompt)
        print(f"Generated explanation: {explanation}")
        print(f"Extracted answer: {answer}")
        results.append(
            {'question': q_text, 'explanation': explanation, 'answer': answer}
        )

    return results






# Example usage: both path arguments are currently ignored, because
# process_questions works from its built-in question list and only returns
# the results (its JSON loading and dumping are commented out).
processed = process_questions('final_questions_physics.json','output_with_answers.json')


Processing question: Thermodynamics of a Carbonate Decomposition  
$$\text{CoCO}_3(s) \rightarrow \text{CoO}(s) + \text{CO}_2(g)$$  
$\Delta H^\circ = +115.4\ \text{kJ mol}^{-1}$, $\Delta S^\circ = +147\ \text{J mol}^{-1} \text{K}^{-1}$ (298 K)  
At what temperature (in °C) does $\Delta G^\circ = 0$?
To determine the temperature at which the Gibbs free energy change (ΔG°) equals zero for the decomposition of carbonate, we use the equation:

$$
\Delta G^\circ = \Delta H^\circ - T \Delta S^\circ
$$

Given:
- $\Delta H^\circ = +115.4\ \text{kJ mol}^{-1}$
- $\Delta S^\circ = +147\ \text{J mol}^{-1} \text{K}^{-1}$
- $298\ \text{K}$ (standard temperature)

First, convert $\Delta S^\circ$ to kJ/mol·K to match the units with $\Delta H^\circ$:

$$
\Delta S^\circ = 147\ \text{J mol}^{-1} \text{K}^{-1} = 0.147\ \text{kJ mol}^{-1} \text{K}^{-1}
$$

Setting $\Delta G^\circ = 0$:

$$
0 = 115.4\ \text{kJ mol}^{-1} - T \cdot 0.147\ \text{kJ mol}^{-1} \text{K}^{-1}
$$

Solving for $T$:

$$
T = \frac{11

### check if the answer follows the instruction

In [None]:
def checker_prompt(question, answer, guidlines):
    """Build the prompt asking the model to review ``answer`` against ``guidlines``.

    Args:
        question: The question that was answered.
        answer: The answer to review.
        guidlines: Text of the guidelines the answer has to follow.

    Returns:
        The filled-in prompt string.
    """
    prompt_template = """
    You are a Mathematics expert tasked with checking if the answer to the questions is following the guidelines.
    Question: {question}
    Answer: {answer}
    Guidelines: {guidlines}
    provide detailed feedback on how well the answer follows the guidelines and what could be improved.
    """
    # Every placeholder of the template must be supplied; leaving out
    # ``guidlines`` makes str.format raise KeyError.
    return prompt_template.format(question=question, answer=answer, guidlines=guidlines)


def accept_feedback_prompt(question, answer, feedback, guidlines):
    """Build the prompt asking the model to revise ``answer`` using ``feedback``.

    Args:
        question: The question that was answered.
        answer: The answer to improve.
        feedback: Review text describing how the answer should change.
        guidlines: Text of the guidelines the answer has to follow.

    Returns:
        The filled-in prompt string.
    """
    prompt_template = """
    You are a Mathematics expert tasked with improving the answer based on the feedback provided on guidelines.
    Please provide a revised answer that addresses the feedback. give the final answer betweeen <answer> and </answer> tags.
    Question: {question}
    Answer: {answer}
    Guidelines: {guidlines}
    Feedback: {feedback}

    """
    # ``guidlines`` has to be passed as well, otherwise str.format raises
    # KeyError for the {guidlines} placeholder.
    return prompt_template.format(
        question=question, answer=answer, feedback=feedback, guidlines=guidlines
    )


def _boxed_content(text):
    r"""Return the contents of the first ``\boxed{...}`` in ``text``, or None.

    Braces are matched by depth so nested LaTeX and multi-line answers are
    captured whole. Returns None when no closed, non-empty box is found.
    """
    marker = "\\boxed{"
    start = text.find(marker)
    if start == -1:
        return None
    begin = start + len(marker)
    depth = 1
    for pos in range(begin, len(text)):
        char = text[pos]
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return text[begin:pos].strip() or None
    return None


def accept_feedback(question, answer, feedback):
    """Review ``answer`` against guidelines, then ask the model to revise it.

    Two model calls are made. The first asks for a review of the answer
    (prompt from ``checker_prompt``); the second passes that review on as
    feedback and asks for a revised answer (prompt from
    ``accept_feedback_prompt``).

    Args:
        question: The question that was answered.
        answer: The current answer text.
        feedback: The guidelines text the answer has to follow. The
            parameter name is kept for compatibility; the caller in this
            file passes the guidelines here.

    Returns:
        A tuple ``(complete_answer, boxed_answer)``: the text between
        ``<answer>`` and ``</answer>`` in the revised reply and the content
        of its first ``\\boxed{...}``. Either is ``None`` when not present.
    """
    guidelines = feedback

    review_prompt = checker_prompt(question, answer, guidelines)
    response = ollama.chat(
        model="gemma3:1b",
        messages=[
            {"role": "system", "content": """You are a Mathematics expert tasked with checking if the answer to the questions is following the guidelines. provide detailed feedback on how well the answer follows the guidelines and what could be improved"""},
            {"role": "user", "content": review_prompt}
        ],
    )
    review = response["message"]["content"]
    # remove everything up to and including the </think> tag
    if "</think>" in review:
        review = review.split("</think>")[-1].strip()

    # The review produced above is what the second call has to address.
    # The local gets its own name: assigning to ``accept_feedback_prompt``
    # would shadow the function and fail with UnboundLocalError.
    revision_prompt = accept_feedback_prompt(question, answer, review, guidelines)
    response = ollama.chat(
        model="gemma3:1b",
        messages=[
            {"role": "system", "content": """You are a Mathematics expert tasked with improving the answer based on the feedback provided on guidelines. Please provide a revised answer that addresses the feedback. give the final answer betweeen <answer> and </answer> tags."""},
            {"role": "user", "content": revision_prompt}
        ],
    )
    output = response["message"]["content"]
    # remove everything up to and including the </think> tag
    if "</think>" in output:
        output = output.split("</think>")[-1].strip()

    # DOTALL lets the tagged answer span several lines; the non-greedy
    # group stops at the first closing tag.
    tagged = re.search(r'<answer>(.+?)</answer>', output, re.DOTALL)
    comeplte_answer = tagged.group(1).strip() if tagged else None

    boxed_answer = _boxed_content(output)
    return comeplte_answer, boxed_answer

# Example usage
# open a csv file to read the questions and answers
def read_csv(file_path):
    """Load the comma-separated, UTF-8 encoded file at ``file_path`` as a DataFrame."""
    return pd.read_csv(file_path, encoding='utf-8', sep=',')

# Guideline text every answer is checked against.
guidlines = "The answer should be in a boxed format like this: \\boxed{Your final answer here}"
input_path = "Synthetic Data Evaluation  - Chemistry (1)_with_explanations.csv"
df = read_csv(input_path)

# Create the result columns up front (as preprocess_csv does for its own
# columns) so they hold objects from the start instead of being created
# implicitly by the first write inside the loop.
df['new_answer'] = None
df['new_boxed_answer'] = None

for idx, row in df.iterrows():
    if idx in (0, 1):
        # Skip the first two rows
        continue
    # NOTE(review): the cells are read by position; confirm that positions
    # 5 and 6 really hold the answer and the explanation in this file.
    question = str(row.iloc[4]).strip()
    answer = str(row.iloc[5]).strip()
    explaination = str(row.iloc[6]).strip()
    print(f"Question: {question}")
    print(f"Answer: {answer}")
    print(f"Explaination: {explaination}")
    comeplte_answer, boxed_answer = accept_feedback(question, answer, guidlines)
    print(f"Complete Answer: {comeplte_answer}")
    print(f"Boxed Answer: {boxed_answer}")

    # Store results in the new columns
    df.at[idx, 'new_answer'] = comeplte_answer
    df.at[idx, 'new_boxed_answer'] = boxed_answer

# Construct the output filename by appending "_with_answer_feedback.csv"
base, _ = os.path.splitext(input_path)
output_path = f"{base}_with_answer_feedback.csv"
# Write the updated DataFrame to the new CSV
df.to_csv(output_path, index=False)
print(f"✅  Output written to: {output_path}")




