In [1]:
import os
import csv 
import sys
import json
import re
import pandas as pd
import numpy as np

In [5]:
import pandas as pd
import csv
def generate_prompt(question):
    """Wrap ``question`` in the physics-expert prompt template.

    The template is emitted verbatim, including its leading newline and the
    four-space indentation carried by every line of the literal. Only the
    question is substituted; the doubled braces collapse to a literal
    ``\\boxed{Your final answer here}`` example for the model to imitate.

    Args:
        question: The question text to embed (formatted with ``str()`` rules).

    Returns:
        The fully rendered prompt string.
    """
    return f"""
    You are a physics expert tasked with answering questions.
    Answer the following question with a detailed explanation:
    Question: {question}
    provide the final answer in a boxed format like this:
    \n\n\\boxed{{Your final answer here}}\n\n
    """

In [6]:
from together import Together
# Load variables from a .env file into the process environment so the key
# below can be read with os.getenv.
from dotenv import load_dotenv
load_dotenv()
# Read the Together API key from the environment; os.getenv returns None when
# the variable is not set.
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")
# NOTE(review): `client` is constructed here, but together_api_call() below
# sends its request through `ollama.chat` and never references `client` —
# confirm whether the Together client is still needed.
client = Together(api_key=TOGETHER_API_KEY)
def _extract_boxed_answer(text):
    """Return the contents of the first balanced ``\\boxed{...}`` in ``text``.

    Braces are matched by depth so nested LaTeX groups such as
    ``\\boxed{\\frac{1}{2}}`` are returned whole, and the contents may span
    several lines. A backslash escapes the character after it, so ``\\{`` and
    ``\\}`` do not affect the depth count.

    Returns:
        The stripped contents of the box, or ``None`` when no balanced
        ``\\boxed{...}`` is present.
    """
    marker = "\\boxed{"
    search_from = 0
    while True:
        start = text.find(marker, search_from)
        if start == -1:
            return None
        begin = start + len(marker)
        depth = 1
        pos = begin
        while pos < len(text):
            ch = text[pos]
            if ch == "\\":
                # Skip the escaped character (e.g. "\{", "\}", "\\").
                pos += 2
                continue
            if ch == "{":
                depth += 1
            elif ch == "}":
                depth -= 1
                if depth == 0:
                    return text[begin:pos].strip()
            pos += 1
        # This occurrence never closed; try the next one, if any.
        search_from = begin


def together_api_call(prompt):
    """Ask the local Ollama model to answer ``prompt`` and pull out the boxed answer.

    NOTE: despite the name, this function does not use the Together client;
    it sends the request through ``ollama.chat`` with model ``qwen3:0.6b``.
    The name is kept so existing callers continue to work.

    Args:
        prompt: User message content sent to the model.

    Returns:
        A tuple ``(explaination, answer)``: the model's reply with everything
        up to and including the last ``</think>`` tag removed, and the
        contents of the first balanced ``\\boxed{...}`` in that reply
        (``None`` if there is none).
    """
    # Function-local import (as in the original cell): the dependency is only
    # required when a call is actually made.
    import ollama

    response = ollama.chat(
        model="qwen3:0.6b",
        messages=[
            {"role": "system", "content": """You are a physics expert tasked with answering questions. Answer the following question with a detailed explanation. Provide the final answer in a boxed format like this: \n\n\\boxed{Your final answer here}\n\n"""},
            {"role": "user", "content": prompt},
        ],
    )

    output = response["message"]["content"]
    # Drop the model's reasoning: keep only what follows the last </think> tag.
    if "</think>" in output:
        output = output.split("</think>")[-1].strip()

    print(output)
    explaination = output
    # A non-greedy regex would stop at the first "}", truncating answers that
    # contain nested braces; use a brace-balanced scan instead.
    answer = _extract_boxed_answer(explaination)

    return explaination, answer

In [8]:
import os
import pandas as pd

def preprocess_csv(file_path, task_ids=None, skip_ids=("SDP86",)):
    """Generate explanations and answers for selected rows of a question CSV.

    Reads ``file_path``, adds ``explaination`` and ``answer`` columns (empty
    strings by default), fills them for every row whose ``Taski ID`` is
    selected, and writes the result to ``<file_path stem>_with_explanations.csv``.

    Args:
        file_path: Path to a CSV containing ``Taski ID`` and ``Question`` columns.
        task_ids: Iterable of task IDs to process. Defaults to ``"SDP79"``
            through ``"SDP92"`` inclusive.
        skip_ids: Iterable of task IDs to leave untouched even if selected.
            Defaults to ``("SDP86",)``.

    Raises:
        ValueError: If a required column is missing.

    Rows with a missing or blank question are skipped rather than sending the
    literal text ``"nan"`` to the model.
    """
    df = pd.read_csv(file_path)

    # Ensure required columns exist (checked in the same order as before).
    for required in ('Taski ID', 'Question'):
        if required not in df.columns:
            raise ValueError(f"CSV file must contain a '{required}' column.")

    # Initialize new columns (empty strings by default).
    df['explaination'] = ''
    df['answer'] = ''

    if task_ids is None:
        task_ids = (f"SDP{i}" for i in range(79, 93))
    wanted_ids = set(task_ids) - set(skip_ids or ())

    # Normalize IDs once instead of per row; NaN becomes "nan" and never matches.
    normalized_ids = df['Taski ID'].astype(str).str.strip()

    for idx in df.index[normalized_ids.isin(wanted_ids)]:
        task_id = normalized_ids.at[idx]
        raw_question = df.at[idx, 'Question']
        if pd.isna(raw_question) or not str(raw_question).strip():
            print(f"Skipping {task_id}: no question text.")
            continue
        question = str(raw_question).strip()
        prompt = generate_prompt(question)

        # Get explanation and answer from the model wrapper.
        explaination, answer = together_api_call(prompt)

        # Store results in the new columns.
        df.at[idx, 'explaination'] = explaination
        df.at[idx, 'answer'] = answer

        # Print to console.
        print(f"TaskID: {task_id}")
        print(f"Question: {question}")
        print(f"Explanation: {explaination}")
        print(f"Answer: {answer}\n")

    # Construct a new filename by appending "_with_explanations.csv".
    base, _ = os.path.splitext(file_path)
    output_path = f"{base}_with_explanations.csv"
    # Write the updated DataFrame to the new CSV.
    df.to_csv(output_path, index=False)
    print(f"✅  Output written to: {output_path}")

# Run the pipeline on the synthetic-data sheet; the result is written to a
# sibling file whose name ends in "_with_explanations.csv".
preprocess_csv("Atlas 2.0 Datasheet - Synthetic Data.csv")  


To derive the first-order expression for the shift in the W boson mass $ \delta M_W $, we begin with the Standard Model tree-level relation:  
$$ M_W = \frac{1}{2} g v. $$  

Given that the shift in the Higgs vacuum expectation value $ v $ is $ \delta v \ll v $, the corresponding shift in the W boson mass $ \delta M_W $ is given by the first-order approximation:  
$$ \delta M_W = \frac{1}{2} g \delta v. $$  

This expression accounts for the small change in $ v $, which directly affects the mass of the W boson. To constrain $ \delta v $, we observe that the measured value of $ \delta M_W $ must be consistent with the proportionality relationship. If $ \delta M_W $ is known and $ g $ is a known constant, then solving for $ \delta v $ yields:  
$$ \delta v = \frac{2}{g} \delta M_W. $$  

Since $ \delta v \ll v $, the actual value of $ \delta v $ is on the order of $ 10^{-3} $, making the shift in $ M_W $ a small quantity. This allows physicists to determine $ \delta v $ from the observed

In [None]:
import json

def process_questions(input_json_path: str, output_json_path: str):
    """
    Generate an explanation and a boxed answer for a built-in list of questions.

    Each question in the hard-coded list below is turned into a prompt with
    generate_prompt, sent through together_api_call, and echoed to the console
    together with the model's explanation and extracted answer.

    NOTE: both path arguments are currently ignored. No JSON file is read and
    nothing is written to disk; the parameters are kept only so the signature
    stays stable for existing callers.

    Returns:
        A list of dicts, one per question and in list order, with the keys
        'question', 'explanation' and 'answer'.
    """
    question_bank = [
r"""Thermodynamics of a Carbonate Decomposition  
$$\text{CoCO}_3(s) \rightarrow \text{CoO}(s) + \text{CO}_2(g)$$  
$\Delta H^\circ = +115.4\ \text{kJ mol}^{-1}$, $\Delta S^\circ = +147\ \text{J mol}^{-1} \text{K}^{-1}$ (298 K)  
At what temperature (in °C) does $\Delta G^\circ = 0$?""",

r"""Nernst Equation for a Mixed-Redox Cell  
At 25 °C, the cell is:  
Pt | V³⁺ (0.100 M), V²⁺ (0.0150 M) ‖ Ag⁺ (0.0500 M) | Ag  
Standard potentials:  
V²⁺ ⇌ V³⁺ + e E° = +0.255 V  
Ag⁺ + e⁻ ⇌ Ag E° = +0.799 V  
Calculate the cell potential $E_{\text{cell}}$ (in V).""",

r"""Calculate the pH at the equivalence point when 50.0 mL of 0.100 M NH₃ is titrated with 0.100 M HCl, given $K_b(\text{NH}_3)=1.8×10^{-5}$ and $K_w=1.0×10^{-14}$.""",

r"""Electronic solvation free energy  
A DFT calculation gives electronic energies for methyl α-D-glucopyranoside of  
$E_{\text{gas}} = -362.314$ hartree and $E_{\text{soln}} = -362.503$ hartree.  
(1 hartree = 2625.5 kJ mol⁻¹.)  
Calculate the electronic component of the solvation free energy  
$\Delta G_{\text{solv}} = E_{\text{soln}} - E_{\text{gas}}$ in kJ mol⁻¹. Report to three decimal places.""",

r"""Calculate the initial rate (mol$\cdot$L$^{-1}\cdot$s$^{-1}$) of cis-to-trans isomerization for a photoactivatable azobenzene derivative with quantum yield $\phi = 0.42$ under $650$ nm illumination with photon flux $1.5 \times 10^{16}$ photons$\cdot$s$^{-1}\cdot$cm$^{-2}$ in a $1$ mL solution contained in a $1$ cm$^{2}$ cuvette (pathlength $1$ cm) at initial concentration $5~\mu$M. Assume that 100% of the incident photons are absorbed by the solution (i.e., no transmission). Use Avogadro’s number $N_A = 6.022 \times 10^{23}$ mol$^{-1}$.""",

r"""A dendritic polymer bearing eight peripheral photo-cleavable o-nitrobenzyl ester groups is dissolved at $0.5$ mM in $1$ mL solution (pathlength $1$ cm), which is continuously illuminated at $365$ nm with intensity $10$ mW$\cdot$cm$^{-2}$ for $5$ minutes.  
Given molar extinction coefficient $\varepsilon = 4.5 \times 10^{3}$ L$\cdot$mol$^{-1}\cdot$cm$^{-1}$ and quantum yield of cleavage $\phi = 0.25$, calculate the concentration (mol$\cdot$L$^{-1}$) of cleaved ester groups produced, assuming all photons absorbed contribute to cleavage and uniform illumination across the cuvette cross-section. The polymer contains eight cleavable groups per molecule.""",

r"""A supramolecular intracellular assembly shows sigmoidal kinetics characterized by two activation free energy barriers at $310$ K:  
nucleation barrier $\Delta G^\ddagger_{\mathrm{nucleation}} = 75$ kJ$\cdot$mol$^{-1}$ and  
elongation barrier $\Delta G^\ddagger_{\mathrm{elongation}} = 40$ kJ$\cdot$mol$^{-1}$.  
Given the Arrhenius pre-exponential factor for nucleation $A_{\mathrm{nucleation}}$ is 100 times larger than that for elongation $A_{\mathrm{elongation}}$,  
calculate the ratio of their rate constants $k_{\mathrm{nucleation}}/k_{\mathrm{elongation}}$ at $310$ K.  
Use the gas constant $R = 8.314$ J$\cdot$mol$^{-1}\cdot$K$^{-1}$.""",

r"""A synthetic intracellular assembly formed from peptide amphiphiles has a critical micelle concentration (CMC) of $15~\mu$M at $37^\circ$C.  
After covalent attachment of a photo-cleavable group, the CMC increases to $45~\mu$M.  
Define the change in standard Gibbs free energy of micellization as  
$\Delta \Delta G^\circ = \Delta G^\circ_{\mathrm{modified}} - \Delta G^\circ_{\mathrm{original}}$.  
Calculate $\Delta \Delta G^\circ$ (kJ$\cdot$mol$^{-1}$) at $310$ K, assuming ideal solution behavior and using $R = 8.314$ J$\cdot$mol$^{-1}\cdot$K$^{-1}$.""",

r"""During photochemical control of intracellular assembly, the photoinduced rate constant $k_{\mathrm{photo}}$ follows first-order kinetics and depends on photon flux $I$ (photons$\cdot$cm$^{-2}\cdot$s$^{-1}$) as $k_{\mathrm{photo}} = \sigma I$, where $\sigma$ is the absorption cross-section.  
Given $\sigma = 2.5 \times 10^{-17}$ cm$^{2}$ and a target $k_{\mathrm{photo}} = 0.01$ s$^{-1}$ under $700$ nm illumination, calculate the required light intensity $I_{\mathrm{W}}$ in W$\cdot$cm$^{-2}$.  
Use Planck’s constant $h = 6.626 \times 10^{-34}$ J$\cdot$s and speed of light $c = 3.00 \times 10^{8}$ m$\cdot$s$^{-1}$.  
Show all steps starting from determining the photon flux $I$.""",

r"""Upon NIR irradiation at 800 nm, a photoactivatable supramolecular monomer is illuminated in a $1$ cm pathlength cuvette with monomer concentration $10~\mu$M and molar absorptivity $\varepsilon = 5{,}000$ L$\cdot$mol$^{-1}\cdot$cm$^{-1}$.  
Incident light intensity is $2 \times 10^{15}$ photons$\cdot$s$^{-1}\cdot$cm$^{-2}$ over a $0.5$ cm$^{2}$ cross-sectional area.  
Calculate the fraction of incident photons absorbed by the solution, assuming Beer–Lambert law applies and uniform illumination.""",

r"""Activation free energy $\Delta G^\ddagger$  
Data:  
• NMR coalescence at $T_c = 268$ K; $\Delta\nu = 120$ Hz  
• Rate at coalescence $k_c = \pi \cdot \Delta\nu/\sqrt{2}$  
• Eyring: $k_c = (k_B T_c/h)\cdot e^{–\Delta G^\ddagger/(R T_c)}$  
• $R = 8.314$ J$\cdot$K⁻¹$\cdot$mol⁻¹  
You may calculate $(k_B T_c/h)$ in one line.  
Report $\Delta G^\ddagger$ in kJ$\cdot$mol⁻¹ to three significant figures.""",

r"""**By given Spectroscopic Data Analysis, predict the name of the structure:**

**Molecular Formula:** $C_{20}H_{16}FN_{5}O_{2}S_{2}$

**IR (KBr, cm$^{-1}$):**

* 3352: N–H stretching (secondary amine)  
* 3220: N–H stretching (secondary amide)  
* 2891: C–H stretching ($\mathsf{-CH_3}$ group)  
* 2982: C–H stretching (aromatic ring)  
* 2772: C–H stretching ($\mathsf{-CH_2}$ group)  
* 1704: C=O stretching (secondary amide)  
* 1552: C=C stretching (aromatic ring)  
* 1290: C–H bending  
* 1194, 1088: C–O–C stretching  
* 1118: C–F stretching  
* 775: 1,2-disubstituted benzene ring  

**$^{1}$H NMR (300 MHz, DMSO-d$_{6}$, $\delta$ ppm):**

* 9.17 (s, 1H, Het–NH–CO–Ar)  
* 8.03–6.58 (m, 9H, Ar–H)  
* 4.44 (s, 2H, Het–CH$_{2}$)  
* 4.02 (s, 1H, Ar–NH–CH$_{2}$)  
* 2.47 (s, 3H, –N–C(CH$_{3}$)–C–)  

**$^{13}$C NMR (100 MHz, DMSO-d$_{6}$, $\delta$ ppm):**

$$
177.8,\ 166.3,\ 163.2,\ 156.2,\ 155.1,\ 154.2,\ 133.9,\ 132.5,\ 130.9\ (\times2),\ 129.2,\ 128.0,\ 125.7,\ 120.4,\ 116.3,\ 115.1,\ 104.1,\ 70.2,\ 17.4
$$

**LC-MS:**  
$$
m/z = 441.07\ (\mathrm{[M]^+})
$$""",
    ]

    def _solve(question_text):
        # Build the prompt first, then log, then call the model — the same
        # order of side effects for every question.
        prompt = generate_prompt(question_text)
        print(f"Processing question: {question_text}")
        explanation, answer = together_api_call(prompt)
        print(f"Generated explanation: {explanation}")
        print(f"Extracted answer: {answer}")
        return {
            'question': question_text,
            'explanation': explanation,
            'answer': answer,
        }

    return [_solve(question_text) for question_text in question_bank]






# Example usage:
# NOTE(review): process_questions() does not read or write either path — its
# JSON load/dump code is commented out — so these two arguments are
# placeholders and `processed` holds results for the built-in question list.
processed = process_questions('final_questions_physics.json','output_with_answers.json')


Processing question: Thermodynamics of a Carbonate Decomposition  
$$\text{CoCO}_3(s) \rightarrow \text{CoO}(s) + \text{CO}_2(g)$$  
$\Delta H^\circ = +115.4\ \text{kJ mol}^{-1}$, $\Delta S^\circ = +147\ \text{J mol}^{-1} \text{K}^{-1}$ (298 K)  
At what temperature (in °C) does $\Delta G^\circ = 0$?
To determine the temperature at which the Gibbs free energy change (ΔG°) equals zero for the decomposition of carbonate, we use the equation:

$$
\Delta G^\circ = \Delta H^\circ - T \Delta S^\circ
$$

Given:
- $\Delta H^\circ = +115.4\ \text{kJ mol}^{-1}$
- $\Delta S^\circ = +147\ \text{J mol}^{-1} \text{K}^{-1}$
- $298\ \text{K}$ (standard temperature)

First, convert $\Delta S^\circ$ to kJ/mol·K to match the units with $\Delta H^\circ$:

$$
\Delta S^\circ = 147\ \text{J mol}^{-1} \text{K}^{-1} = 0.147\ \text{kJ mol}^{-1} \text{K}^{-1}
$$

Setting $\Delta G^\circ = 0$:

$$
0 = 115.4\ \text{kJ mol}^{-1} - T \cdot 0.147\ \text{kJ mol}^{-1} \text{K}^{-1}
$$

Solving for $T$:

$$
T = \frac{11