In [1]:
import pandas as pd
import json

# Root of the shared GCAP data tree; the ai_geo1 project folder sits directly under it.
gcap_data = "/oak/stanford/groups/maggiori/GCAP/data/"
ai_geo1 = gcap_data + "ai_geo1/"

# Model tag and sample years; both are interpolated into the parquet file names below.
model = "llama33_70b"
years = ["2025"]

In [2]:
# Columns pulled out of every parsed LLM response and added to the output frame.
cols_list = [
    # per-topic flags read from the JSON payload of the response
    "tariffs_any",
    "export_controls_any",
    "sanctions_any",
    "boycotts_any",
    "investment_screening_any",
    "geo_subsidies_any",
    "geoeconomic_any",
    # free-text summary and the parser's "0"/"1" status marker
    "summary",
    "parse_error",
]

def _extract_between(text, open_tag, close_tag):
    """Return the text between ``open_tag`` and the next ``close_tag``, or None if either tag is absent."""
    open_at = text.find(open_tag)
    if open_at == -1:
        return None
    body_start = open_at + len(open_tag)
    # Search for the closing tag only after the opening one so a stray
    # earlier close tag cannot produce an inverted (empty) slice.
    body_end = text.find(close_tag, body_start)
    if body_end == -1:
        return None
    return text[body_start:body_end]


def safe_json_loads(output_str):
    """Parse one raw LLM response into a dict of fields.

    The response is expected to hold a JSON object between ``<JSON>`` and
    ``</JSON>`` (a Markdown ``json`` code fence is accepted as an equivalent)
    and a free-text summary between ``<SUMMARY>`` and ``</SUMMARY>``.

    Parameters
    ----------
    output_str : str
        Raw model output, optionally prefixed by the assistant header token.

    Returns
    -------
    dict or None
        The decoded JSON object, plus ``'summary'`` (stripped text, only when
        both summary tags are present) and ``'parse_error'`` (``"1"`` when a
        JSON or summary section is missing, ``"0"`` otherwise).
        ``None`` when the input is not a string, the JSON section is not
        valid JSON, or it does not decode to an object.
    """
    if not isinstance(output_str, str):
        return None

    # Remove the header token and normalise Markdown fences to the tag form.
    # The order matters: the opening fence must be rewritten before the bare one.
    output_str = output_str.replace("<|start_header_id|>assistant<|end_header_id|>", "")
    output_str = output_str.replace("```json", "<JSON>")
    output_str = output_str.replace("```", "</JSON>")

    result = {}
    parse_error = False

    json_str = _extract_between(output_str, "<JSON>", "</JSON>")
    if json_str is None:
        parse_error = True
    else:
        try:
            result = json.loads(json_str)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError subclass.
            return None
        if not isinstance(result, dict):
            # A list/scalar payload cannot carry the named fields.
            return None

    summary = _extract_between(output_str, "<SUMMARY>", "</SUMMARY>")
    if summary is None:
        parse_error = True
    else:
        result['summary'] = summary.strip()

    result['parse_error'] = "1" if parse_error else "0"
    return result


In [3]:
# For every sample year: load the raw LLM outputs, parse each response into the
# columns named in `cols_list`, report how many failed, and save the result.
for year in years:
    # `ai_geo1` already ends with "/", so the relative part must not start with one.
    df = pd.read_parquet(ai_geo1 + f"temp/transcripts/broad_analysis_{model}_{year}_workshop_sample.parquet")
    parsed_outputs = [safe_json_loads(output) for output in df['llm_output']]

    all_dicts = []
    for parsed_output in parsed_outputs:
        if isinstance(parsed_output, dict):
            # Values are stored as strings; an absent field stays a real null
            # rather than becoming the literal text "None".
            output_dict = {
                col: (None if parsed_output.get(col) is None else str(parsed_output[col]))
                for col in cols_list
            }
        else:
            # safe_json_loads returns None when the response could not be parsed.
            output_dict = dict.fromkeys(cols_list)
            output_dict['parse_error'] = "1"
        all_dicts.append(output_dict)

    # Iterate over `cols_list` (not `all_dicts[0]`) so an empty frame still
    # gets the columns instead of raising IndexError.
    for col in cols_list:
        df[col] = [d[col] for d in all_dicts]

    num_of_parse_errors = int((df['parse_error'] == "1").sum())

    print(f"number of parse errors: {num_of_parse_errors} for year {year}")

    df.to_parquet(ai_geo1 + f"temp/transcripts/broad_analysis_{model}_{year}_workshop_sample_parsed.parquet", index=False)

number of parse errors: 0 for year 2025


In [6]:
# Show the raw model response for the first row as a sanity check on the format.
print(df["llm_output"].iloc[0])

<|start_header_id|>assistant<|end_header_id|>

<JSON>
{
  "tariffs_any": 1,
  "sanctions_any": 0,
  "export_controls_any": 0,
  "boycotts_any": 0,
  "investment_screening_any": 0,
  "geo_subsidies_any": 0,
  "geoeconomic_any": 1
}
</JSON>

<SUMMARY>
The company discusses the potential impact of tariffs on their business, mentioning that they have manufacturing facilities in the U.S., Europe, and Asia, which gives them flexibility to address any tariff situation. They also mention that they are well-situated to handle tariffs due to their global manufacturing presence. However, they do not provide any specific details on how tariffs have affected their business in the past or how they plan to mitigate any potential negative impacts in the future.

The company does not discuss sanctions, export controls, boycotts, investment screening, or geoeconomic subsidies in the context of their business. However, they do mention the potential for geoeconomic pressure, particularly with regards to t

In [5]:
# Flag columns to tabulate; each name is "<topic>_any".
flags = [
    f"{topic}_any"
    for topic in (
        "tariffs",
        "export_controls",
        "sanctions",
        "boycotts",
        "investment_screening",
        "geo_subsidies",
        "geoeconomic",
    )
]


# Per year, count the rows whose flag column holds the string "1" and print the tallies as JSON.
for year in years:
    parsed_path = ai_geo1 + f"temp/transcripts/broad_analysis_{model}_{year}_workshop_sample_parsed.parquet"
    df_new = pd.read_parquet(parsed_path)
    # Cast to a built-in int: json.dumps cannot serialise numpy integers.
    stat = {flag: int(df_new[flag].eq("1").sum()) for flag in flags}
    print(f"{year}: {json.dumps(stat)}")
    

2025: {"tariffs_any": 44, "export_controls_any": 1, "sanctions_any": 0, "boycotts_any": 0, "investment_screening_any": 0, "geo_subsidies_any": 2, "geoeconomic_any": 45}
