In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load data from parquet file
df_tmp = pd.read_parquet('./data/Results/df_train_new.parquet')

In [3]:
ratio_chars = ['CAPEI', 'bm',
       'evm', 'pe_exi', 'pe_inc', 'ps', 'pcf',
       'dpr', 'npm', 'opmbd', 'opmad', 'gpm', 'ptpm', 'cfm', 'roa', 'roe',
       'roce', 'efftax', 'aftret_eq', 'aftret_invcapx', 'aftret_equity',
       'pretret_noa', 'pretret_earnat', 'GProf', 'equity_invcap',
       'debt_invcap', 'totdebt_invcap', 'capital_ratio', 'int_debt',
       'int_totdebt', 'cash_lt', 'invt_act', 'rect_act', 'debt_at',
       'debt_ebitda', 'short_debt', 'curr_debt', 'lt_debt', 'profit_lct',
       'ocf_lct', 'cash_debt', 'fcf_ocf', 'lt_ppent', 'dltt_be', 'debt_assets',
       'debt_capital', 'de_ratio', 'intcov', 'intcov_ratio', 'cash_ratio',
       'quick_ratio', 'curr_ratio', 'cash_conversion', 'inv_turn', 'at_turn',
       'rect_turn', 'pay_turn', 'sale_invcap', 'sale_equity', 'sale_nwc',
       'rd_sale', 'adv_sale', 'staff_sale', 'accrual', 'ptb', 'PEG_trailing',
       'divyield']

per_share_chars = ['dividend_p','BE_p','Liability_p','cur_liability_p','LT_debt_p',
                  'cash_p', 'total_asset_p', 'tot_debt_p', 'accrual_p', 'EBIT_p', 
                   'cur_asset_p', 'pbda_p', 'ocf_p', 'inventory_p', 'receivables_p',
                   'Cur_debt_p', 'interest_p', 'fcf_ocf_p', 'evm_p',
                   'sales_p', 'invcap_p', 'c_equity_p', 'rd_p', 'opmad_p', 'gpm_p','ptpm_p'
                  ]

macro_chars = ['RGDP', 'RCON', 'INDPROD', 'UNEMP']

fundamental_chars = ['ret', 'prc',
                    'EPS_true_l1_q1','EPS_true_l1_q2','EPS_true_l1_q3',
                    'EPS_true_l1_y1','EPS_true_l1_y2',
                    ]

analyst_chars = ['EPS_ana_q1','EPS_ana_q2','EPS_ana_q3','EPS_ana_y1','EPS_ana_y2']

targets = ['EPS_true_q1', 'EPS_true_q2', 'EPS_true_q3', 'EPS_true_y1', 'EPS_true_y2']

In [5]:
# List to store reorganized data
model_data_all = []

# Define the predictor columns and label columns (based on your original loop)
for q in ['q1', 'q2', 'q3', 'y1', 'y2']:
    if q in ['q1', 'q2', 'q3']:
        months_back = 12
    else:
        months_back = 24 if q == 'y2' else 12

    # Define the predictor columns
    x_cols = ratio_chars + ['ret', 'prc', f'EPS_true_l1_{q}', f'EPS_ana_{q}'] + ['RGDP', 'RCON', 'INDPROD', 'UNEMP']
    y_col = f'EPS_true_{q}'
    ann_date_col = f'ANNDATS_{q}'

    # Reorganize the whole dataset into predictor (X) and label (Y) pairs
    for t in df_tmp['YearMonth'].unique():
        df_data = df_tmp[df_tmp['YearMonth'] == t].copy()

        # Ensure there's no leakage of future information
        df_data = df_data[df_data[ann_date_col] <= t]  # Exclude future announcements

        # Drop NA and infinite values
        df_data = df_data.replace([np.inf, -np.inf], np.nan).dropna(subset=x_cols + [y_col])

        # Skip if the data is empty after cleaning
        if df_data.empty:
            continue

        # Create a DataFrame for the model
        df_model = df_data[x_cols].copy()
        df_model['label'] = df_data[y_col].values
        df_model['permno'] = df_data['permno'].values
        df_model['YearMonth'] = df_data['YearMonth'].values
        df_model['horizon'] = q  # Add the forecast horizon

        # Append to the list of model data
        model_data_all.append(df_model)

# Combine all model rows into one DataFrame
model_data_df = pd.concat(model_data_all, ignore_index=True)

model_data_df['prediction'] = np.nan  # placeholder

# Optional: Reorder columns for clarity
cols_first = ['permno', 'YearMonth', 'horizon', 'label', 'prediction']
other_cols = [col for col in model_data_df.columns if col not in cols_first]
model_data_df = model_data_df[cols_first + other_cols]

model_data_df.head()

Unnamed: 0,permno,YearMonth,horizon,label,prediction,CAPEI,bm,evm,pe_exi,pe_inc,...,INDPROD,UNEMP,EPS_true_l1_q2,EPS_ana_q2,EPS_true_l1_q3,EPS_ana_q3,EPS_true_l1_y1,EPS_ana_y1,EPS_true_l1_y2,EPS_ana_y2
0,10079.0,1989-03-31,q1,-0.35,,21.085184,1.020966,-4.142299,-3.298611,-3.298611,...,0.00355,5.4,,,,,,,,
1,10094.0,1989-03-31,q1,1.13,,8.323051,1.035691,5.501297,0.268518,0.268518,...,0.00355,5.4,,,,,,,,
2,10104.0,1989-03-31,q1,0.36,,72.320234,0.161515,10.875978,26.162766,26.162766,...,0.00355,5.4,,,,,,,,
3,10171.0,1989-03-31,q1,0.05,,16.07111,0.623432,6.882616,12.142323,12.142323,...,0.00355,5.4,,,,,,,,
4,10179.0,1989-03-31,q1,0.35,,11.771166,0.945508,9.975825,7.949438,8.012821,...,0.00355,5.4,,,,,,,,


In [6]:
# Save or return the final model_data_df
model_data_df.to_csv('./data/Results/model_data_df.csv', index=False)

In [8]:
model_data_df.columns

Index(['permno', 'YearMonth', 'forecast_time', 'horizon', 'label',
       'prediction', 'CAPEI', 'bm', 'evm', 'pe_exi', 'pe_inc', 'ps', 'pcf',
       'dpr', 'npm', 'opmbd', 'opmad', 'gpm', 'ptpm', 'cfm', 'roa', 'roe',
       'roce', 'efftax', 'aftret_eq', 'aftret_invcapx', 'aftret_equity',
       'pretret_noa', 'pretret_earnat', 'GProf', 'equity_invcap',
       'debt_invcap', 'totdebt_invcap', 'capital_ratio', 'int_debt',
       'int_totdebt', 'cash_lt', 'invt_act', 'rect_act', 'debt_at',
       'debt_ebitda', 'short_debt', 'curr_debt', 'lt_debt', 'profit_lct',
       'ocf_lct', 'cash_debt', 'fcf_ocf', 'lt_ppent', 'dltt_be', 'debt_assets',
       'debt_capital', 'de_ratio', 'intcov', 'intcov_ratio', 'cash_ratio',
       'quick_ratio', 'curr_ratio', 'cash_conversion', 'inv_turn', 'at_turn',
       'rect_turn', 'pay_turn', 'sale_invcap', 'sale_equity', 'sale_nwc',
       'rd_sale', 'adv_sale', 'staff_sale', 'accrual', 'ptb', 'PEG_trailing',
       'divyield', 'ret', 'prc', 'EPS_true_l1_q

In [16]:
def generate_gpt_prompt_from_index(index, df):
    # Mapping of variable names to natural language descriptions
    variable_map = {
        'CAPEI': 'Shiller P/E ratio',
        'bm': 'Book-to-Market Ratio',
        'evm': 'Enterprise Value Multiple',
        'pe_exi': 'Price-to-Earnings (excl. extraordinary items)',
        'pe_inc': 'Price-to-Earnings (incl. extraordinary items)',
        'ps': 'Price-to-Sales Ratio',
        'pcf': 'Price-to-Cash Flow Ratio',
        'dpr': 'Dividend Payout Ratio',
        'npm': 'Net Profit Margin',
        'opmbd': 'Operating Margin Before Depreciation',
        'opmad': 'Operating Margin After Depreciation',
        'gpm': 'Gross Profit Margin',
        'ptpm': 'Pretax Profit Margin',
        'cfm': 'Cash Flow Margin',
        'roa': 'Return on Assets',
        'roe': 'Return on Equity',
        'roce': 'Return on Capital Employed',
        'efftax': 'Effective Tax Rate',
        'aftret_eq': 'After-Tax Return on Equity',
        'aftret_invcapx': 'After-Tax Return on Invested Capital',
        'aftret_equity': 'After-Tax Return on Equity (again)',
        'pretret_noa': 'Pretax Return on Net Operating Assets',
        'pretret_earnat': 'Pretax Return on Earnings Assets',
        'GProf': 'Gross Profitability',
        'equity_invcap': 'Equity to Invested Capital',
        'debt_invcap': 'Debt to Invested Capital',
        'totdebt_invcap': 'Total Debt to Invested Capital',
        'capital_ratio': 'Capital Ratio',
        'int_debt': 'Interest-Bearing Debt',
        'int_totdebt': 'Interest-Bearing to Total Debt',
        'cash_lt': 'Cash to Long-Term Assets',
        'invt_act': 'Inventory to Current Assets',
        'rect_act': 'Receivables to Current Assets',
        'debt_at': 'Debt to Total Assets',
        'debt_ebitda': 'Debt to EBITDA',
        'short_debt': 'Short-Term Debt',
        'curr_debt': 'Current Debt',
        'lt_debt': 'Long-Term Debt',
        'profit_lct': 'Profit to Liabilities',
        'ocf_lct': 'Operating Cash Flow to Liabilities',
        'cash_debt': 'Cash to Debt',
        'fcf_ocf': 'Free Cash Flow to Operating Cash Flow',
        'lt_ppent': 'Long-Term Assets to PP&E',
        'dltt_be': 'Long-Term Debt to Book Equity',
        'debt_assets': 'Debt to Total Assets',
        'debt_capital': 'Debt to Capital',
        'de_ratio': 'Debt-to-Equity Ratio',
        'intcov': 'Interest Coverage',
        'intcov_ratio': 'Interest Coverage Ratio',
        'cash_ratio': 'Cash Ratio',
        'quick_ratio': 'Quick Ratio',
        'curr_ratio': 'Current Ratio',
        'cash_conversion': 'Cash Conversion Cycle',
        'inv_turn': 'Inventory Turnover',
        'at_turn': 'Asset Turnover',
        'rect_turn': 'Receivables Turnover',
        'pay_turn': 'Payables Turnover',
        'sale_invcap': 'Sales to Invested Capital',
        'sale_equity': 'Sales to Equity',
        'sale_nwc': 'Sales to Net Working Capital',
        'rd_sale': 'R&D to Sales',
        'adv_sale': 'Advertising to Sales',
        'staff_sale': 'Staff Expense to Sales',
        'accrual': 'Accruals',
        'ptb': 'Price-to-Book Ratio',
        'PEG_trailing': 'PEG Ratio (Trailing)',
        'divyield': 'Dividend Yield',
        'ret': 'Stock Return',
        'prc': 'Price',
        'EPS_true_l1_q1': 'Lagged True EPS for q1',
        'EPS_ana_q1': 'Analyst Forecast EPS for q1',
        'RGDP': 'Real GDP',
        'RCON': 'Consumer Spending',
        'INDPROD': 'Industrial Production',
        'UNEMP': 'Unemployment Rate',
        'EPS_true_l1_q2': 'Lagged True EPS for q2',
        'EPS_ana_q2': 'Analyst Forecast EPS for q2',
        'EPS_true_l1_q3': 'Lagged True EPS for q3',
        'EPS_ana_q3': 'Analyst Forecast EPS for q3',
        'EPS_true_l1_y1': 'Lagged True EPS for y1',
        'EPS_ana_y1': 'Analyst Forecast EPS for y1',
        'EPS_true_l1_y2': 'Lagged True EPS for y2',
        'EPS_ana_y2': 'Analyst Forecast EPS for y2',
    }

    row = df.loc[index]
    # Format values with their descriptions
    formatted_pairs = [f"{variable_map[col]}: {row[col]}" for col in df.columns if col in variable_map]
    # Combine all into a string with spacing, not newlines
    joined_description = "  ".join(formatted_pairs)

    prompt = (
        "I'm analyzing a firm and would like you to predict its upcoming earnings per share. "
        "Here are the financial and market characteristics for the firm: "
        f"{joined_description} "
        "Can you forecast the firm’s future earnings for quarters q1, q2, q3, y1, and y2 based on these characteristics? "
        "Please do not search the web."
    )
    return prompt


In [17]:
generate_gpt_prompt_from_index(0, model_data_df)

"I'm analyzing a firm and would like you to predict its upcoming earnings per share. Here are the financial and market characteristics for the firm: Shiller P/E ratio: 21.085184337073507  Book-to-Market Ratio: 1.0209656925031767  Enterprise Value Multiple: -4.142299448867115  Price-to-Earnings (excl. extraordinary items): -3.298611111111111  Price-to-Earnings (incl. extraordinary items): -3.298611111111111  Price-to-Sales Ratio: 0.8162163599617063  Price-to-Cash Flow Ratio: -3.327515177797053  Dividend Payout Ratio: 0.0  Net Profit Margin: -0.2407190724391022  Operating Margin Before Depreciation: -0.34740985001595576  Operating Margin After Depreciation: -0.4003829379853207  Gross Profit Margin: 0.2285927029039464  Pretax Profit Margin: -0.4064461227528986  Cash Flow Margin: -0.17998085310073397  Return on Assets: -0.21659460754194718  Return on Equity: -0.23476321385964002  Return on Capital Employed: -0.3485687012194894  Effective Tax Rate: 0.3603448275862069  After-Tax Return on Eq

### Y1 only

In [23]:
def generate_gpt_prompt_from_index_y1(index, df, precision=3):
    # Mapping of variable names to natural language descriptions
    variable_map = {
        'CAPEI': 'Shiller P/E ratio',
        'bm': 'Book-to-Market Ratio',
        'evm': 'Enterprise Value Multiple',
        'pe_exi': 'Price-to-Earnings (excl. extraordinary items)',
        'pe_inc': 'Price-to-Earnings (incl. extraordinary items)',
        'ps': 'Price-to-Sales Ratio',
        'pcf': 'Price-to-Cash Flow Ratio',
        'dpr': 'Dividend Payout Ratio',
        'npm': 'Net Profit Margin',
        'opmbd': 'Operating Margin Before Depreciation',
        'opmad': 'Operating Margin After Depreciation',
        'gpm': 'Gross Profit Margin',
        'ptpm': 'Pretax Profit Margin',
        'cfm': 'Cash Flow Margin',
        'roa': 'Return on Assets',
        'roe': 'Return on Equity',
        'roce': 'Return on Capital Employed',
        'efftax': 'Effective Tax Rate',
        'aftret_eq': 'After-Tax Return on Equity',
        'aftret_invcapx': 'After-Tax Return on Invested Capital',
        'aftret_equity': 'After-Tax Return on Equity (again)',
        'pretret_noa': 'Pretax Return on Net Operating Assets',
        'pretret_earnat': 'Pretax Return on Earnings Assets',
        'GProf': 'Gross Profitability',
        'equity_invcap': 'Equity to Invested Capital',
        'debt_invcap': 'Debt to Invested Capital',
        'totdebt_invcap': 'Total Debt to Invested Capital',
        'capital_ratio': 'Capital Ratio',
        'int_debt': 'Interest-Bearing Debt',
        'int_totdebt': 'Interest-Bearing to Total Debt',
        'cash_lt': 'Cash to Long-Term Assets',
        'invt_act': 'Inventory to Current Assets',
        'rect_act': 'Receivables to Current Assets',
        'debt_at': 'Debt to Total Assets',
        'debt_ebitda': 'Debt to EBITDA',
        'short_debt': 'Short-Term Debt',
        'curr_debt': 'Current Debt',
        'lt_debt': 'Long-Term Debt',
        'profit_lct': 'Profit to Liabilities',
        'ocf_lct': 'Operating Cash Flow to Liabilities',
        'cash_debt': 'Cash to Debt',
        'fcf_ocf': 'Free Cash Flow to Operating Cash Flow',
        'lt_ppent': 'Long-Term Assets to PP&E',
        'dltt_be': 'Long-Term Debt to Book Equity',
        'debt_assets': 'Debt to Total Assets',
        'debt_capital': 'Debt to Capital',
        'de_ratio': 'Debt-to-Equity Ratio',
        'intcov': 'Interest Coverage',
        'intcov_ratio': 'Interest Coverage Ratio',
        'cash_ratio': 'Cash Ratio',
        'quick_ratio': 'Quick Ratio',
        'curr_ratio': 'Current Ratio',
        'cash_conversion': 'Cash Conversion Cycle',
        'inv_turn': 'Inventory Turnover',
        'at_turn': 'Asset Turnover',
        'rect_turn': 'Receivables Turnover',
        'pay_turn': 'Payables Turnover',
        'sale_invcap': 'Sales to Invested Capital',
        'sale_equity': 'Sales to Equity',
        'sale_nwc': 'Sales to Net Working Capital',
        'rd_sale': 'R&D to Sales',
        'adv_sale': 'Advertising to Sales',
        'staff_sale': 'Staff Expense to Sales',
        'accrual': 'Accruals',
        'ptb': 'Price-to-Book Ratio',
        'PEG_trailing': 'PEG Ratio (Trailing)',
        'divyield': 'Dividend Yield',
        'ret': 'Stock Return',
        'prc': 'Price',
        'EPS_true_l1_q1': 'Lagged True EPS for q1',
        'EPS_ana_q1': 'Analyst Forecast EPS for q1',
        'RGDP': 'Real GDP',
        'RCON': 'Consumer Spending',
        'INDPROD': 'Industrial Production',
        'UNEMP': 'Unemployment Rate',
        'EPS_true_l1_q2': 'Lagged True EPS for q2',
        'EPS_ana_q2': 'Analyst Forecast EPS for q2',
        'EPS_true_l1_q3': 'Lagged True EPS for q3',
        'EPS_ana_q3': 'Analyst Forecast EPS for q3',
        'EPS_true_l1_y1': 'Lagged True EPS for y1',
        'EPS_ana_y1': 'Analyst Forecast EPS for y1',
        'EPS_true_l1_y2': 'Lagged True EPS for y2',
        'EPS_ana_y2': 'Analyst Forecast EPS for y2',
    }

    row = df.loc[index]

    # Format variable values with `precision` decimals
    formatted_pairs = [
        f"{variable_map[col]}: {round(row[col], precision)}"
        for col in df.columns
        if col in variable_map and pd.notnull(row[col])
    ]
    joined_description = "  ".join(formatted_pairs)

    prompt = (
        "I'm analyzing a firm and would like you to predict its upcoming earnings per share. "
        "Here are the financial and market characteristics for the firm: "
        f"{joined_description} "
        "Can you forecast the firm’s EPS next year based on these characteristics? "
        "Please do not search the web."
    )
    return prompt

In [8]:
df_y1 = model_data_df[model_data_df['horizon'] == 'y1'].copy()
df_y1 = df_y1.reset_index(drop=True)
df_y1.head

<bound method NDFrame.head of         permno  YearMonth horizon   label  prediction      CAPEI        bm  \
0      10061.0 1989-03-31      y1 -1.5600         NaN  11.771166  0.945508   
1      10094.0 1989-03-31      y1  0.9000         NaN   8.323051  1.035691   
2      10154.0 1989-03-31      y1  0.5000         NaN   7.291511  1.798667   
3      10171.0 1989-03-31      y1 -0.1200         NaN  16.071110  0.623432   
4      10182.0 1989-03-31      y1  1.3900         NaN  37.472733  0.678223   
...        ...        ...     ...     ...         ...        ...       ...   
59183  89456.0 1986-08-31      y1  0.7995         NaN  17.768679  0.220628   
59184  90107.0 1986-08-31      y1  0.7000         NaN  13.182111  1.052847   
59185  90799.0 1986-08-31      y1  0.3600         NaN  22.298897  0.225579   
59186  91986.0 1986-08-31      y1  0.5300         NaN  21.143680  0.435517   
59187  92217.0 1986-08-31      y1  0.3000         NaN  20.203829  0.269500   

             evm     pe_exi     p

In [4]:
df_y1.to_csv('df_y1_with_gpt_predictions.csv', index=False)

NameError: name 'df_y1' is not defined

In [10]:
apple = df_y1[df_y1['permno'] == 14593].copy()
df_y1.to_csv('df_y1_with_gpt_predictions.csv', index=False)
apple.to_csv('df_AAPL_gpt_predictions.csv', index=False)
# generate_gpt_prompt_from_index_y1(0, apple.reset_index(drop=True))

In [25]:
generate_gpt_prompt_from_index_y1(0, df_y1)

"I'm analyzing a firm and would like you to predict its upcoming earnings per share. Here are the financial and market characteristics for the firm: Shiller P/E ratio: 11.771  Book-to-Market Ratio: 0.946  Enterprise Value Multiple: 9.976  Price-to-Earnings (excl. extraordinary items): 7.949  Price-to-Earnings (incl. extraordinary items): 8.013  Price-to-Sales Ratio: 0.707  Price-to-Cash Flow Ratio: 1.54  Dividend Payout Ratio: 0.291  Net Profit Margin: 0.089  Operating Margin Before Depreciation: 0.229  Operating Margin After Depreciation: 0.228  Gross Profit Margin: 0.447  Pretax Profit Margin: 0.124  Cash Flow Margin: 0.086  Return on Assets: 0.024  Return on Equity: 0.16  Return on Capital Employed: 0.097  Effective Tax Rate: 0.255  After-Tax Return on Equity: 0.156  After-Tax Return on Invested Capital: 0.0  After-Tax Return on Equity (again): 0.153  Pretax Return on Net Operating Assets: 0.194  Pretax Return on Earnings Assets: 0.135  Gross Profitability: 0.045  Equity to Invested

In [26]:
generate_gpt_prompt_from_index_y1(1, df_y1)

"I'm analyzing a firm and would like you to predict its upcoming earnings per share. Here are the financial and market characteristics for the firm: Shiller P/E ratio: 8.323  Book-to-Market Ratio: 1.036  Enterprise Value Multiple: 5.501  Price-to-Earnings (excl. extraordinary items): 0.269  Price-to-Earnings (incl. extraordinary items): 0.269  Price-to-Sales Ratio: 0.348  Price-to-Cash Flow Ratio: 6.261  Dividend Payout Ratio: 0.0  Net Profit Margin: 0.043  Operating Margin Before Depreciation: 0.085  Operating Margin After Depreciation: 0.07  Gross Profit Margin: 0.436  Pretax Profit Margin: 0.065  Cash Flow Margin: 0.058  Return on Assets: 0.166  Return on Equity: 0.124  Return on Capital Employed: 0.158  Effective Tax Rate: 0.34  After-Tax Return on Equity: 0.128  After-Tax Return on Invested Capital: 0.129  After-Tax Return on Equity (again): 0.128  Pretax Return on Net Operating Assets: 0.159  Pretax Return on Earnings Assets: 0.139  Gross Profitability: 0.802  Equity to Invested 

In [None]:
import openai
import pandas as pd
import time

# Set your OpenAI API key
openai.api_key = 'soSco5-jedqyf-zihdev'

# Function to generate a prompt for a single row
def generate_gpt_prompt_from_index_y1(index, df):
    row = df.iloc[index]
    firm_info = "\n".join([
        f"- {col}: {row[col]}" 
        for col in df.columns 
        if col not in ['permno', 'YearMonth', 'forecast_time', 'horizon', 'label', 'prediction']
    ])
    prompt = f"""I'm analyzing a firm with the following financial characteristics for forecast horizon y1:

{firm_info}

Please predict the upcoming Earnings Per Share (EPS). Just return the numeric value."""
    return prompt

# Function to call ChatGPT API
def call_chatgpt(prompt, model="gpt-4o"):
    try:
        response = openai.ChatCompletion.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful financial analyst."},
                {"role": "user", "content": prompt}
            ],
            temperature=0,  # make it deterministic
            max_tokens=50
        )
        # Extract the assistant's reply
        reply = response['choices'][0]['message']['content']
        return reply.strip()
    except Exception as e:
        print(f"Error: {e}")
        return None

# Assuming df_y1 already exists
df_y1['gpt_prediction'] = None  # Create empty column to store GPT results

# Loop over rows
for idx in range(len(df_y1)):
    prompt = generate_gpt_prompt_from_index_y1(idx, df_y1)
    print(f"Sending prompt for index {idx}...")  # Optional: to track progress
    prediction = call_chatgpt(prompt)
    df_y1.at[idx, 'gpt_prediction'] = prediction
    
    time.sleep(1.2)  # Sleep to respect OpenAI rate limits and avoid getting blocked

# Save results if needed
df_y1.to_csv('df_y1_with_gpt_predictions.csv', index=False)