In [None]:
import re
import time
from datetime import datetime

import pandas as pd
from openai import OpenAI
from tqdm import tqdm

# Initialize the OpenAI client.
# The API key is intentionally not written into the notebook: when no api_key
# argument is given, the client reads it from the OPENAI_API_KEY environment
# variable, so set that variable before running this cell.
client = OpenAI()

In [7]:
# Read the input deals file into a DataFrame
df = pd.read_csv("infoevents.csv")

# Print the header so the column constants below can be checked against it
print("Available columns:", df.columns)

# Names of the input columns that are fed into the prompt
DESCRIPTION_COL, SYNOPSIS_COL, DATE_COL = (
    "targetbusinessdescription",
    "dealsynopsis",
    "dateannounced",
)

# Output columns start out as empty strings and are filled in row by row
for _output_col in ("category", "explanation", "confidence"):
    df[_output_col] = ""

# Classify every row with the OpenAI model and store the parsed answer in the
# "category", "explanation" and "confidence" columns of df.

# Seconds to wait after every request (successful or not) to stay under rate limits
REQUEST_PAUSE_SECONDS = 1.2

# Text sent in place of a missing CSV value, instead of the literal string "nan"
MISSING_VALUE_TEXT = "not available"

# Prompt text, built once; {description}, {synopsis} and {date} are filled per row
PROMPT_TEMPLATE = """
You are an expert consultant in the automobile industry. I would like your help classifying a set of investments made by automakers (OEMs).

In each investment, an OEM acquires a target company. The target offers a product or service that may fall into one of three categories:
1. The product or service directly supports the internal combustion engine (ICEV). Common keywords include: motor, components, diesel, gasoline.
2. The product or service directly supports the electric engine (EV) technologies. Common keywords include: electric, electricity, battery, lithium.
3. The product or service is directly related to neither ICEV nor EV. This may include upstream product/service (e.g., software, parts, etc) or downstream product/service (i.e., distributor, wholesale, etc).

Please keep in mind  the following suggestions:
- If one of the activities of the target is manufacturing, then it can only be 1 or 2. If there is no specific information, assume it is 1.

Please consider the following information about the target company:
- Description: \"\"\"{description}\"\"\"
- Deal Synopsis: \"\"\"{synopsis}\"\"\"
- Date Announced: {date}

Please:
- Classify the product or service into one of the three categories (1, 2, or 3)
- Provide a brief explanation (20–50 words)
- Rate your confidence in the classification on a scale from 1 (no confidence) to 100 (full confidence)

Respond in the following format:
Category: [1/2/3]
Explanation: [your explanation]
Confidence: [number]
"""

# Matches "Category: ...", "Explanation: ..." or "Confidence: ..." lines, while
# tolerating list bullets and markdown emphasis around the label
# (e.g. "**Category:** 2" or "- Confidence: 85").
_FIELD_LINE = re.compile(
    r"^[\s*_#>\-]*(category|explanation|confidence)[\s*_]*:[\s*_]*(.*)$",
    re.IGNORECASE,
)

# First run of digits in a value, so "[2]", "85%" and "85/100" all parse
_FIRST_INT = re.compile(r"\d+")


def _prompt_value(value):
    """Return *value* as prompt text, replacing missing values with a placeholder."""
    if pd.isna(value):
        return MISSING_VALUE_TEXT
    return str(value)


def _parse_response(content):
    """Extract the answer fields from the model's reply.

    Args:
        content: Full text of the model reply.

    Returns:
        A dict with any of the keys "category" (int), "explanation" (str)
        and "confidence" (int) that were present in the reply. If a label
        appears more than once, the last occurrence wins.

    Raises:
        ValueError: If a category or confidence line contains no integer.
    """
    parsed = {}
    for line in content.splitlines():
        match = _FIELD_LINE.match(line)
        if match is None:
            continue
        field = match.group(1).lower()
        value = match.group(2).strip()
        if field == "explanation":
            parsed[field] = value
            continue
        number = _FIRST_INT.search(value)
        if number is None:
            # Keep unparsable answers visible as error rows rather than blanks
            raise ValueError(f"No integer found in {field} line: {line!r}")
        parsed[field] = int(number.group())
    return parsed


for i, row in tqdm(df.iterrows(), total=len(df)):
    try:
        user_prompt = PROMPT_TEMPLATE.format(
            description=_prompt_value(row[DESCRIPTION_COL]),
            synopsis=_prompt_value(row[SYNOPSIS_COL]),
            date=_prompt_value(row[DATE_COL]),
        )

        response = client.chat.completions.create(
            model="o3-mini",
            messages=[
                {"role": "system", "content": "You are an expert consultant in the automobile industry."},
                {"role": "user", "content": user_prompt},
            ],
        )

        content = response.choices[0].message.content

        # Parse the whole reply first, then write, so a row is never left
        # half-filled when one of its lines cannot be parsed.
        for field, value in _parse_response(content).items():
            df.at[i, field] = value

    except Exception as e:
        # Best effort: record the failure on the row and continue with the next one
        print(f"Error processing row {i}: {e}")
        df.at[i, "category"] = "error"
        df.at[i, "explanation"] = str(e)
        df.at[i, "confidence"] = 0

    finally:
        # Pause after failures too, so a rate-limit error is not followed
        # immediately by another request.
        time.sleep(REQUEST_PAUSE_SECONDS)

# Write the annotated table to a CSV whose name includes the current local date and time
timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"
filename = "infoevents_with_predictions_" + timestamp + ".csv"
df.to_csv(filename, index=False)
print("✅ Saved to " + filename)


Available columns: Index(['sdcdealno', 'acquirorfullname', 'targetfullname',
       'targetbusinessdescription', 'dealsynopsis', 'dateannounced'],
      dtype='object')


100%|██████████| 1473/1473 [2:10:09<00:00,  5.30s/it]  

✅ Saved to infoevents_with_predictions_20250514_005527.csv



