In [39]:
import os
import time

import pandas as pd
from dotenv import load_dotenv
from openai import OpenAI

# Run python-dotenv's loader before the key lookup below, so a key kept in a
# local .env file is available through the process environment.
load_dotenv()

# Build the shared API client. The key is looked up from the environment rather
# than hardcoded; a missing variable yields None here.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [40]:
# Step 1: Upload CSV file
# Open the CSV in a context manager so the local file handle is closed as soon
# as the upload call returns (or raises), instead of leaking for the lifetime
# of the kernel. `file` keeps the upload result, whose id later cells attach
# to the thread message.
with open("HR_Data.csv", "rb") as csv_handle:
    file = client.files.create(
        file=csv_handle,
        purpose="assistants"
    )

In [41]:
# Step 2: Create an Assistant with Code Interpreter
# The settings are named up front so the create call itself stays short; the
# only tool enabled is the code interpreter.
assistant_instructions = "Analyze CSV data. Generate insights, stats, and visualizations."
assistant_tools = [{"type": "code_interpreter"}]

assistant = client.beta.assistants.create(
    name="Data Analyst",
    instructions=assistant_instructions,
    tools=assistant_tools,
    model="gpt-4-turbo",
)

In [54]:
# Step 3: Create a Thread and Ask Questions
# Start a fresh conversation thread for this question.
thread = client.beta.threads.create()

# The user prompt, plus the uploaded CSV attached for the code interpreter tool.
user_question = "Give me a list of possible simple and complex queries I can run on this dataset."
csv_attachment = {
    "file_id": file.id,
    "tools": [{"type": "code_interpreter"}],
}

message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content=user_question,
    attachments=[csv_attachment],
)

In [55]:
# Start a run of the assistant against the thread; the returned run object is
# polled for its status in the next cell.
run = client.beta.threads.runs.create(assistant_id=assistant.id, thread_id=thread.id)

In [56]:
# Poll for completion and display results
# Re-fetch the run until it leaves the in-flight states. Sleeping between
# requests avoids hammering the API in a tight loop, and exiting on any
# non-active status keeps this cell from spinning forever when the run ends as
# failed, cancelled, expired, incomplete, or requires_action.
POLL_INTERVAL_SECONDS = 1
ACTIVE_RUN_STATUSES = ("queued", "in_progress", "cancelling")

while run.status in ACTIVE_RUN_STATUSES:
  time.sleep(POLL_INTERVAL_SECONDS)
  run = client.beta.threads.runs.retrieve(
    thread_id=thread.id,
    run_id=run.id
  )

# Anything other than "completed" means there is no answer to display, so fail
# loudly here rather than letting the next cell print a stale message.
if run.status != "completed":
  raise RuntimeError(
    f"Run {run.id} ended with status {run.status!r}: {getattr(run, 'last_error', None)}"
  )

In [None]:
# List the thread's messages and look at the first one in the returned page
# (presumably the assistant's latest reply, given the API's default ordering —
# confirm).
messages = client.beta.threads.messages.list(thread_id=thread.id)
first_message = messages.data[0]

# Show the raw content blocks, then just the text of the first block.
print(first_message.content)
print(first_message.content[0].text.value)

[TextContentBlock(text=Text(annotations=[], value="The dataset appears to contain various attributes related to employee data, such as demographic, job-related, and performance metrics. Here are some example queries we can run, categorized into simple and complex ones:\n\n### Simple Queries:\n1. **Count of Employees by Gender**: Determine the distribution of employees by gender.\n2. **Average Monthly Income by Department**: Compute the average monthly income in each department.\n3. **Employee Counts by Job Role**: List how many employees exist per job role.\n4. **Attrition Rate**: Calculate the attrition rate (percentage of employees who left the company).\n5. **Average Age of Employees**: Determine the average age of the workforce.\n\n### Complex Queries:\n1. **Histogram of Age and Income**: Provide a visual plot showing the distribution of ages and corresponding average incomes.\n2. **Attrition Rates by Department and Gender**: Break down attrition rates by department and gender to s