#  Data Analysis With LLMs

Parts of this notebook were originally based on DataCamp's code-along "Introduction to Large Language Models with GPT & LangChain".

In [22]:
# ! pip install plotly pandas

In [None]:
import os
import openai
from dotenv import load_dotenv
load_dotenv() # Load the .env file

In [24]:
# Set openai.api_key to the OPENAI_API_KEY environment variable.
# Indexing os.environ (rather than calling .get) raises KeyError right here
# when the variable is missing, so a misconfigured environment fails early.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [25]:
# Import the langchain package as lc
import langchain as lc

# From the langchain.chat_models module, import ChatOpenAI
from langchain.chat_models import ChatOpenAI

# From the langchain.schema module, import AIMessage, HumanMessage, SystemMessage
from langchain.schema import AIMessage, HumanMessage, SystemMessage

# Import pandas using the alias pd
import pandas as pd

# Import plotly.express using the alias px
import plotly.express as px

# From the IPython.display package, import display and Markdown
from IPython.display import display, Markdown

Load a small, fictitious dataset of patient hospital stays.

In [None]:
# Read the patient-stay records from the CSV file into a DataFrame.
df_patient_stay = pd.read_csv("resources/patient-stay.csv")

# Show two summaries in turn: describe() with its defaults (numeric columns),
# then describe() restricted to the text (object) columns via include="O".
for heading, describe_options in (
    ("Description of numeric columns\n", {}),
    ("Description of text columns\n", {"include": "O"}),
):
    print(heading)
    display(df_patient_stay.describe(**describe_options))

# Preview only the first two rows (not the whole dataset). As the last
# expression of the cell, the preview is rendered by the notebook.
print("The first few rows\n")
df_patient_stay.head(2)

## Ask ChatGPT a question

In [28]:
from openai import OpenAI

# Client for the OpenAI API, constructed with no explicit arguments.
client = OpenAI()

# System message: sets the assistant's persona and answer style.
system_msg_test = """You are a helpful assistant who understands data analysis.
 You write in a clear language that a ten year old can understand.
 You keep your answers brief."""

# User message: the question being asked.
user_msg_test = "Tell me some uses of ChatGPT for data analysis."

# Pair each role with its text to build the chat message list.
msgs_test = [
    dict(role=role, content=text)
    for role, text in (("system", system_msg_test), ("user", user_msg_test))
]

# Send the messages to GPT. The result is bound to two names:
# completion and response_test.
completion = client.chat.completions.create(
    messages=msgs_test,
    model="gpt-3.5-turbo",
)
response_test = completion

# First the full response serialised as JSON, then only the reply text
# of the first choice.
print("The entire response\n")
display(response_test.model_dump_json())
print("\n\n")
print("The response's content\n")
print(response_test.choices[0].message.content)

## Ask a Question About the Dataset

In [30]:
# A description of the dataset, used as context in the prompt sent to the model.
# The opening triple quote must sit on the same line as the assignment:
# a bare `dataset_description =` followed by a newline is a SyntaxError.
dataset_description = """
The PatientStay table represents patients admitted to a ward in a hospital. 
Each row in the table represents a patient admission. 
The dataset contains the following columns.
- PatientId (int): The unique ID of the patient.
- AdmittedDate (date): The date on which the patient was admitted to the hospital.
- DischargeDate (date): The date on which the patient was discharged from the hospital.
- Hospital (varchar): The name of the hospital.
- Ward (varchar): The name of the ward.
- Tariff (float): The cost to treat the patient.
- Ethnicity (varchar): The ethnicity of the patient e.g., White, Asian. For some rows this value is missing (null).
"""

# The task we want the AI to carry out. Assign to suggest_questions.
suggest_questions = "Suggest some data analysis questions that could be answered with this dataset."

# Build the conversation: a system message, followed by a human message that
# joins the dataset description and the task with a blank line between them.
# Assign to msgs_suggest_questions.
msgs_suggest_questions = [
    SystemMessage(content="You are a data analysis expert."),
    HumanMessage(content="\n\n".join((dataset_description, suggest_questions))),
]

In [None]:
# Create a LangChain chat model with no explicit arguments.
chat = ChatOpenAI()

# Send the messages to the model. invoke() is used because calling the model
# object directly (chat(...)) is deprecated in LangChain.
response_suggest_questions = chat.invoke(msgs_suggest_questions)

# Print the response
print("The entire response\n")
print(response_suggest_questions)
print("\n\n")

# Only the text of the reply
print("The response's content\n")
print(response_suggest_questions.content)
print("\n\n")

# The class of the returned message object
print("The type of the response\n")
print(type(response_suggest_questions))

### Hold a conversation with GPT

In [None]:
# Continue the conversation: append the model's previous reply and a new
# human message to the earlier messages, so the follow-up request is sent
# together with the preceding exchange.
# Assign to msgs_python_top_patients.
msgs_python_top_patients = msgs_suggest_questions + [
    response_suggest_questions,
    HumanMessage(
        content="Write some Python code to find the 5 patients with the highest tariff.")
]

# Pass the messages to GPT. Assign to response_python_top_patients.
# invoke() is used because calling the model object directly (chat(...)) is
# deprecated in LangChain.
response_python_top_patients = chat.invoke(msgs_python_top_patients)

# Display the response's Markdown content
display(Markdown(response_python_top_patients.content))