In [1]:
import pandas as pd
from openai import OpenAI
from dotenv import load_dotenv
import os

# Load Data

In [2]:
df = pd.read_csv("../data/diabetes.csv")
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


# Create template

In [3]:
facts_template = """
1. Number of times pregnant is {}.
2. Plasma glucose concentration a 2 hours in an oral glucose tolerance test is {}.
3. Diastolic blood pressure (mm Hg) is {}.
4. Triceps skin fold thickness (mm) is {}.
5. 2-Hour serum insulin (mu U/ml) is {}.
6. Body mass index (weight in kg/(height in m)^2) is {}.
7. Diabetes pedigree function is {}.
8. Age (years) is {}.
"""

# Dataset 

In [9]:
input_list = []
for index, row in df.iterrows():
    example_patient = facts_template.format(row["Pregnancies"],
                    row["Glucose"], 
                    row["BloodPressure"],
                    row["SkinThickness"],
                    row["Insulin"],
                    row["BMI"],
                    row["DiabetesPedigreeFunction"],
                    row["Age"])
    
    input = "Give me a report of the patient status given theses mesurments: {}".format(example_patient)
    input_list.append(input)

In [12]:
 # Create the pandas DataFrame
synthetic_df = pd.DataFrame(input_list, columns=['input'])
synthetic_df.head()

Unnamed: 0,input
0,Give me a report of the patient status given t...
1,Give me a report of the patient status given t...
2,Give me a report of the patient status given t...
3,Give me a report of the patient status given t...
4,Give me a report of the patient status given t...


## Choose 10 samples and generate examples

In [14]:
sample_synthetic_df = synthetic_df.iloc[:10]

In [15]:
load_dotenv()
API_KEY = os.getenv('API_KEY')

In [16]:
def chat_completetion(messages, client, model):
    response = client.chat.completions.create(
    model=model,
    stream=False,
    messages=messages
    )

    return response.choices[0].message.content

In [17]:
# gpt call
client = OpenAI(api_key=API_KEY)

SYSTEM ="""
        - You are an assistant for a medical company 
        - Your objective is to analyse and diagnostically predict whether or not a patient has diabetes, based on certain diagnostic measurements.
        - You should format you output in a form of a lengthy report analysing and exmplaining the significanse of each measurments and how the measurments might affect you final jugment.
        """

MODEL= "gpt-4o"

responses_list = []
for index, row in sample_synthetic_df.iterrows():
    # messages
    MESSAGES = [
        {"role": "system", "content": SYSTEM},
        {"role": "user", "content": row["input"]},
    ]
    
    # output
    response = chat_completetion(MESSAGES, client, MODEL)

    # append output
    responses_list.append(response)


In [18]:
sample_synthetic_df['output'] = responses_list
sample_synthetic_df['instruction'] = [SYSTEM] * len(sample_synthetic_df)
sample_synthetic_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample_synthetic_df['output'] = responses_list
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample_synthetic_df['instruction'] = [SYSTEM] * len(sample_synthetic_df)


Unnamed: 0,input,output,instruction
0,Give me a report of the patient status given t...,## Comprehensive Report on Patient's Diabetes ...,\n - You are an assistant for a medical...
1,Give me a report of the patient status given t...,### Patient Diabetes Diagnostic Report\n\n####...,\n - You are an assistant for a medical...
2,Give me a report of the patient status given t...,### Patient Report: Diabetes Risk Analysis\n\n...,\n - You are an assistant for a medical...
3,Give me a report of the patient status given t...,# Patient Diagnostic Report for Diabetes Risk ...,\n - You are an assistant for a medical...
4,Give me a report of the patient status given t...,## Diagnostic Report\n\n### Patient Demographi...,\n - You are an assistant for a medical...


In [19]:
sample_synthetic_df.to_csv('../data/synthetic.csv')