# V1 GPT API PULL

Heyo,

I added some quality-of-life improvements to the chat interface and an automatic API-pull step to the script. I'm still working on how to get GPT to analyze the data.


In [8]:
import os
import openai
import pandas as pd
import requests

#Config items
num_retries = 3

# Read each key inside a `with` block so the file handle is closed right away,
# and strip all surrounding whitespace: a trailing "\r\n" or space left in the
# key would otherwise be sent to the API as part of the credential.
with open("/Users/jackogozaly/Desktop/Python_Directory/key.txt", "r") as key_file:
    openai.api_key = key_file.read().strip()
with open("/Users/jackogozaly/Desktop/Python_Directory/nass_key.txt", "r") as key_file:
    quickstats_api_key = key_file.read().strip()


#Model initialization
#This is the prompt we are giving to the model to set up how it will work.
#message_history is the running conversation that predict() sends to the chat
#API and appends each new user/assistant turn to. It is seeded with the
#instruction prompt (sent with the "user" role) and a canned assistant "OK".
#NOTE(review): the prompt text contains typos ('ouputs', 'for saying'); it is
#left untouched here because the string is sent to the model verbatim.
message_history = [{"role": "user", "content": 
                    """
                    You are a large language model trained to convert questions about agricultural data into NASS Quickstats API links. 
                    When answering a question only provide the URL link and skip any other ouputs unless it is a task you cannot do. Do not provide any instructions other than an API link.
                    If you can complete the task, respond with 'SUCCESS' followed immediately by the API link. Include no additional text explaining the API link for saying 'here it is'
                    """},
                   {"role": "assistant", "content": "OK"}]
                    

def predict(input):
    '''
    Takes a user's input and attempts to generate a response.

    The question is sent to the chat model together with the conversation
    stored in the module-level message_history. The new user/assistant turns
    are only added to message_history once the API call has succeeded, so a
    failed request does not leave a dangling user message in the history.

    Parameters:
        input: The user's question; it is converted to text before sending.
            (The name shadows the builtin but is kept so existing callers
            keep working.)

    Returns:
        str: The text of the model's first reply choice.

    Raises:
        Whatever openai.ChatCompletion.create raises; message_history is left
        unchanged in that case.
    '''
    user_message = {"role": "user", "content": f"{input}"}

    # Send a copy of the history plus the new question; the shared list is not
    # touched until we know the request worked.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=message_history + [user_message])

    reply_txt = response.choices[0].message.content

    # Commit both turns together so the history always alternates cleanly.
    message_history.append(user_message)
    message_history.append({"role": "assistant", "content": f"{reply_txt}"})
    return reply_txt

def api_read(response):
    '''
    Pulls the data behind the API link contained in a model reply.

    Parameters:
        response (str): Text returned by the model, expected to contain an
            https link, with "YOUR_API_KEY" standing in for the real key.

    Returns:
        pandas.DataFrame built from the 'data' field of the JSON payload
        (empty when that field is missing), or a str error message when the
        reply contains no link or the request fails.
    '''
    # Search the raw text rather than whitespace-separated tokens so a link
    # glued directly onto the 'SUCCESS' marker is still found.
    link_start = response.find('https')
    if link_start == -1:
        error_message_for_gpt = "Please try again, I want a dataframe showing the data from the Quickstats API"
        return error_message_for_gpt

    # The link runs up to the next whitespace; drop sentence punctuation or
    # quotes the model may have put right after it.
    api_link = response[link_start:].split()[0].rstrip('.,;\'"')

    # Print the link BEFORE the real key is substituted so the credential
    # never ends up in the notebook output.
    print(f"This is for debugging purposes only: {api_link}")
    api_link = api_link.replace("YOUR_API_KEY", quickstats_api_key)

    # Make the API request. A timeout keeps a stalled connection from hanging
    # the chat loop, and failures are reported instead of raised.
    try:
        api_pull = requests.get(api_link, timeout=30)
        api_pull.raise_for_status()
        data = api_pull.json()
    except (requests.RequestException, ValueError) as err:
        # Only report the error type: the full message can contain the URL,
        # which includes the API key.
        return f"Please try again, the Quickstats API request failed ({type(err).__name__})"

    # Extract the relevant data from the response
    relevant_data = data.get('data', []) if isinstance(data, dict) else []
    # Create a DataFrame
    return pd.DataFrame(relevant_data)

# Interactive chat loop: ask for a question, let predict() turn it into an API
# link, pull the data with api_read() and preview it. Typing "quit" at either
# prompt ends the session.
quit_requested = False  # set when the user quits from the inner retry prompt

while True:

    user_in = input("Please type your question: ")
    # strip() so that "quit " with stray whitespace is still recognized
    if user_in.strip().lower() == "quit":
        break

    response = predict(user_in)

    # Keep asking until the model's reply carries the 'SUCCESS' marker.
    while "SUCCESS" not in response:
        print("\nI'm sorry, I'm unable to understand or complete this request. Please try again. \n")

        user_in = input("Please retype your question: ")
        if user_in.strip().lower() == "quit":
            quit_requested = True
            break
        response = predict(user_in)

    # An explicit flag replaces probing locals() for a sentinel variable.
    if quit_requested:
        break

    df = api_read(response)

    # api_read returns a str message instead of a DataFrame when it fails.
    if isinstance(df, pd.DataFrame) and len(df) > 0:

        if 'Value' in df.columns:
            # Cast to str first so the .str accessor cannot fail on a column
            # that is not made of strings, then drop thousands separators and
            # placeholder markers. Anything still non-numeric becomes NaN.
            value_text = df['Value'].astype(str)
            for marker in (',', '(NA)', '()'):
                value_text = value_text.str.replace(marker, '', regex=False)
            df['Value'] = pd.to_numeric(value_text, errors="coerce")

        print(f"Data successfully pulled from NASS API with shape {df.shape}")
        print(df.head())
    else:
        print("API link no work")



Please type your question: How many oranges grown in 2021?
This is for debugging purposes only: https://quickstats.nass.usda.gov/api/api_GET/?key=YOUR_API_KEY&commodity_desc=ORANGES&year__GT=2020
Data successfully pulled from NASS API with shape (3679, 39)
                 load_time state_fips_code  Value sector_desc county_name  \
0  2022-12-15 12:00:00.000              99  242.0       CROPS               
1  2022-12-15 12:00:00.000              04    2.0       CROPS               
2  2022-12-15 12:00:00.000              06  194.0       CROPS               
3  2022-12-15 12:00:00.000              15    3.0       CROPS               
4  2022-12-15 12:00:00.000              48   44.0       CROPS               

  asd_desc county_code week_ending                        domaincat_desc  \
0                                   ORGANIC STATUS: (NOP USDA CERTIFIED)   
1                                   ORGANIC STATUS: (NOP USDA CERTIFIED)   
2                           

# Dealing with nonsense queries or when GPT doesn't understand

In [9]:
# Interactive chat loop: ask for a question, let predict() turn it into an API
# link, pull the data with api_read() and preview it. Typing "quit" at either
# prompt ends the session.
quit_requested = False  # set when the user quits from the inner retry prompt

while True:

    user_in = input("Please type your question: ")
    # strip() so that "quit " with stray whitespace is still recognized
    if user_in.strip().lower() == "quit":
        break

    response = predict(user_in)

    # Keep asking until the model's reply carries the 'SUCCESS' marker.
    while "SUCCESS" not in response:
        print("\nI'm sorry, I'm unable to understand or complete this request. Please try again. \n")

        user_in = input("Please retype your question: ")
        if user_in.strip().lower() == "quit":
            quit_requested = True
            break
        response = predict(user_in)

    # An explicit flag replaces probing locals() for a sentinel variable.
    if quit_requested:
        break

    df = api_read(response)

    # api_read returns a str message instead of a DataFrame when it fails.
    if isinstance(df, pd.DataFrame) and len(df) > 0:

        if 'Value' in df.columns:
            # Cast to str first so the .str accessor cannot fail on a column
            # that is not made of strings, then drop thousands separators and
            # placeholder markers. Anything still non-numeric becomes NaN.
            value_text = df['Value'].astype(str)
            for marker in (',', '(NA)', '()'):
                value_text = value_text.str.replace(marker, '', regex=False)
            df['Value'] = pd.to_numeric(value_text, errors="coerce")

        print(f"Data successfully pulled from NASS API with shape {df.shape}")
        print(df.head())
    else:
        print("API link no work")

Please type your question: beep boop

I'm sorry, I'm unable to understand or complete this request. Please try again. 

Please retype your question: apples grown in florida between 2018 and 2020
This is for debugging purposes only: https://quickstats.nass.usda.gov/api/api_GET/?key=YOUR_API_KEY&commodity_desc=APPLES&state_fips=12&year__GE=2018&year__LE=2020
Data successfully pulled from NASS API with shape (42361, 39)
                         domaincat_desc week_ending county_ansi  \
0  ORGANIC STATUS: (NOP USDA CERTIFIED)                           
1  ORGANIC STATUS: (NOP USDA CERTIFIED)                           
2  ORGANIC STATUS: (NOP USDA CERTIFIED)                           
3  ORGANIC STATUS: (NOP USDA CERTIFIED)                           
4  ORGANIC STATUS: (NOP USDA CERTIFIED)                           

                 load_time county_name congr_district_code CV (%)  \
0  2022-12-15 12:00:00.000                                   20.7   
1  2018-02-01 