### Necessary imports and OpenAI API key setup
The OpenAI API key (currently a free-tier key) is read from the `OPENAI_API_KEY` system environment variable.


In [1]:
import json
import openai
import os
from dotenv import load_dotenv
import requests
import geopandas as gpd
import jpype
import json 

# Read the OpenAI key from the process environment (None when the variable is unset).
# Avoid printing the key: notebook outputs are saved alongside the code.
openai.api_key = os.environ.get('OPENAI_API_KEY')

In [2]:
# Function schema advertised to the chat model for function calling.
# Every property uses the JSON-Schema keyword "description"; a misspelled key
# would be carried along as an unknown field and give the model no guidance
# on how to fill that argument.
function_descriptions = [
    {
        "name": "emp",
        "description": "Clustering a set of geographic areas into the maximum number of homogeneous regions that satisfies a set of user defined constraints",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA",
                },
                "sumName": {
                    "type": "string",
                    "description": "The name of the spatial extensive attribute variable for the SUM constraint",
                    # "enum": ["pop2010", "pop_16up"]
                },
                "sumLow": {
                    "type": "integer",
                    "description": "The lowerbound for the SUM range",
                },
                "sumHigh": {
                    "type": "integer",
                    "description": "The upperbound for the SUM range",
                },
                "disname": {
                    "type": "string",
                    "description": "The dissimlarity attribute",
                },
            },
            "required": ["location", "sumName", "sumLow", "sumHigh", "disname"],
        },
    }
]

In [3]:
## Frontend user query input simulation

# Stand in for the frontend: submit a natural-language query to the local
# backend as a GET query-string parameter and report whether it was accepted.
url = 'http://127.0.0.1:8000/user_query'
params = dict(
    user_query='Get me the regions of census tracts in Los Angeles whose minimum total population is between 30,000 to 50,000 with a dissimilarity on households',
)
headers = dict(accept='application/json')

response = requests.get(url, headers=headers, params=params)

# Only an exact 200 counts as success.
if response.status_code != 200:
    print('Request failed with status code:', response.status_code)
else:
    print("User query sent successfully")

User query sent successfully


In [4]:
# Fetch the query previously submitted by the (simulated) frontend.
# user_query = "Get me the regions of census tracts in Los Angeles whose minimum total population is between 30,000 to 50,000 with a dissimilarity of households"
endpoint = "http://localhost:8000/gpt_user_query"
query_response = requests.get(endpoint)
# Fail here, with the HTTP error, rather than feeding an error page to the model later.
query_response.raise_for_status()

# The backend answers with a JSON-encoded string, so `.text` would keep the
# surrounding double quotes (and any escapes) inside the prompt. Decode it, and
# fall back to the raw body if it is not JSON or does not decode to a string.
try:
    user_query = query_response.json()
except ValueError:
    user_query = query_response.text
if not isinstance(user_query, str):
    user_query = query_response.text
print(user_query)


"Get me the regions of census tracts in Los Angeles whose minimum total population is between 30,000 to 50,000 with a dissimilarity of households"


In [5]:
def read_shapefile(data_dir,  filename):
    """Load a geospatial file with geopandas.

    Args:
        data_dir: Directory that contains the file.
        filename: Name of the file inside ``data_dir``.

    Returns:
        Whatever ``gpd.read_file`` returns for the joined path.
    """
    return gpd.read_file(os.path.join(data_dir, filename))


In [6]:
# Location of the test shapefiles. The original machine-specific path stays as
# the default so existing setups keep working; set GPT_PYNEAPPLE_DATA_DIR to
# point the notebook at the data on another machine.
data_dir = os.getenv(
    "GPT_PYNEAPPLE_DATA_DIR",
    "D:/user_pa1n/VSCode/projects/GPT-Pyneapple/testData",
)
filename = "LACity.shp"
df = read_shapefile(data_dir, filename)
# The column names are what the model later picks function arguments from.
print(df.columns.values)


['fid' 'OBJECTID' 'TRACTCE10' 'POP' 'LATPOP_D' 'WHIPOP_D' 'BLAPOP_D'
 'AMIPOP_D' 'ASIPOP_D' 'HPIPOP_D' 'OTHPOP_D' 'MMRPOP_D' 'VAP' 'LATVAP_D'
 'WHIVAP_D' 'BLAVAP_D' 'AMIVAP_D' 'ASIVAP_D' 'HPIVAP_D' 'OTHVAP_D'
 'MMRVAP_D' 'H0010001' 'H0010002' 'H0010003' 'TOOLTIP' 'NLA_URL'
 'STATEFP10' 'COUNTYFP10' 'TRACTCE10_' 'GEOID10' 'NAME10' 'NAMELSAD10'
 'MTFCC10' 'FUNCSTAT10' 'ALAND10' 'AWATER10' 'INTPTLAT10' 'INTPTLON10'
 'Tot_Pop' 'One_Race' 'White' 'Afro_Amer' 'Native' 'Asian' 'Pac_Isl'
 'Other_Race' 'Two_Or_Mor' 'Hispanic' 'Not_Hisp' 'Hisp_One_R' 'Hisp_White'
 'Hisp_Afro_' 'Hisp_Nativ' 'Hisp_Asian' 'Hisp_Pac_I' 'Other' 'Hisp_Two_O'
 'Tot_Housin' 'Occup_Hous' 'Vac_Housin' 'GEO.id' 'pop2010' 'households'
 'pop_16up' 'employed' 'unemployed' 'geometry']


In [7]:
# Text rendering of the column-name array; passed to the model as context.
df_context = f"{df.columns.values}"


In [8]:
# Running conversation sent to the chat model.
# Each entry has the shape {"role": "", "content": ""}.
chat_history = list()

In [9]:
# Seed the conversation: system role, the task instruction, then the dataframe
# column names as context. Note: re-running this cell appends the messages again.
chat_history.extend([
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Based on the name  of the columns of the  dataframe help select appropiate column as paramters for functions based on the user query"},
    {"role": "user", "content": "The contents of the dataframe are : " + df_context},
])
print(len(chat_history))


3


In [10]:
# First round trip: send the seeded conversation (no function schema yet) and
# show the assistant message of the first choice.
Initialresponse = openai.ChatCompletion.create(
    messages=chat_history,
    model="gpt-3.5-turbo-0613",
)
print(Initialresponse["choices"][0]["message"])

{
  "role": "assistant",
  "content": "Based on the names of the columns in the dataframe, here are some suggestions for selecting appropriate columns as parameters for functions based on user queries:\n\n- 'POP': This column represents the total population.\n- 'WHIPOP_D': This column represents the population of White people.\n- 'BLAPOP_D': This column represents the population of African Americans.\n- 'ASIPOP_D': This column represents the population of Asians.\n- 'H0010001': This column represents a specific category related to housing.\n- 'pop2010': This column represents the population count for the year 2010.\n\nThese are just a few examples, but you can select any column based on your specific requirements and the nature of the function you are using."
}


In [11]:
# Keep the assistant's first reply in the history so later requests include it.
chat_history += [Initialresponse["choices"][0]["message"]]
print(len(chat_history))

4


In [12]:
# Ask the model to translate the user query into a call of one of the
# advertised functions. The instruction starts with a space so it is not glued
# onto the last character of the user query in the prompt.
function_call_prompt = {
    "role": "user",
    "content": (
        "Given the user query : "
        + user_query
        + " respond with the function call. Make sure the arguments are exactly the same as dataframe contents. Convert any integer value to a double in the arguments"
    ),
}

response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo-0613",
    # History is extended for this request only; chat_history itself is not mutated.
    messages=chat_history + [function_call_prompt],
    functions=function_descriptions,
    function_call="auto",
)

In [13]:
# Assistant message of the first choice; it carries the proposed function call.
ai_response_message = response['choices'][0]['message']
print(ai_response_message)

{
  "role": "assistant",
  "content": null,
  "function_call": {
    "name": "emp",
    "arguments": "{\n  \"location\": \"Los Angeles, CA\",\n  \"sumName\": \"POP\",\n  \"sumLow\": 30000.0,\n  \"sumHigh\": 50000.0,\n  \"disname\": \"households\"\n}"
  }
}


In [14]:
# Extract the function name and the target dataset from the model's function call.
# The arguments arrive as a JSON string, so parse them with json.loads:
# eval() would execute arbitrary model-generated text and also fails on the
# JSON literals null / true / false.
function_call = ai_response_message['function_call']
callingFunction = function_call['name']
dataset = json.loads(function_call['arguments']).get("location")
print("Funtion to be called is: ",callingFunction)
print("Dataset to be used is: ",dataset)

Funtion to be called is:  emp
Dataset to be used is:  Los Angeles, CA


In [15]:
# Request payload for the backend: the decoded function-call arguments plus the
# name of the function to run. It stays a dict; it is JSON-encoded when posted.
parameters = {
    **json.loads(ai_response_message["function_call"]["arguments"]),
    'callingFuntion': callingFunction,
}
print(parameters)

{'location': 'Los Angeles, CA', 'sumName': 'POP', 'sumLow': 30000.0, 'sumHigh': 50000.0, 'disname': 'households', 'callingFuntion': 'emp'}


In [30]:
# def get_test_response(location):

#     test_response = {
#         "location" : location,
#         "max_p": '135',
#         "labels": "[101,19,101,19,101,101,19,101,101,19,19,19,19,104,19,13,13,19,13,13,23,23,13,13,37,154,154,154,154,154,13,19,19,19,154,154,25,25,25,23,23,23,23,32,25,25,23,26,32,26,32,26,29,26,29,29,32,32,32,32,33,33,33,26,26,36,37,41,41,37,37,154,37,37,41,37,41,29,29,33,68,33,29,71,41,36,36,36,61,36,56,56,56,196,56,61,33,61,61,61,62,196,196,71,71,71,68,68,68,62,71,71,68,83,83,37,143,143,143,143,143,37,84,143,83,84,84,85,154,154,83,83,83,83,83,154,84,85,154,87,84,87,87,84,87,84,87,87,84,87,154,105,105,19,104,154,105,105,85,104,105,104,85,101,101,101,101,104,118,101,101,118,117,104,118,118,118,105,85,115,110,110,110,110,110,125,115,115,118,115,125,117,118,118,117,117,117,117,117,110,125,125,125,125,125,115,248,248,128,128,128,248,248,248,136,154,154,136,136,141,141,143,141,141,179,179,179,151,151,151,151,151,136,154,154,151,162,125,179,151,179,162,162,125,162,242,162,242,68,170,141,173,173,173,62,184,184,170,173,179,179,141,173,170,184,184,170,226,184,184,194,194,62,204,204,196,56,194,194,56,196,199,199,199,199,194,194,196,204,184,204,204,209,204,209,194,209,209,209,217,217,199,218,209,218,217,217,218,218,599,218,599,218,179,226,209,226,226,226,599,226,233,233,233,233,599,599,599,599,239,239,239,239,242,242,239,599,599,242,729,248,128,128,128,128,301,248,301,301,301,301,239,260,260,260,260,278,265,266,265,265,265,278,278,271,278,271,271,265,265,266,266,271,266,271,371,281,271,371,281,278,278,281,278,278,281,278,281,281,278,349,281,349,289,289,289,289,289,289,293,289,293,293,299,299,248,248,299,299,301,301,301,299,310,299,299,293,293,310,310,310,315,315,315,315,310,310,315,293,322,322,322,322,322,322,322,325,325,334,325,315,334,333,333,333,333,334,334,334,340,325,325,340,357,301,301,729,729,333,466,466,349,289,289,289,349,349,355,355,355,357,357,357,322,322,357,349,281,281,355,355,401,401,371,369,369,371,369,369,371,371,369,369,373,373,266,266,266,369,379,373,266,373,373,379,379,379,379,379,379,379,379,387,379,379,387,379,387,387,379,379,387,387,379,379,379,379,3
87,379,401,401,379,379,379,397,379,397,401,401,397,397,397,397,397,414,414,401,402,402,402,355,357,411,357,411,411,433,411,411,414,411,411,402,402,402,397,402,433,411,414,414,414,510,510,414,334,433,357,340,340,340,430,340,340,340,428,334,428,428,428,430,430,430,433,433,430,430,502,502,433,430,430,430,502,501,430,501,501,428,501,501,481,481,481,501,501,501,430,502,510,510,333,334,333,466,333,466,466,466,466,466,466,466,481,334,481,646,475,475,475,475,646,475,475,646,481,481,502,481,502,646,487,487,487,502,504,502,504,504,525,525,492,492,487,487,646,492,492,510,510,502,502,502,501,501,502,502,502,502,504,504,504,504,526,504,525,504,502,526,526,555,414,526,414,510,510,414,526,526,526,397,397,379,379,526,379,526,514,526,514,526,526,532,532,532,514,514,514,521,521,521,521,521,521,521,555,555,555,526,525,526,555,555,555,532,532,555,532,555,533,533,545,533,555,555,525,525,553,533,533,545,545,545,545,545,545,564,545,553,553,553,492,492,553,555,555,533,545,564,565,533,555,555,565,565,565,564,564,564,565,566,566,564,555,555,555,565,555,555,566,565,566,566,570,570,570,570,570,576,566,576,576,576,578,578,578,583,583,583,576,583,576,576,576,694,578,589,589,578,583,589,589,589,729,729,729,729,242,598,598,598,598,598,599,599,692,598,598,598,242,608,608,608,630,630,729,729,729,729,729,242,242,729,619,630,630,630,619,619,619,608,628,608,628,628,628,628,628,630,630,644,639,630,639,639,639,639,639,644,644,644,644,644,644,646,646,628,628,649,650,650,649,649,649,660,649,660,660,650,660,678,660,660,650,692,692,692,692,692,650,678,678,678,678,681,681,681,682,681,678,681,682,681,682,682,686,686,686,692,686,682,686,686,686,686,692,694,694,694,694,694,694,699,699,699,699,699,701,701,701,707,711,707,701,704,701,701,704,707,707,707,707,704,704,704,711,701,724,724,711,711,711,711,711,724,711,711,724,724,724,724,724,724,724,724,724,373,373,707,694,729,242,650,217,36,25,217,141,289,281,711,701,599,101,19,41,56,179,19,686,711,692]"
#     }

#     return json.dumps(test_response)

In [16]:
# Run the selected function on the backend with the model-chosen parameters.
# (Offline alternative kept for reference: function_response = get_test_response(location=dataset))
endpoint = "http://localhost:8000/api/endpoint"
function_response = requests.post(endpoint, json = parameters)
if function_response.status_code != 200:
    # Stop here: without a successful response `result` would be undefined (or
    # stale from an earlier run) and the following cells would work on bad data.
    print("Request failed with response code: ", function_response.status_code)
    raise RuntimeError(
        "Backend request failed with response code: "
        + str(function_response.status_code)
    )
result = function_response.json()
print(result)

{"max_p": 105, "labels": [5, 5, 5, 5, 5, 14, 14, 14, 14, 14, 14, 5, 14, 14, 5, 20, 20, 20, 20, 20, 26, 20, 26, 26, 26, 26, 35, 26, 35, 35, 35, 35, 35, 35, 35, 162, 41, 41, 41, 20, 41, 41, 43, 41, 41, 43, 43, 52, 52, 52, 52, 52, 52, 52, 52, 43, 43, 43, 43, 43, 995, 995, 995, 995, 995, 995, 43, 43, 43, 75, 26, 75, 75, 75, 75, 75, 43, 43, 84, 995, 84, 995, 84, 84, 108, 995, 995, 995, 95, 95, 95, 95, 95, 271, 95, 95, 84, 249, 95, 249, 249, 271, 271, 108, 108, 108, 84, 108, 84, 249, 108, 108, 108, 122, 75, 75, 122, 122, 108, 122, 122, 122, 122, 147, 122, 147, 147, 135, 26, 135, 26, 75, 122, 147, 122, 122, 135, 135, 135, 143, 143, 143, 143, 147, 143, 147, 147, 147, 147, 143, 143, 135, 175, 14, 35, 162, 162, 162, 175, 175, 162, 162, 135, 162, 162, 162, 14, 175, 162, 176, 176, 176, 176, 175, 175, 176, 176, 175, 185, 185, 185, 185, 185, 185, 240, 240, 185, 205, 175, 175, 205, 208, 176, 208, 208, 176, 208, 208, 176, 205, 240, 240, 205, 205, 205, 205, 208, 208, 208, 341, 341, 341, 341, 208, 234, 

In [32]:
# second_response = openai.ChatCompletion.create(
#     model="gpt-3.5-turbo-0613",
#     messages=[
#         {"role": "user", "content": user_query},
#         ai_response_message,
#         {
#             "role": "function",
#             "name": "emp",
#             "content": function_response,
#         },
#     ],
# )

In [17]:
# Ask the model to interpret the backend result for the user.
# The decoded payload may be a JSON string or an already-parsed object, so
# serialise anything that is not a string before concatenating; otherwise the
# `+` below raises TypeError.
function_output = function_response.json()
if not isinstance(function_output, str):
    function_output = json.dumps(function_output)

evaluate_function_response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo-0613",
    # The leading space before "if max_p" keeps the instruction from being
    # glued onto the end of the returned payload.
    messages=chat_history + [
        {
            "role": "user",
            "content": "The function returns : "
            + function_output
            + " if max_p shows maximum number of regions formed for the given query. Tell me the maximum number of regions that satisfy the userquery",
        }
    ],
)

In [18]:
# Show the model's plain-text answer from the first choice.
print(evaluate_function_response["choices"][0]["message"]["content"])

The maximum number of regions that satisfy the user query is 105.
