In this example, let's perform a simple task: finding the headquarters cities and states for a list of firms.

In [2]:
# Install the openai Python package first.

!pip install openai



In [3]:
# Mount Google Drive at /drive (this import is only available on Google Colab),
# so the files used below can be opened through ordinary '/drive/My Drive/...' paths.
from google.colab import drive
drive.mount('/drive')

Mounted at /drive


In [4]:
# The api key is stored in a txt file named "INFO 617 Student API Key.txt".
# Keeping the key in a file avoids hard-coding the secret in the notebook.
# You can create an OpenAI client instance once you provide a valid api key.
# You will need to update API_KEY_PATH so that it points to your own .txt file.

from openai import OpenAI

API_KEY_PATH = '/drive/My Drive/Teaching/2025 Spring/INFO 617/OpenAI API Usage/INFO 617 Student API Key.txt'

# 'utf-8-sig' reads plain UTF-8 and also drops a leading byte-order mark, which
# some text editors add and which would otherwise become part of the key.
with open(API_KEY_PATH, 'r', encoding='utf-8-sig') as file:
    # Remove surrounding whitespace such as the trailing newline.
    api_key = file.read().strip()

# Fail right away with a clear message rather than with an authentication
# error on the first request.
if not api_key:
    raise ValueError(f'No API key found: {API_KEY_PATH!r} is empty.')

client = OpenAI(api_key=api_key)

In [34]:
# Read the spreadsheet of companies (with the country where each firm is based)
# into a DataFrame, then preview its first rows.
# You will need to update the path to the .xlsx file

import pandas as pd

firms_path = '/drive/My Drive/Teaching/2025 Spring/INFO 617/OpenAI API Usage/List of Firms with Countries.xlsx'
firms = pd.read_excel(firms_path)

preview = firms.head()
print(preview)

                           name country
0             1-800-FLOWERS.COM     USA
1                        2U INC     USA
2  37 INTERACTIVE ENTERTAINMENT     CHN
3               3D SYSTEMS CORP     USA
4                         3M CO     USA


In [22]:
# The fixed instruction part of the user prompt for this task.
# It is assembled from adjacent string literals, so every space and newline
# that ends up in the prompt is written out explicitly.

prompt = (
    "Identify the headquarters (city and state) for each firm in the list below. "
    "    The firms are separated by semicolons, and their nationalities are indicated in parentheses. "
    "    Return the headquarters information for each firm in the following format:  Firm Name (Nationality): City, State.\n "
    "    Ten firms are listed as follows.\n"
)

print(prompt)

Identify the headquarters (city and state) for each firm in the list below.     The firms are separated by semicolons, and their nationalities are indicated in parentheses.     Return the headquarters information for each firm in the following format:  Firm Name (Nationality): City, State.
     Ten firms are listed as follows.



In [14]:
# Build the text that will be inserted into the prompt for each firm:
# "<name> (<country>);" -- the trailing semicolon separates the firms.

name_with_country = firms['name'] + ' (' + firms['country'] + ')'
firms['combined'] = name_with_country + ';'
firms.head()

Unnamed: 0,name,country,combined
0,1-800-FLOWERS.COM,USA,1-800-FLOWERS.COM (USA);
1,2U INC,USA,2U INC (USA);
2,37 INTERACTIVE ENTERTAINMENT,CHN,37 INTERACTIVE ENTERTAINMENT (CHN);
3,3D SYSTEMS CORP,USA,3D SYSTEMS CORP (USA);
4,3M CO,USA,3M CO (USA);


In [23]:
# Append the data to the fixed prompt: the first ten firms, separated by
# spaces and wrapped in triple quotes on their own lines.

first_ten = firms['combined'].iloc[:10]
firm_list = ' '.join(first_ten)
firm_prompt = ''.join([prompt, '"""\n', firm_list, '\n"""'])
print(firm_prompt)

Identify the headquarters (city and state) for each firm in the list below.     The firms are separated by semicolons, and their nationalities are indicated in parentheses.     Return the headquarters information for each firm in the following format:  Firm Name (Nationality): City, State.
     Ten firms are listed as follows.
"""
1-800-FLOWERS.COM (USA); 2U INC (USA); 37 INTERACTIVE ENTERTAINMENT (CHN); 3D SYSTEMS CORP (USA); 3M CO (USA); 3P LEARNING LTD (AUS); 4IMPRINT GROUP PLC (GBR); 51JOB INC (CHN); 888 HOLDINGS (GIB); 8X8 INC (USA);
"""


In [24]:
# Send the complete prompt to OpenAI and keep the api response.
# The model used here is gpt-4.1-mini, a very recent, capable model.
# Learn more about the model here: https://openai.com/index/gpt-4-1/

response = client.responses.create(input=firm_prompt, model="gpt-4.1-mini")

Response(id='resp_680be940f72c8191872cb5edfe144ff50790ac16c4a41aa2', created_at=1745611072.0, error=None, incomplete_details=None, instructions=None, metadata={}, model='gpt-4.1-mini-2025-04-14', object='response', output=[ResponseOutputMessage(id='msg_680be94173d481918d5edec02ccfce390790ac16c4a41aa2', content=[ResponseOutputText(annotations=[], text='1-800-FLOWERS.COM (USA): Carle Place, New York  \n2U INC (USA): Lanham, Maryland  \n37 INTERACTIVE ENTERTAINMENT (CHN): Beijing, China  \n3D SYSTEMS CORP (USA): Rock Hill, South Carolina  \n3M CO (USA): Saint Paul, Minnesota  \n3P LEARNING LTD (AUS): Brisbane, Queensland  \n4IMPRINT GROUP PLC (GBR): London, England  \n51JOB INC (CHN): Shanghai, China  \n888 HOLDINGS (GIB): Gibraltar  \n8X8 INC (USA): Campbell, California', type='output_text')], role='assistant', status='completed', type='message')], parallel_tool_calls=True, temperature=1.0, tool_choice='auto', tools=[], top_p=1.0, max_output_tokens=None, previous_response_id=None, reason

In [33]:
# The reply text has one row per firm: the firm's name, followed by its
# headquarters city and state.

first_output = response.output[0]
print(first_output.content[0].text)

1-800-FLOWERS.COM (USA): Carle Place, New York  
2U INC (USA): Lanham, Maryland  
37 INTERACTIVE ENTERTAINMENT (CHN): Beijing, China  
3D SYSTEMS CORP (USA): Rock Hill, South Carolina  
3M CO (USA): Saint Paul, Minnesota  
3P LEARNING LTD (AUS): Brisbane, Queensland  
4IMPRINT GROUP PLC (GBR): London, England  
51JOB INC (CHN): Shanghai, China  
888 HOLDINGS (GIB): Gibraltar  
8X8 INC (USA): Campbell, California


In [39]:
# With proper parsing, we can save the results in a csv/xlsx file.

def parse_headquarters(text):
    """Parse the model's reply into a table with one row per firm.

    Each non-blank line of `text` is expected to look like
    "Firm Name (Nationality): City, State". A line is split at its first
    colon into the company and its headquarters, and the headquarters is split
    at its first comma into city and state, so extra colons or commas later in
    a line cannot change the number of columns.

    Returns a DataFrame with the columns 'Company', 'Headerquarter', 'City' and
    'State' (the 'Headerquarter' spelling is kept so existing uses of that
    column keep working). Values are stripped of surrounding whitespace.
    'State' is None when the headquarters contains no comma (e.g. "Gibraltar");
    'Headerquarter' and 'City' are empty strings when a line has no colon.
    """
    rows = []
    for line in text.splitlines():
        line = line.strip()  # the reply may end its lines with trailing spaces
        if not line:
            continue  # skip blank lines instead of producing empty rows
        company, _, headquarter = line.partition(':')
        city, comma, state = headquarter.partition(',')
        rows.append({
            'Company': company.strip(),
            'Headerquarter': headquarter.strip(),
            'City': city.strip(),
            'State': state.strip() if comma else None,
        })
    # Passing `columns` keeps the four columns even when there are no rows.
    return pd.DataFrame(rows, columns=['Company', 'Headerquarter', 'City', 'State'])


results = parse_headquarters(response.output[0].content[0].text)
print(results.head())

                              Company                 Headerquarter  \
0             1-800-FLOWERS.COM (USA)       Carle Place, New York     
1                        2U INC (USA)            Lanham, Maryland     
2  37 INTERACTIVE ENTERTAINMENT (CHN)              Beijing, China     
3               3D SYSTEMS CORP (USA)   Rock Hill, South Carolina     
4                         3M CO (USA)       Saint Paul, Minnesota     

           City              State  
0   Carle Place         New York    
1        Lanham         Maryland    
2       Beijing            China    
3     Rock Hill   South Carolina    
4    Saint Paul        Minnesota    
