# L6: Check outputs

## Setup
#### Load the API key and relevant Python libraries.
In this course, we've provided some code that loads the OpenAI API key for you.

In [51]:
import os
import openai
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

openai.api_key  = os.environ['OPENAI_API_KEY']

In [52]:
def get_completion_from_messages(messages, model="gpt-3.5-turbo", temperature=0, max_tokens=500):
    """Send a chat transcript to the OpenAI chat endpoint and return the reply text.

    Args:
        messages: Forwarded unchanged as the ``messages`` argument of
            ``openai.ChatCompletion.create``.
        model: Model name forwarded to the API.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.

    Returns:
        The ``"content"`` entry of the first choice's message.
    """
    request_kwargs = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    completion = openai.ChatCompletion.create(**request_kwargs)
    # Only the first returned choice is used.
    first_choice = completion.choices[0]
    return first_choice.message["content"]

### Check output for potentially harmful content

In [53]:
# Candidate reply to the customer that gets screened below. Nothing is
# interpolated into it, so a plain (non-f) string literal is sufficient.
final_response_to_customer = """
The SmartX ProPhone has a 6.1-inch display, 128GB storage, \
12MP dual camera, and 5G. The FotoSnap DSLR Camera \
has a 24.2MP sensor, 1080p video, 3-inch LCD, and \
interchangeable lenses. We have a variety of TVs, including \
the CineView 4K TV with a 55-inch display, 4K resolution, \
HDR, and smart TV features. We also have the SoundMax \
Home Theater system with 5.1 channel, 1000W output, wireless \
subwoofer, and Bluetooth. Do you have any specific questions \
about these products or any other products we offer?
"""

# Submit the reply to the Moderation endpoint and show the first result entry.
response = openai.Moderation.create(input=final_response_to_customer)
moderation_output = response["results"][0]
print(moderation_output)

{
  "categories": {
    "harassment": false,
    "harassment/threatening": false,
    "hate": false,
    "hate/threatening": false,
    "self-harm": false,
    "self-harm/instructions": false,
    "self-harm/intent": false,
    "sexual": false,
    "sexual/minors": false,
    "violence": false,
    "violence/graphic": false
  },
  "category_scores": {
    "harassment": 2.733418114075903e-05,
    "harassment/threatening": 9.949477316695265e-06,
    "hate": 7.229774382722098e-06,
    "hate/threatening": 2.0442371351236943e-06,
    "self-harm": 1.2748321296385257e-06,
    "self-harm/instructions": 3.7145471765143157e-07,
    "self-harm/intent": 2.0609875264199218e-06,
    "sexual": 0.00015618029283359647,
    "sexual/minors": 1.1930209439015016e-05,
    "violence": 0.0002992694790009409,
    "violence/graphic": 1.4630574696639087e-05
  },
  "flagged": false
}


### Check if output is factually based on the provided product information

In [54]:
# Grading rubric for the evaluator model: it must answer with a single Y or N.
# The trailing backslashes join the wrapped source lines into one line of text.
system_message = """
You are an assistant that evaluates whether \
customer service agent responses sufficiently \
answer customer questions, and also validates that \
all the facts the assistant cites from the product \
information are correct.
The product information and user and customer \
service agent messages will be delimited by \
3 backticks, i.e. ```.
Respond with a Y or N character, with no punctuation:
Y - if the output sufficiently answers the question \
AND the response correctly uses product information
N - otherwise

Output a single letter only.
"""

# The customer's original question, reused by the fact-checking prompts below.
customer_message = """
tell me about the smartx pro phone and \
the fotosnap camera, the dslr one. \
Also tell me about your tvs"""
# Catalog entries handed to the evaluator as grounding data. The literal is a
# run of space-separated JSON-style objects (not wrapped in an array), and it
# contains one line break inside the SoundMax Soundbar description.
product_information = """{ "name": "SmartX ProPhone", "category": "Smartphones and Accessories", "brand": "SmartX", "model_number": "SX-PP10", "warranty": "1 year", "rating": 4.6, "features": [ "6.1-inch display", "128GB storage", "12MP dual camera", "5G" ], "description": "A powerful smartphone with advanced camera features.", "price": 899.99 } { "name": "FotoSnap DSLR Camera", "category": "Cameras and Camcorders", "brand": "FotoSnap", "model_number": "FS-DSLR200", "warranty": "1 year", "rating": 4.7, "features": [ "24.2MP sensor", "1080p video", "3-inch LCD", "Interchangeable lenses" ], "description": "Capture stunning photos and videos with this versatile DSLR camera.", "price": 599.99 } { "name": "CineView 4K TV", "category": "Televisions and Home Theater Systems", "brand": "CineView", "model_number": "CV-4K55", "warranty": "2 years", "rating": 4.8, "features": [ "55-inch display", "4K resolution", "HDR", "Smart TV" ], "description": "A stunning 4K TV with vibrant colors and smart features.", "price": 599.99 } { "name": "SoundMax Home Theater", "category": "Televisions and Home Theater Systems", "brand": "SoundMax", "model_number": "SM-HT100", "warranty": "1 year", "rating": 4.4, "features": [ "5.1 channel", "1000W output", "Wireless subwoofer", "Bluetooth" ], "description": "A powerful home theater system for an immersive audio experience.", "price": 399.99 } { "name": "CineView 8K TV", "category": "Televisions and Home Theater Systems", "brand": "CineView", "model_number": "CV-8K65", "warranty": "2 years", "rating": 4.9, "features": [ "65-inch display", "8K resolution", "HDR", "Smart TV" ], "description": "Experience the future of television with this stunning 8K TV.", "price": 2999.99 } { "name": "SoundMax Soundbar", "category": "Televisions and Home Theater Systems", "brand": "SoundMax", "model_number": "SM-SB50", "warranty": "1 year", "rating": 4.3, "features": [ "2.1 channel", "300W output", "Wireless subwoofer", "Bluetooth" ], "description": "Upgrade 
your TV's audio with this sleek and powerful soundbar.", "price": 199.99 } { "name": "CineView OLED TV", "category": "Televisions and Home Theater Systems", "brand": "CineView", "model_number": "CV-OLED55", "warranty": "2 years", "rating": 4.7, "features": [ "55-inch display", "4K resolution", "HDR", "Smart TV" ], "description": "Experience true blacks and vibrant colors with this OLED TV.", "price": 1499.99 }"""
# User turn for the evaluator: the customer's question, the catalog data and the
# agent's reply, each wrapped in triple backticks as the system prompt announces.
q_a_pair = f"""
Customer message: ```{customer_message}```
Product information: ```{product_information}```
Agent response: ```{final_response_to_customer}```

Does the response use the retrieved information correctly?
Does the response sufficiently answer the question

Output Y or N
"""

# System turn carries the grading rubric; user turn carries the case to grade.
messages = [
    dict(role='system', content=system_message),
    dict(role='user', content=q_a_pair),
]

# max_tokens=1 is passed through as the completion token limit for this call.
response = get_completion_from_messages(messages, max_tokens=1)
print(response)

Y


In [55]:
# A reply that has nothing to do with the customer's question; the printed
# verdict for it in this notebook is N.
another_response = "life is like a box of chocolates"

# Same prompt layout as the previous check, with the off-topic reply swapped in.
q_a_pair = f"""
Customer message: ```{customer_message}```
Product information: ```{product_information}```
Agent response: ```{another_response}```

Does the response use the retrieved information correctly?
Does the response sufficiently answer the question?

Output Y or N
"""

messages = [
    dict(role='system', content=system_message),
    dict(role='user', content=q_a_pair),
]

# This call relies on the helper's default token limit.
response = get_completion_from_messages(messages)
print(response)

N


## Experiment on your own

In [58]:
# Evaluating model output with moderation API

# Stock-market summary to screen with the moderation endpoint. Nothing is
# interpolated into it, so a plain (non-f) string literal is sufficient.
final_response_to_customer = """
Today, in the food and entertainment sectors, there were some notable developments in the stock market.

1. Campbell Soup (CPB): Campbell Soup, a global food company, experienced a significant increase in its stock price. The initial price was $37.00, and it closed at $58.15. The company made headlines with the announcement of its plan to introduce a new plant-based soup line, targeting the growing vegan market.

2. Disney (DIS): The Walt Disney Company, a renowned entertainment company, also saw a substantial rise in its stock price. Starting at $40.68, it ended the day at $131.50. Disney made waves with the news of its upcoming launch of a virtual reality theme park, which will blend traditional rides with immersive VR experiences.

3. General Mills (GIS): General Mills, a global consumer foods manufacturer, experienced a significant surge in its stock price. It started at $15.59 and closed at $52.74. The company made headlines by announcing its integration of blockchain technology to enhance food supply chain transparency.

4. PepsiCo (PEP): PepsiCo, a well-known food and beverage company, also witnessed a notable increase in its stock price. Starting at $34.13, it closed at $105.67. PepsiCo made news with its plan to launch a new line of health-oriented beverages, focusing on wellness and nutrition.

5. Starbucks (SBUX): Starbucks Corporation, a leading specialty coffee and tea company, had a remarkable increase in its stock price. It started at $6.23 and closed at $48.93. Starbucks made headlines by expanding into the home brewing systems market, introducing a new range of smart coffee machines.

These developments indicate positive market sentiment and investor interest in both the food and entertainment sectors. It is important to note that stock prices can fluctuate throughout the day, and it is advisable to consult a financial advisor or conduct further research before making any investment decisions.
"""

# Submit the summary to the Moderation endpoint and show the first result entry.
response = openai.Moderation.create(input=final_response_to_customer)
moderation_output = response["results"][0]
print(moderation_output)

{
  "categories": {
    "harassment": false,
    "harassment/threatening": false,
    "hate": false,
    "hate/threatening": false,
    "self-harm": false,
    "self-harm/instructions": false,
    "self-harm/intent": false,
    "sexual": false,
    "sexual/minors": false,
    "violence": false,
    "violence/graphic": false
  },
  "category_scores": {
    "harassment": 1.0587268661765847e-05,
    "harassment/threatening": 2.0715497157652862e-06,
    "hate": 3.4593060718179913e-06,
    "hate/threatening": 2.1250730242172722e-07,
    "self-harm": 6.547399777900864e-08,
    "self-harm/instructions": 1.8845437210757154e-08,
    "self-harm/intent": 2.4184043212471806e-08,
    "sexual": 8.238130249083042e-06,
    "sexual/minors": 7.201081757557404e-07,
    "violence": 6.682801904389635e-05,
    "violence/graphic": 0.000215745487366803
  },
  "flagged": false
}


In [59]:
# Checking if model output is factual, example 1

# Grading rubric for the stock-analyst variant of the evaluator; it must answer
# with a single Y or N. Trailing backslashes join the wrapped source lines.
system_message = """
You are an assistant that evaluates whether \
stock analyst agent responses sufficiently \
answer customer questions, and also validates that \
all the facts the assistant cites from the stock \
information are correct.
The product information and user and stock analyst \
service agent messages will be delimited by \
3 backticks, i.e. ```.
Respond with a Y or N character, with no punctuation:
Y - if the output sufficiently answers the question \
AND the response correctly uses stock information
N - otherwise

Output a single letter only.
"""

# The customer's question that the stock summary is supposed to answer.
customer_message = """
Tell me what happened with food and \
entertainment stocks today.
"""
# Stock records passed to the evaluator as grounding data: a JSON array, kept as
# a string, in which each object has the keys company, description,
# industry_vertical, initial_price, end_price, news_story and symbol.
# NOTE(review): the Monsanto record lists symbol "MO"; Monsanto's NYSE ticker was
# presumably "MON" — confirm against the data source before relying on it.
stock_information = """[
    {
        "company": "3M",
        "description": "3M, based in Minnesota, may be best known for its Scotch tape and Post-It Notes, but it also produces sand paper, adhesives, medical products, computer screen filters, food safety items, stationery products and many products used in automotive, marine, and aircraft industries.",
        "industry_vertical": "Manufacturing & Consumer Goods",
        "initial_price": 44.28,
        "end_price": 102.47,
        "news_story": "3M Announces Breakthrough in Sustainable Adhesive Technology, Aiming to Reduce Environmental Footprint",
        "symbol": "MMM"
    },
    {
        "company": "Amazon.com",
        "description": "Amazon.com, Inc. is an online retailer in North America and internationally. The company serves consumers through its retail Web sites and focuses on selection, price, and convenience.",
        "industry_vertical": "Technology & E-commerce",
        "initial_price": 89.38,
        "end_price": 315.20,
        "news_story": "Amazon.com Unveils Plan to Launch a Fleet of Delivery Drones, Revolutionizing E-commerce Logistics",
        "symbol": "AMZN"
    },
    {
        "company": "Campbell Soup",
        "description": "Campbell Soup is a worldwide food company, offering condensed and ready-to-serve soups; broth, stocks, and canned poultry; pasta sauces; Mexican sauces; canned pastas, gravies, and beans; juices and beverages; and tomato juices.",
        "industry_vertical": "Food & Beverages",
        "initial_price": 37.0,
        "end_price": 58.15,
        "news_story": "Campbell Soup to Introduce Plant-Based Soup Line, Targeting the Growing Vegan Market",
        "symbol": "CPB"
    },
    {
        "company": "Disney",
        "description": "The Walt Disney Company, founded in 1923, is a worldwide entertainment company, with movies, cable networks, radio networks, movie production, musical recordings and live stage plays.",
        "industry_vertical": "Entertainment & Media",
        "initial_price": 40.68,
        "end_price": 131.50,
        "news_story": "Disney to Launch Virtual Reality Theme Park, Blending Traditional Rides with VR Experiences",
        "symbol": "DIS"
    },
    {
        "company": "Dow Chemical",
        "description": "The Dow Chemical Company manufactures raw materials that go into consumer products and services. These materials include food and pharmaceutical ingredients, electronic displays, and semiconductor packaging.",
        "industry_vertical": "Manufacturing & Consumer Goods",
        "initial_price": 38.83,
        "end_price": 76.22,
        "news_story": "Dow Chemical Announces Major Breakthrough in Biodegradable Plastics, Aiming to Combat Plastic Waste",
        "symbol": "DOW"
    },
    {
        "company": "Exxon Mobil",
        "description": "Exxon Mobil engages in the exploration and production of crude oil and natural gas, and manufacture of petroleum products.",
        "industry_vertical": "Energy & Resources",
        "initial_price": 39.0,
        "end_price": 112.30,
        "news_story": "Exxon Mobil to Invest Heavily in Renewable Energy, Shifting Focus from Fossil Fuels",
        "symbol": "XOM"
    },
    {
        "company": "Ford",
        "description": "Ford Motor Co. develops, manufactures, sells and services vehicles and parts worldwide. Ford sells cars and trucks primarily under the Ford and Lincoln brands.",
        "industry_vertical": "Automotive & Transportation",
        "initial_price": 27.34,
        "end_price": 47.89,
        "news_story": "Ford Unveils New Self-Driving Car Prototype, Promising a Revolution in Personal Transportation",
        "symbol": "F"
    },
    {
        "company": "The Gap",
        "description": "The Gap, Inc. sells retail clothing, accessories and personal care products globally under the brand names Gap, Old Navy, Banana Republic, and others.",
        "industry_vertical": "Retail & Fashion",
        "initial_price": 46.0,
        "end_price": 33.67,
        "news_story": "The Gap Announces Expansion into Sustainable Fashion, Launching a New Eco-friendly Clothing Line",
        "symbol": "GPS"
    },
    {
        "company": "General Mills",
        "description": "General Mills manufactures and sells consumer foods worldwide. Products include cereals, frozen vegetables, dough, dessert and baking mixes, and frozen pizzas.",
        "industry_vertical": "Food & Beverages",
        "initial_price": 15.59,
        "end_price": 52.74,
        "news_story": "General Mills to Integrate Blockchain for Food Supply Chain Transparency",
        "symbol": "GIS"
    },
    {
        "company": "Hewlett Packard",
        "description": "Hewlett-Packard designs and sells products, technologies, software and IT services to consumers, businesses, government and education sectors worldwide.",
        "industry_vertical": "Technology & E-commerce",
        "initial_price": 66.28,
        "end_price": 89.14,
        "news_story": "Hewlett Packard Launches New Generation of Quantum Computers, Aiming to Transform the Tech Industry",
        "symbol": "HPQ"
    },
    {
        "company": "IBM",
        "description": "IBM is an international IT company. IBM offers infrastructure and technology services, software for business integration and information management, data warehousing, and more.",
        "industry_vertical": "Technology & E-commerce",
        "initial_price": 118.37,
        "end_price": 203.45,
        "news_story": "IBM Develops AI Capable of Predicting Market Trends, Set to Revolutionize Financial Analysis",
        "symbol": "IBM"
    },
    {
        "company": "Johnson & Johnson",
        "description": "Johnson & Johnson develops and manufactures health care products for sale worldwide. J&J products include brands like Johnson’s, Aveeno, Clean & Clear, Neutrogena, and more.",
        "industry_vertical": "Healthcare & Pharmaceuticals",
        "initial_price": 35.13,
        "end_price": 78.90,
        "news_story": "Johnson & Johnson to Launch a New Range of AI-Driven Personal Health Monitoring Devices",
        "symbol": "JNJ"
    },
    {
        "company": "Microsoft",
        "description": "Microsoft develops, manufactures, licenses, and supports a range of software products and services for various computing devices worldwide.",
        "industry_vertical": "Technology & E-commerce",
        "initial_price": 55.72,
        "end_price": 256.80,
        "news_story": "Microsoft Announces New Virtual Reality Operating System, Integrating VR with Daily Computing",
        "symbol": "MSFT"
    },
    {
        "company": "Monsanto",
        "description": "Monsanto provides agricultural products for farmers in the United States and internationally. It operates in two segments, Seeds and Genomics, and Agricultural Productivity.",
        "industry_vertical": "Agriculture & Biotechnology",
        "initial_price": 11.47,
        "end_price": 153.21,
        "news_story": "Monsanto Introduces Drought-Resistant Crops, Aiming to Improve Global Food Security",
        "symbol": "MO"
    },
    {
        "company": "PepsiCo",
        "description": "PepsiCo, Inc. manufactures, markets, and sells various foods, snacks, and carbonated and non-carbonated beverages worldwide.",
        "industry_vertical": "Food & Beverages",
        "initial_price": 34.13,
        "end_price": 105.67,
        "news_story": "PepsiCo to Launch a New Line of Health-Oriented Beverages, Focusing on Wellness and Nutrition",
        "symbol": "PEP"
    },
    {
        "company": "Starbucks",
        "description": "Starbucks Corp. provides specialty coffee and tea beverages, packaged and ground coffee beans, single-serve products, juices and food offerings worldwide.",
        "industry_vertical": "Food & Beverages",
        "initial_price": 6.23,
        "end_price": 48.93,
        "news_story": "Starbucks Expands Into Home Brewing Systems, Launching a New Range of Smart Coffee Machines",
        "symbol": "SBUX"
    },
    {
        "company": "Texas Instruments",
        "description": "Texas Instruments designs and sells semiconductors to electronics designers and manufacturers worldwide. The company has four segments: Analog, Embedded Processing, Wireless and Other.",
        "industry_vertical": "Technology & E-commerce",
        "initial_price": 53.88,
        "end_price": 97.12,
        "news_story": "Texas Instruments Announces Innovative Chip That Can Triple Smartphone Battery Life",
        "symbol": "TXN"
    }
]"""
# User turn for the evaluator: the customer's question, the stock records and the
# analyst's summary, each wrapped in triple backticks as the system prompt announces.
q_a_pair = f"""
Customer message: ```{customer_message}```
Product information: ```{stock_information}```
Agent response: ```{final_response_to_customer}```

Does the response use the retrieved information correctly?
Does the response sufficiently answer the question

Output Y or N
"""

# System turn carries the grading rubric; user turn carries the case to grade.
messages = [
    dict(role='system', content=system_message),
    dict(role='user', content=q_a_pair),
]

# max_tokens=1 is passed through as the completion token limit for this call.
response = get_completion_from_messages(messages, max_tokens=1)
print(response)

Y


In [60]:
# Checking if model output is factual, example 2
# A reply that contradicts the stock records, used to show the evaluator
# rejecting an unsupported answer.
another_response = "Nothing happened in those industries today."

# Ground the check in `stock_information`, the data the customer's question is
# about. The previous version interpolated `product_information` (the electronics
# catalog from the earlier section), so the reply was graded against unrelated
# data. The "Product information" label is kept because the system prompt refers
# to the data block by that name.
q_a_pair = f"""
Customer message: ```{customer_message}```
Product information: ```{stock_information}```
Agent response: ```{another_response}```

Does the response use the retrieved information correctly?
Does the response sufficiently answer the question?

Output Y or N
"""

# System turn carries the grading rubric; user turn carries the case to grade.
messages = [
    {'role': 'system', 'content': system_message},
    {'role': 'user', 'content': q_a_pair}
]

response = get_completion_from_messages(messages)
print(response)

N
