In [None]:
# importing essential libraries
from google.colab import drive
!pip install openai
import openai
import json
!pip install tenacity
from tenacity import retry, wait_random_exponential, stop_after_attempt
# Installing and importing Wiki api
!pip install -q -U wikipedia-api
!pip3 install wikipedia-api
import wikipediaapi



# ***Using ChatGPT through API***

## ***Installing and importing OpenAI***

In [None]:
#Install OpenAI

!pip install openai



## ***Mounting Google Drive in Colab***

In [None]:
# Mount Google Drive so that files stored there can be read from this Colab runtime
from google.colab import drive
drive.mount('/content/drive', force_remount = True)   # Drive is mounted at /content/drive; force_remount=True re-mounts it so newly uploaded files become visible

Mounted at /content/drive


In [None]:
# Base path of the GenAI folder on the mounted Google Drive (ends with '/', so a file name can be appended directly)
filepath = "/content/drive/MyDrive/GenAI/"   # Verify the folder structure in the 'Files' pane of Google Colab after mounting

In [None]:
# Check if drive got mounted or not - ls command shows all files present inside your directory
!ls "/content/drive/MyDrive/GenAI"

 AI_tutor_system_message_1.txt			 'Investor Call Transcript - Q2FY23 Results.txt'
 AsianPaints.txt				  laptop_descriptions.csv
 earnings-call-transcript-q4-fy23.pdf		  tata_motors_transcript_sample.txt
'Investor Call Transcript - Q2FY23 Results.pdf'   tata_transcript.txt


In [None]:
# importing openai
import openai

**Steps:**
* Go to https://platform.openai.com/docs/overview
* Your profile > Billing > Add account details and $5 to your account balance
* Dashboard > API Keys > Create New Secret Key > Add a Name > Save the key > Close

In [None]:
# Connecting to OpenAI.
# The key is billable, so it is never written into the notebook source. It is read from the
# OPENAI_API_KEY environment variable; if that is unset or empty, it is asked for
# interactively (getpass does not echo what is typed).
import os
from getpass import getpass

openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("Enter your OpenAI API key: ")

# Option 2 - Store the key under the key icon (Secrets) in Google Colab and read it from there
# Option 3 - Place the key in a file on Google Drive and read it from there

# ***Tenacity to use Exponential Backoff to handle Rate Limit***

In [None]:
!pip install tenacity



In [None]:
# Best practice
import openai
from tenacity import retry, wait_random_exponential, stop_after_attempt

# On failure, try again with randomized exponential backoff (waits bounded between 1 and 20 seconds), stopping after the 6th attempt
@retry(stop=stop_after_attempt(6), wait=wait_random_exponential(min=1, max=20))
def get_response(user_prompt):
    """Send `user_prompt` as a single user message and return the text of the first reply choice."""
    completion = openai.chat.completions.create(
        model='gpt-3.5-turbo-16k',
        messages=[{"role": "user", "content": user_prompt}],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# ***Prompt Engineering - "Laptop Descriptions" Case Study***

In [None]:
import pandas as pd
import numpy as np
# Read the laptop descriptions from the mounted GenAI Drive folder (`filepath`) rather than
# from the current working directory, so the cell does not depend on a manually uploaded copy.
df = pd.read_csv(filepath + 'laptop_descriptions.csv')
df.head()

Unnamed: 0,laptop_description
0,The Dell Inspiron is a versatile laptop that c...
1,The MSI GL65 is a high-performance laptop desi...
2,The HP EliteBook is a premium laptop designed ...
3,The Lenovo IdeaPad is a versatile laptop that ...
4,The ASUS ZenBook Pro is a high-end laptop that...


## ***1) Auto-classification***

In [None]:
# Classification prompt template; fill it with mcq1_prompt.format(description=...).
# The description is wrapped in '###' so that it matches the delimiter the instructions refer to.
mcq1_prompt = '''
From the description of a laptop (delimited by '###'), you have to identify what role does the laptop serve.
Refer to the key value pairs of categories and category details below. Identify which of the following details does the product description fits best and assign that category to that laptop. \n
Categories:
[
    'general': 'For general purpose use such as light web browsing, editing documents etc.'
    'business': 'For business users, the focus is on portability, battery backup and general purpose use.'
    'gamer': 'For gamers, the focus is primarily on high-performance, high-end graphics requirement, efficient processor etc.'
    'programmer': 'For programmers, the focus is on performance, battery backup, high-end RAM etc.'
    'multimedia': 'For multimedia use cases, the requirements are a good quality/ high resolution display, wide screens, good audio and video quality, battery backup, efficiency etc.'
] \n
Laptop description: ###{description}###
'''

In [None]:
def get_chat_response_mcq1(user_request):

  '''
  Ask the chat model which category a laptop belongs to and return its reply.

  This function ONLY takes `user_request` as the input argument; the system prompt is
  defined inside the function, so it does not need to be passed in.

  Args:
    user_request: The fully formatted user prompt (laptop description plus category list).

  Returns:
    The reply text with surrounding whitespace removed, or None if the request raised
    an exception or the reply carried no text content.
  '''
  MODEL = 'gpt-3.5-turbo-16k'# Define GPT model

  SYSTEM_MESSAGE = '''You are a shopping assistant. The user will give you laptop description and some categories and their details.
  You have to find out which of the categories does the laptop fit best according to description.
  Remember to only give one word output, the category name, from the list of categories only, which resembles most closely.''' # Default System Message

  try:

    messages = [
        {"role": 'system', "content": SYSTEM_MESSAGE},
        {"role": 'user', "content": user_request}
    ] # Define the list of messages

    # Get the ChatCompletion response from the model
    response = openai.chat.completions.create(
        model = MODEL,
        messages = messages,
        temperature = 0  # classification: keep sampling randomness to a minimum
    )

    # Parse the response_content from the message
    response_content = response.choices[0].message.content

    # Strip stray whitespace/newlines around the label; leave a missing reply as None
    return response_content.strip() if response_content is not None else None

  # Report the failure and return None instead of raising
  except Exception as e:
    print(f"An error occurred: {e}")
    return None

In [None]:
# Scratch copy of df, used by the (commented-out) to_dict() experiments in the next two cells
a = df.copy()

In [None]:
#a.to_dict()
# Laptop description is key inside with 0 key with 1st description, 1 key with 2nd description and so on

In [None]:
#a.to_dict(orient ='records')
# Laptop description is the key for all descriptions

In [None]:
# Category names the classifier is asked to choose from (the keys listed in `mcq1_prompt`)
_LAPTOP_CATEGORIES = ('general', 'business', 'gamer', 'programmer', 'multimedia')


def _normalize_category(raw_response):
  '''Reduce a model reply to a bare category name when it mentions exactly one category.'''
  if raw_response is None:
    return None
  cleaned = raw_response.strip()
  # Compare on whole words with surrounding quotes/punctuation removed, case-insensitively
  words = {word.strip('\'".,:;!?()[]').lower() for word in cleaned.split()}
  mentioned = [category for category in _LAPTOP_CATEGORIES if category in words]
  # Replies naming no category, or more than one, are kept as-is so they stay visible for review
  return mentioned[0] if len(mentioned) == 1 else cleaned


def tag_laptop():
  '''
  Classify every laptop description in the global `df`.

  One prompt is built per row from `mcq1_prompt` and sent through
  `get_chat_response_mcq1`. A reply such as "The laptop description fits best in the
  'gamer' category." is reduced to 'gamer'; a failed request (None) stays None.

  Returns:
    A copy of `df` with an added 'Category' column; `df` itself is not modified.
  '''
  laptop_df = df.copy()

  # Collect the labels in row order and assign the column in one step, so the result
  # does not depend on the DataFrame having a 0..n-1 index.
  categories = [
      _normalize_category(get_chat_response_mcq1(mcq1_prompt.format(description=description)))
      for description in laptop_df['laptop_description']
  ]
  laptop_df['Category'] = categories

  # return the DataFrame
  return laptop_df

In [None]:
# Classify every laptop description (tag_laptop sends one chat request per row) and preview the first rows
tag_df = tag_laptop()
tag_df.head()

Unnamed: 0,laptop_description,Category
0,The Dell Inspiron is a versatile laptop that c...,multimedia
1,The MSI GL65 is a high-performance laptop desi...,gamer
2,The HP EliteBook is a premium laptop designed ...,business
3,The Lenovo IdeaPad is a versatile laptop that ...,multimedia
4,The ASUS ZenBook Pro is a high-end laptop that...,gamer


In [None]:
# Count how many laptops received each category label
tag_df['Category'].value_counts()

Category
multimedia                                                   8
gamer                                                        4
business                                                     3
programmer                                                   2
general                                                      2
The laptop description fits best in the 'gamer' category.    1
Name: count, dtype: int64

In [None]:
'''
1) Homework : Try examples in your company for all 3 use case (Extract info, Few shot prompting, Classification)

2) Read about package --> wikipediaapi (to bring all data from wikipedia here) from wiki pedia get me answers

3) For all laptop descriptions, output the data in this format
'''

'''
structure = {
    "Brand": ___ ,
    "Model Name": ___ ,
    "GPU processor": ___ ,
    "Display Resolution": ___ ,
    "Weight": ___ ,
    "Processor": ___ ,
    "Clock speed": ___ ,
    "Budget": ___
}
'''

'\nstructure = {\n    "Brand": ___ ,\n    "Model Name": ___ ,\n    "GPU processor": ___ ,\n    "Display Resolution": ___ ,\n    "Weight": ___ ,\n    "Processor": ___ ,\n    "Clock speed": ___ ,\n    "Budget": ___\n}\n'

## ***2) Extract Laptop Specifications***

In [None]:
'''
mcq2_prompt =
From the description of a laptop (delimited by '###'), you have to extract information and output in JSON format to the keys specified.
Refer to the keys mentioned below and insert the values by extracting details from laptop description.
Laptop Name = {
    "Brand": ___ ,
    "Model Name": ___ ,
    "GPU processor": ___ ,
    "Display Resolution": ___ ,
    "Weight": ___ ,
    "Processor": ___ ,
    "Clock speed": ___ ,
    "Budget": ___
}
Laptop description: {description}
'''

'\nmcq2_prompt = \nFrom the description of a laptop (delimited by \'###\'), you have to extract information and output in JSON format to the keys specified.\nRefer to the keys mentioned below and insert the values by extracting details from laptop description.\nLaptop Name = {\n    "Brand": ___ ,\n    "Model Name": ___ ,\n    "GPU processor": ___ ,\n    "Display Resolution": ___ ,\n    "Weight": ___ ,\n    "Processor": ___ ,\n    "Clock speed": ___ ,\n    "Budget": ___\n}\nLaptop description: {description}\n'

In [None]:
# Extraction prompt template; fill it with mcq2_prompt.format(description=...).
# The worked example names the "Clock speed" key so that it agrees with the list of requested keys.
mcq2_prompt = '''
Laptop Description: {description}
From the laptop description above, you have to extract relevant values for the following dictionary items. The dictionary structure should have keys
"Brand","Model Name","GPU processor","Display Resolution","Weight","Processor","Clock speed","Budget"
Try giving quantitative, absolute, or numerical outputs. Try not to give qualitative or adjective outputs. for example: If the clock speed of a laptop is 2.4GHz, then, in the "Clock speed" key, give output as '2.4GHz' instead of 'very fast'.
Extract only one word values of these properties. Fill in the blanks for each product and output each product's dictionary in json format.
'''

In [None]:
def get_chat_response_mcq2(user_request):
  '''
  Send `user_request` to the chat model together with a fixed system message.

  Returns the text content of the first reply choice. If anything raises while the
  request is built or sent, the error is printed and None is returned.
  '''
  try:
    # Note: response_format (JSON mode) is not requested; the reply comes back as plain text
    completion = openai.chat.completions.create(
        model='gpt-3.5-turbo-16k',
        messages=[
            {'role': 'system', 'content': 'You are a helpful shopping assitant.'},
            {'role': 'user', 'content': user_request},
        ],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
  except Exception as e:
    print(f"An error occurred: {e}")
    return None

In [None]:
def extract_laptop_spec():
  '''
  Request the specification dictionary for every laptop description in the global `df`.

  One prompt is built per row from `mcq2_prompt` and sent through
  `get_chat_response_mcq2`; each reply is printed as soon as it is received.

  Returns:
    A list with one entry per row of `df`, in row order, holding whatever
    `get_chat_response_mcq2` returned for that row.
  '''
  # Creating an empty list to store the properties
  result = []

  # Get the relevant values for each of the property:
  for description in df['laptop_description']:
    # {description} is the only placeholder in the template
    prompt = mcq2_prompt.format(description=description)
    values = get_chat_response_mcq2(prompt)
    result.append(values)
    # We will print each of the dictionary of properties
    print(values)

  # But in the function output, we are returning the whole list as a whole.
  return result

In [None]:
# Run the extraction for every description; extract_laptop_spec prints each reply and returns them all as a list
values = extract_laptop_spec()

{
  "Brand": "Dell",
  "Model Name": "Inspiron",
  "GPU processor": "Intel",
  "Display Resolution": "1920x1080",
  "Weight": "2.5 kg",
  "Processor": "Intel",
  "Clock speed": "2.4 GHz",
  "Budget": "35,000"
}
{
  "Brand": "MSI",
  "Model Name": "GL65",
  "GPU processor": "NVIDIA",
  "Display Resolution": "1920x1080",
  "Weight": "2.3 kg",
  "Processor": "Intel",
  "Clock speed": "2.6 GHz",
  "Budget": "55,000"
}
{
  "Brand": "HP",
  "Model Name": "EliteBook",
  "GPU processor": "Intel UHD",
  "Display Resolution": "1920x1080",
  "Weight": "1.5 kg",
  "Processor": "Intel Core",
  "Clock speed": "2.8 GHz",
  "Budget": "90,000"
}
{
    "Brand": "Lenovo",
    "Model Name": "IdeaPad",
    "GPU processor": "Intel UHD",
    "Display Resolution": "1366x768",
    "Weight": "2.2 kg",
    "Processor": "Intel Core i3",
    "Clock speed": "2.1 GHz",
    "Budget": "25,000"
}
{
  "Brand": "ASUS",
  "Model Name": "ZenBook Pro",
  "GPU processor": "NVIDIA",
  "Display Resolution": "3840x2160",
  "Wei