## Fetching Data Using OpenAI in Python

In [69]:
import os

# Directory that holds the notebook's input files (images, Tweets.csv, ...).
# Set the DATA_DIR environment variable to override; the default is ~/Downloads.
# "~" is expanded here because open() does not expand it, and the trailing
# separator is guaranteed because later cells build paths as PATH + "file name".
PATH = os.path.join(os.path.expanduser(os.environ.get("DATA_DIR", "~/Downloads")), "")

### Installing the OpenAI Library

In [8]:
!pip install openai



### Authenticating Your API Key

In [9]:
import os
import openai
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# OPENAI_KEY is the name this notebook has always used; OPENAI_API_KEY is
# accepted as a fallback so an already-configured key is not replaced by None.
api_key = os.getenv('OPENAI_KEY') or os.getenv('OPENAI_API_KEY')
if not api_key:
    # Fail here with a clear message instead of on the first API request.
    raise RuntimeError(
        "No OpenAI API key found: set OPENAI_KEY (or OPENAI_API_KEY) "
        "in the environment or in a .env file."
    )
openai.api_key = api_key


### Making Requests

In [10]:
# Request the list of models available to this API key and print the raw response.
models = openai.Model.list()
print(models)


{
  "data": [
    {
      "created": 1649358449,
      "id": "babbage",
      "object": "model",
      "owned_by": "openai",
      "parent": null,
      "permission": [
        {
          "allow_create_engine": false,
          "allow_fine_tuning": false,
          "allow_logprobs": true,
          "allow_sampling": true,
          "allow_search_indices": false,
          "allow_view": true,
          "created": 1669085501,
          "group": null,
          "id": "modelperm-49FUp5v084tBB49tC4z8LPH5",
          "is_blocking": false,
          "object": "model_permission",
          "organization": "*"
        }
      ],
      "root": "babbage"
    },
    {
      "created": 1649357491,
      "id": "ada",
      "object": "model",
      "owned_by": "openai",
      "parent": null,
      "permission": [
        {
          "allow_create_engine": false,
          "allow_fine_tuning": false,
          "allow_logprobs": true,
          "allow_sampling": true,
          "allow_search_indices":

### Processing Response

In [11]:
# Build a DataFrame with one row per entry of the "data" list in the model
# listing, then display the first 20 rows.
import pandas as pd
data = pd.DataFrame(models["data"])
data.head(20)

Unnamed: 0,id,object,created,owned_by,permission,root,parent
0,babbage,model,1649358449,openai,"[{'id': 'modelperm-49FUp5v084tBB49tC4z8LPH5', ...",babbage,
1,ada,model,1649357491,openai,"[{'id': 'modelperm-xTOEYvDZGN7UDnQ65VpzRRHz', ...",ada,
2,davinci,model,1649359874,openai,"[{'id': 'modelperm-U6ZwlyAd0LyMk4rcMdz33Yc3', ...",davinci,
3,text-embedding-ada-002,model,1671217299,openai-internal,"[{'id': 'modelperm-Ad4J5NsqPbNJy0CMGNezXaeo', ...",text-embedding-ada-002,
4,babbage-code-search-code,model,1651172509,openai-dev,"[{'id': 'modelperm-4qRnA3Hj8HIJbgo0cGbcmErn', ...",babbage-code-search-code,
5,text-similarity-babbage-001,model,1651172505,openai-dev,"[{'id': 'modelperm-48kcCHhfzvnfY84OtJf5m8Cz', ...",text-similarity-babbage-001,
6,text-davinci-001,model,1649364042,openai,"[{'id': 'modelperm-MVM5NfoRjXkDve3uQW3YZDDt', ...",text-davinci-001,
7,curie-instruct-beta,model,1649364042,openai,"[{'id': 'modelperm-JlSyMbxXeFm42SDjN0wTD26Y', ...",curie-instruct-beta,
8,babbage-code-search-text,model,1651172509,openai-dev,"[{'id': 'modelperm-Lftf8H4ZPDxNxVs0hHPJBUoe', ...",babbage-code-search-text,
9,babbage-similarity,model,1651172505,openai-dev,"[{'id': 'modelperm-mS20lnPqhebTaFPrcCufyg7m', ...",babbage-similarity,


## Performing Common Tasks with the OpenAI API

## Natural Language Processing

### Text Completion

In [12]:
# Ask for a single continuation of the prompt, capped at 15 tokens, with
# temperature=0, and print the complete response object (choices + usage).
text = openai.Completion.create(
  model="text-davinci-003",
  prompt="France is famous for its",
  max_tokens=15,
  temperature=0
)

print(text)

{
  "choices": [
    {
      "finish_reason": "length",
      "index": 0,
      "logprobs": null,
      "text": " food\n\nFrance is famous for its rich and varied cuisine, which is"
    }
  ],
  "created": 1674510255,
  "id": "cmpl-6bytrHO4RmRpvYzWw2Tv4chKh8Za0",
  "model": "text-davinci-003",
  "object": "text_completion",
  "usage": {
    "completion_tokens": 15,
    "prompt_tokens": 5,
    "total_tokens": 20
  }
}


In [13]:
# Send the identical request five times with temperature=1 and print only the
# generated text of the first choice of each response, followed by a separator.
for _ in range(5):
    text = openai.Completion.create(
      model="text-davinci-003",
      prompt="France is famous for its",
      max_tokens=15,
      temperature=1
    )

    print(text['choices'][0]['text'])
    print("==============================")



French cuisine, its wines, fashion, art, culture, language
 food

France is indeed famous for its food. From creamy sauces to
 rich cultural heritage

The culture of France has been shaped by geography,
 wines

France is the world’s most famous producer and consumer


France is most famous for its rich culture, iconic landscape, and


In [14]:
# Same prompt and temperature, but one request that asks for five choices (n=5).
text = openai.Completion.create(
  model="text-davinci-003",
  prompt="France is famous for its",
  max_tokens=15,
  temperature=1,
  n=5
)


In [15]:
# Print the generated text of every choice held in `text`, one separator per choice.
for choice in text['choices']:
    print(choice['text'])
    print("=================================")

 food

France has a long and rich history when it comes to food
 savory cuisine, fine wines, and luxurious Parisian hotels. French cuisine
 beautiful landmarks

France is home to a number of iconic and beloved landmarks
 food

France is well-known for its high-quality, delicious
 food

France is known for its signature dishes such as French onion soup


### Text Generation

In [16]:
# Instruction-style prompt: request five candidate taglines (n=5) of at most
# 15 tokens each, then print each candidate followed by a separator.
tag_line = openai.Completion.create(
  model="text-davinci-003",
  prompt="Write a funny tagline for a Python tutorial website",
  max_tokens=15,
  temperature=1,
  n=5
)

for choice in tag_line['choices']:
    print(choice['text'])
    print("=================================")



"Python for the Uninitiated: 'Cause otherwise you


"Python so easy, even a snake can do it!"


"Python: Get Coding and Make People LOL!"


"Python your way to hilarity!"


" Learn Python today - you'll be cracking jokes tomorrow! "


### Text Classification and Sentiment Analysis

In [17]:
# Ham/spam classification done purely through the prompt: the prompt ends with
# an open "text: " field for the model to fill in.
prompt_text = """Classify the text of the following message as ham or spam
message: you have won a hundred thousand lottery. click this link.
text: """

message_type = openai.Completion.create(
  model="text-davinci-003",
  prompt= prompt_text,
  max_tokens=15,
  temperature=0,
)

# Last expression of the cell: displays the text of the first choice.
message_type['choices'][0]['text']

' Spam'

In [18]:
# Same classification prompt and settings as the previous cell, sent to
# "text-curie-001" instead of "text-davinci-003" to compare the two models.
prompt_text = """Classify the text of the following message as ham or spam
message: you have won a hundred thousand lottery. click this link.
text: """

message_type = openai.Completion.create(
  model="text-curie-001",
  prompt= prompt_text,
  max_tokens=15,
  temperature=0,
)

# Last expression of the cell: displays the text of the first choice.
message_type['choices'][0]['text']

'\n\nYou have won a hundred thousand lottery. Click this link.'

In [19]:
# Sentiment analysis through the prompt: the prompt ends with an open
# "sentiment: " field for the model to fill in.
prompt_text = """What is the sentiment of the following tweet
tweet: I liked that the movie finished earlier. It was not worth watching.
sentiment: """

message_type = openai.Completion.create(
  model="text-davinci-003",
  prompt= prompt_text,
  max_tokens=15,
  temperature=0,
)

# Last expression of the cell: displays the text of the first choice.
message_type['choices'][0]['text']

' Negative'

### Conversation and Factual Response Generation

In [20]:
# conversation with a sarcastic Pizza chef chatbot
prompt_text = """ The following is a conversation between a Pizza Chef who gives sarcastic responses:

Human: Hi, how much time does it take to bake a mexican pizza?
Chef:
"""

response = openai.Completion.create(
  model="text-davinci-003",
  prompt= prompt_text,
  max_tokens=15,
  temperature = 0.5,
  n = 5
)

for choice in response['choices']:
    print(choice['text'])
    print("=================================")

Oh, about as long as it takes to traverse the entire Mexican desert on
Oh, about a decade or two.
Oh, about a million years. I'm sure you have the patience to
Oh, about 10 minutes if you have a time machine.
Oh, just a few minutes. Like, a million.


In [21]:
# conversation with a cryptocurrency expert
prompt_text = """ Alex is a Cryptocurrency expert:

Human: Which cryptocurrencies do you think are the best to invest at the moment?
Alex:
"""

response = openai.Completion.create(
  model="text-davinci-003",
  prompt= prompt_text,
  max_tokens=40,
  temperature = 1,
  n = 5
)

for choice in response['choices']:
    print(choice['text'])
    print("=================================")


Right now, I think that Ethereum, Bitcoin, Litecoin, and Ripple are some of the best cryptocurrencies to invest in. They are all well established, have strong networks of users and developers,
That depends on your investment goals. Generally, Bitcoin and Ethereum are two of the best investments right now if you are looking to diversify your portfolio. If you are looking for more short-term profits

It really depends on your goals, risk appetite, and timeline. Some of the leading coins right now are Bitcoin, Ethereum, Cardano, and Polkadot. The best advice I can give

I recommend researching a variety of cryptocurrencies to make an informed decision when it comes to investing. In general, coins that are backed by well-known companies or organizations with good track records tend to be

That really depends on your unique financial situation and risk tolerance. Some cryptocurrencies that widely regarded as good investments currently are Bitcoin, Ethereum, Litecoin, Cardano, and Ripple. H

In [22]:
# conversation with a Football historian
prompt_text = """ The following is a conversation with a Football historian.

Human: Who was the player of the tournament in the 11th Fifa World Cup.
AI:
"""

response = openai.Completion.create(
  model="text-davinci-003",
  prompt= prompt_text,
  max_tokens=50,
  temperature = 0,
  n = 1
)

for choice in response['choices']:
    print(choice['text'])

The player of the tournament in the 11th FIFA World Cup was Mario Kempes of Argentina. He scored six goals in the tournament, including two in the final, to help Argentina win their first World Cup title.


In [23]:
# conversation with a Football historian

prompt_text = """ The following is a conversation with a Football historian.

Human: Who was the player of the tournament in the 11th Fifa World Cup.
AI:
"""

response = openai.Completion.create(
  model="text-davinci-003",
  prompt= prompt_text,
  max_tokens=50,
  temperature = 0.8,
  n = 1
)

for choice in response['choices']:
    print(choice['text'])

The player of the tournament at the 11th FIFA World Cup was Paolo Rossi of Italy. Rossi scored six goals in the tournament, including a hat-trick against Brazil in the second round and two goals in the final against West Germany. Rossi


In [24]:
# conversation with a Football historian
prompt_text = """ The following is a conversation with a Football historian.

Human: Who was the player of the tournament in the 11th Fifa World Cup.
AI:
"""

response = openai.Completion.create(
  model="text-curie-001",
  prompt= prompt_text,
  max_tokens=50,
  temperature = 1,
  n = 1
)

for choice in response['choices']:
    print(choice['text'])



The player of the tournament was Spain's Andres Iniesta.


### Text Translation and Conversion

In [25]:
# Text translation with "text-curie-001": one sentence, three target languages
# requested in a single prompt, up to 100 tokens, temperature = 0.
prompt_text = """ Translate the following into Spanish,French, and Italian:

I would like to reserve a table for two persons.
"""

response = openai.Completion.create(
  model="text-curie-001",
  prompt= prompt_text,
  max_tokens=100,
  temperature = 0,
  n = 1
)

for choice in response['choices']:
    print(choice['text'])


I desidero prenotare un tavolo per due persone.


In [26]:
# Text translation with "text-davinci-003": identical prompt and settings to
# the curie cell, so the two outputs can be compared directly.
prompt_text = """ Translate the following into Spanish,French, and Italian:

I would like to reserve a table for two persons.
"""

response = openai.Completion.create(
  model="text-davinci-003",
  prompt= prompt_text,
  max_tokens=100,
  temperature = 0,
  n = 1
)

for choice in response['choices']:
    print(choice['text'])


Spanish: Me gustaría reservar una mesa para dos personas.
French: Je voudrais réserver une table pour deux personnes.
Italian: Vorrei prenotare un tavolo per due persone.


In [27]:
# Text-to-emoji conversion: the prompt carries a numbered list of five sports
# and asks for their emoji equivalents; a single reply is requested.
prompt_text = """ Convert the following list of sports to emojis:

1. Cricket
2. Football
3. Tennis
4. Cycling
5. Volleyball
"""

response = openai.Completion.create(
  model="text-davinci-003",
  prompt= prompt_text,
  max_tokens=100,
  temperature = 0,
  n = 1
)

for choice in response['choices']:
    print(choice['text'])


🏏, ⚽️, 🎾, 🚴‍♂️, 🏐


### Text Insertion and Editing

In [28]:
# Insert text inside another text: `prompt` supplies what comes before the
# generated text and `suffix` supplies what should come after it.
prompt_text = """ Write down the steps to bake a Pizza:

"""

response = openai.Completion.create(
  model="text-davinci-003",
  prompt= prompt_text,
  max_tokens=100,
  temperature = 0.5,
  n = 1,
  suffix = "7. Enjoy your pizza."
)

for choice in response['choices']:
    print(choice['text'])

1. Preheat the oven to 350 degrees Fahrenheit.
2. Prepare the pizza dough.
3. Grease a baking sheet or pizza pan.
4. Stretch or roll out the dough to fit the pan.
5. Spread the pizza sauce over the dough.
6. Top with desired toppings.
7. Bake for 15-20 minutes until the crust is golden brown.
8. Remove the pizza from the oven and let cool for a few minutes.
9


In [29]:
# Edit existing text with the Edit endpoint: `input` is the text to change and
# `instruction` says how to change it (here: rewrite passive voice as active).
input_text = """ 1. A car was bought by John
2. A car was hit by a truck
3. The website is developed by ABC

"""

response = openai.Edit.create(
  model="text-davinci-edit-001",
  input = input_text,
  instruction="Convert the sentences to active voice.",
  temperature = 0
)

for choice in response['choices']:
    print(choice['text'])

 1. John bought a car.
2. A truck hit a car.
3. ABC developed the website.



## Code Completion

### Generating Code from Text

In [30]:
# Generate code from a natural-language description using the code model
# "code-davinci-002"; up to 256 tokens, temperature=0.

prompt_text = """ Write a Python function that accepts first name, second name, 
and birth date in string format as a parameter values
and returns the full name, and the number of days from the birth date to today """

response = openai.Completion.create(
  model="code-davinci-002",
  prompt= prompt_text,
  temperature=0,
  max_tokens=256,

)

# The printed text is the raw model output and is not executed by this cell.
for choice in response['choices']:
    print(choice['text'])


(in integer format).

Example:

full_name_and_days_from_birth("John", "Doe", "01/01/2000")

Output:

John Doe, 6574 days

"""

import datetime

def full_name_and_days_from_birth(first_name, last_name, birth_date):
    """
    Returns the full name, and the number of days from the birth date to today 
    (in integer format).
    """
    full_name = first_name + " " + last_name
    birth_date = datetime.datetime.strptime(birth_date, "%d/%m/%Y")
    today = datetime.datetime.today()
    days_from_birth = (today - birth_date).days
    return full_name + ", " + str(days_from_birth) + " days"

print(full_name_and_days_from_birth("John", "Doe", "01/01/2000"))


In [31]:
# define and call a cleaned-up version of the function generated above
import datetime

def get_full_name(first_name, second_name, birth_date, date_format="%d/%m/%Y", today=None):
    """Return a person's full name and their age in whole days.

    Args:
        first_name: Given name.
        second_name: Family name; joined to ``first_name`` with a single space.
        birth_date: Birth date as a string laid out according to ``date_format``.
        date_format: ``strptime`` pattern used to parse ``birth_date``.
            Defaults to day/month/year, e.g. ``"10/06/1995"``.
        today: ``datetime.datetime`` to measure up to. Defaults to the current
            local time; pass a fixed value to get a reproducible result.

    Returns:
        A ``(full_name, days_from_birth)`` tuple, where ``days_from_birth`` is
        the number of whole days between the birth date and ``today``.

    Raises:
        ValueError: If ``birth_date`` does not match ``date_format``.
    """
    full_name = first_name + " " + second_name
    born = datetime.datetime.strptime(birth_date, date_format)
    if today is None:
        # Resolved at call time so the default always reflects "now".
        today = datetime.datetime.today()
    days_from_birth = (today - born).days
    return full_name, days_from_birth

get_full_name("John", "Doe", "10/06/1995")

('John Doe', 10089)

### Insert Code

In [32]:
# Code insertion: `prompt` holds the text before the gap and `suffix` the text
# after it; the code model is asked to produce what goes in between.
prompt_text = "# Python 3 "

response = openai.Completion.create(
  model="code-davinci-002",
  prompt= prompt_text,
  temperature=0,
  max_tokens=256,
  suffix = "return x * x"

)

for choice in response['choices']:
    print(choice['text'])

program to find square of a number

def square(x):
    


### Editing Code

In [33]:
# Code editing with the Edit endpoint: `input` is the source to modify and
# `instruction` describes the change. The input snippet is sent exactly as
# written here (including the upper-case "X").
prompt_text = """def process (x):
   return x * X
              """

response = openai.Edit.create(
  model="code-davinci-edit-001",
  input= prompt_text,
  temperature=0,
  instruction= "modify this function to return cube of the input parameter"

)

for choice in response['choices']:
    print(choice['text'])

def process (x):
   return x * x * x
              



## Image Processing

### Image Generation from Text

In [34]:
# Generate two 512x512 images (n=2) from a text description and print the URL
# returned for each image.
prompt_text = "a dog standing on a beach"
response = openai.Image.create(
  prompt= prompt_text,
  n=2,
  size="512x512"
)

for image in response['data']:
    print(image['url'])

https://oaidalleapiprodscus.blob.core.windows.net/private/org-KHJN7p7cHaNp2fv38lGmyzBW/user-zFtfxgdZ3JpRJ8xgSAn4tPqq/img-1BJwzpEAvE2ThpAFExzJKCSN.png?st=2023-01-23T20%3A45%3A02Z&se=2023-01-23T22%3A45%3A02Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-01-23T19%3A24%3A42Z&ske=2023-01-24T19%3A24%3A42Z&sks=b&skv=2021-08-06&sig=MFQJ06JILgupdPknOOzhPjCRsUPIh7kUqRwfSqmJC2M%3D
https://oaidalleapiprodscus.blob.core.windows.net/private/org-KHJN7p7cHaNp2fv38lGmyzBW/user-zFtfxgdZ3JpRJ8xgSAn4tPqq/img-01TDQaOWUvYjmdYVz2eDxTaB.png?st=2023-01-23T20%3A45%3A02Z&se=2023-01-23T22%3A45%3A02Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-01-23T19%3A24%3A42Z&ske=2023-01-24T19%3A24%3A42Z&sks=b&skv=2021-08-06&sig=W9Z6kIiThgkFJ8/dRmPQSt4TNOhbbU7t0T54zvAQaWg%3D


### Edit Image

In [35]:
# Edit an existing image: `image` is the source picture, `mask` is a second
# picture supplied alongside it, and `prompt` describes the desired result.
# Both files are opened in a `with` block so the handles are closed as soon as
# the request finishes, even if it raises.
with open(PATH + "dog_image.png", "rb") as image_file, \
     open(PATH + "dog_image_mask.png", "rb") as mask_file:
    response = openai.Image.create_edit(
      image=image_file,
      mask=mask_file,
      prompt="A cat standing on a beach",
      n=2,
      size="512x512"
    )

for image in response['data']:
    print(image['url'])

FileNotFoundError: [Errno 2] No such file or directory: '~/Downloadsdog_image.png'

### Generating Image Variations from Image Inputs

In [None]:
# Request one 512x512 variation (n=1) of an existing image. The file is opened
# in a `with` block so the handle is closed as soon as the request finishes,
# even if it raises.
with open(PATH + "dog_image.png", "rb") as image_file:
    response = openai.Image.create_variation(
      image=image_file,
      n=1,
      size="512x512"
    )

for image in response['data']:
    print(image['url'])

## Fine-Tuning Existing GPT Models

### Preparing Your Dataset

In [2]:
# data download link
# https://www.kaggle.com/datasets/crowdflower/twitter-airline-sentiment?resource=download

import pandas as pd

# Load the tweets and keep only the tweet text and its sentiment label.
dataset = pd.read_csv(PATH + "Tweets.csv")
dataset = dataset.filter(["text","airline_sentiment"])

# Report the size, then display the first rows as the cell's output.
print(dataset.shape)
dataset.head()

(14640, 2)


Unnamed: 0,text,airline_sentiment
0,@VirginAmerica What @dhepburn said.,neutral
1,@VirginAmerica plus you've added commercials t...,positive
2,@VirginAmerica I didn't today... Must mean I n...,neutral
3,@VirginAmerica it's really aggressive to blast...,negative
4,@VirginAmerica and it's a really big bad thing...,negative


In [36]:
# Keep the first 1,200 tweets and relabel the two columns as "prompt" and
# "completion" (presumably the names the fine-tuning data tool expects -- confirm).
dataset = dataset.head(1200).set_axis(["prompt", "completion"], axis=1)
print(dataset.shape)
dataset.head()

(1200, 2)


Unnamed: 0,prompt,completion
0,@VirginAmerica What @dhepburn said.,neutral
1,@VirginAmerica plus you've added commercials t...,positive
2,@VirginAmerica I didn't today... Must mean I n...,neutral
3,@VirginAmerica it's really aggressive to blast...,negative
4,@VirginAmerica and it's a really big bad thing...,negative


In [66]:
# Write the prompt/completion table to CSV without the index column.
dataset.to_csv(PATH + "airline_sentiments.csv", index = False)

In [52]:
# Run the following command in a shell (replace YOUR_DIR with the folder that holds the CSV). It can take a while...
#!openai tools fine_tunes.prepare_data -f "YOUR_DIR/airline_sentiments.csv"

usage: openai tools fine_tunes.prepare_data [-h] -f FILE [-q]
openai tools fine_tunes.prepare_data: error: argument -f/--file: expected one argument


In [55]:
def create_training_file(file_path):
    """Upload a local file to OpenAI with purpose ``'fine-tune'``.

    Args:
        file_path: Path of the file to upload.

    Returns:
        The object returned by ``openai.File.create``; callers in this
        notebook read its ``"id"`` entry.

    Raises:
        OSError: If ``file_path`` cannot be opened for reading.
    """
    # The context manager closes the handle once the upload call returns,
    # including when the request raises.
    with open(file_path, "rb") as training_data:
        return openai.File.create(
            file=training_data,
            purpose='fine-tune'
        )

In [70]:
# Display the data directory currently in use.
PATH

'/home/leosmigel/Downloads/'

In [71]:
# Upload the "_prepared.jsonl" file (presumably the output of the
# fine_tunes.prepare_data command shown earlier -- confirm) and display the
# returned file description.
training_file = create_training_file(PATH + "airline_sentiments_prepared.jsonl")
training_file

<File file id=file-vtvg2mM6k7SSU6dMCId7BiWh at 0x7f8d62f158f0> JSON: {
  "bytes": 184440,
  "created_at": 1674571612,
  "filename": "file",
  "id": "file-vtvg2mM6k7SSU6dMCId7BiWh",
  "object": "file",
  "purpose": "fine-tune",
  "status": "uploaded",
  "status_details": null
}

In [72]:
# Keep the uploaded file's id; it is passed to the fine-tune request below.
training_file_id = training_file["id"]
training_file_id

'file-vtvg2mM6k7SSU6dMCId7BiWh'

### Fine-tuning a Sentiment Classification Model

In [73]:
# Create a fine-tune job on the base "ada" model from the uploaded training
# file, for 4 epochs. The returned object describes the job, not a finished model.
fine_tuned_model = openai.FineTune.create(training_file = training_file_id,
                      model = "ada",
                      n_epochs = 4,
                      )

In [74]:
# Display the job description returned when the fine-tune was created.
fine_tuned_model

<FineTune fine-tune id=ft-Dqfs8SCMajV1cRM3iXRa2kJx at 0x7f8db56672e0> JSON: {
  "created_at": 1674571750,
  "events": [
    {
      "created_at": 1674571750,
      "level": "info",
      "message": "Created fine-tune: ft-Dqfs8SCMajV1cRM3iXRa2kJx",
      "object": "fine-tune-event"
    }
  ],
  "fine_tuned_model": null,
  "hyperparams": {
    "batch_size": null,
    "learning_rate_multiplier": null,
    "n_epochs": 4,
    "prompt_loss_weight": 0.01
  },
  "id": "ft-Dqfs8SCMajV1cRM3iXRa2kJx",
  "model": "ada",
  "object": "fine-tune",
  "organization_id": "org-KHJN7p7cHaNp2fv38lGmyzBW",
  "result_files": [],
  "status": "pending",
  "training_files": [
    {
      "bytes": 184440,
      "created_at": 1674571612,
      "filename": "file",
      "id": "file-vtvg2mM6k7SSU6dMCId7BiWh",
      "object": "file",
      "purpose": "fine-tune",
      "status": "processed",
      "status_details": null
    }
  ],
  "updated_at": 1674571750,
  "validation_files": []
}

In [84]:
# Fetch the event log of the fine-tune job to follow its progress
# (re-run this cell to refresh it).
openai.FineTune.list_events(id=fine_tuned_model.id)['data']

[<OpenAIObject fine-tune-event at 0x7f8db567b970> JSON: {
   "created_at": 1674571750,
   "level": "info",
   "message": "Created fine-tune: ft-Dqfs8SCMajV1cRM3iXRa2kJx",
   "object": "fine-tune-event"
 },
 <OpenAIObject fine-tune-event at 0x7f8db56f5df0> JSON: {
   "created_at": 1674572497,
   "level": "info",
   "message": "Fine-tune costs $0.06",
   "object": "fine-tune-event"
 },
 <OpenAIObject fine-tune-event at 0x7f8db5697bf0> JSON: {
   "created_at": 1674572497,
   "level": "info",
   "message": "Fine-tune enqueued. Queue number: 0",
   "object": "fine-tune-event"
 },
 <OpenAIObject fine-tune-event at 0x7f8db5697c90> JSON: {
   "created_at": 1674572504,
   "level": "info",
   "message": "Fine-tune started",
   "object": "fine-tune-event"
 },
 <OpenAIObject fine-tune-event at 0x7f8db5697790> JSON: {
   "created_at": 1674572741,
   "level": "info",
   "message": "Completed epoch 1/4",
   "object": "fine-tune-event"
 },
 <OpenAIObject fine-tune-event at 0x7f8db56979c0> JSON: {
   "

In [85]:
# use your newly trained model to make predictions
# NOTE: the model name below is specific to the account that ran this notebook;
# replace it with the "fine_tuned_model" value reported for your own job.
# The prompt ends with " \n\n###\n\n" (presumably the separator used in the
# prepared training data -- confirm). max_tokens=1 limits the answer to one token.
prompt_text = "The flight landed ahead of time. The food was delicious \n\n###\n\n"
response = openai.Completion.create(
  model="ada:ft-alpha-apps-2023-01-24-15-17-09",
  prompt = prompt_text,
  max_tokens=1,
  temperature=0
)

print(response['choices'][0]['text'])

 positive


In [None]:
# use your newly trained model to make predictions
# Uses the same fine-tuned model as the previous prediction cell (the one this
# notebook's fine-tune job produced) so the two predictions are comparable.
# Replace the name with the "fine_tuned_model" value of your own job.
prompt_text = "I reached London on this flight \n\n###\n\n"
response = openai.Completion.create(
  model="ada:ft-alpha-apps-2023-01-24-15-17-09",
  prompt = prompt_text,
  max_tokens=1,
  temperature=0
)

print(response['choices'][0]['text'])

In [None]:
# Baseline without fine-tuning: ask the stock "text-ada-001" model for the
# sentiment of the same tweet via a plain instruction prompt.
prompt_text = """Give me the sentiment of the following tweet 

'I reached London on this flight' """

response = openai.Completion.create(
  model="text-ada-001",
  prompt = prompt_text,
  max_tokens=10,
  temperature=0
)

print(response['choices'][0]['text'])

In [None]:
# Same instruction prompt as the previous cell, sent to "text-davinci-003"
# to compare a larger stock model on the same tweet.
prompt_text = """Give me the sentiment of the following tweet 

'I reached London on this flight' """

response = openai.Completion.create(
  model="text-davinci-003",
  prompt = prompt_text,
  max_tokens=10,
  temperature=0
)

print(response['choices'][0]['text'])

In [86]:
# List the fine-tune jobs of this account; each entry's "fine_tuned_model"
# field holds the model name to use in completion requests.
models_list = openai.FineTune.list()
models_list
    

<OpenAIObject list at 0x7f8db57e3d30> JSON: {
  "data": [
    {
      "created_at": 1674571750,
      "fine_tuned_model": "ada:ft-alpha-apps-2023-01-24-15-17-09",
      "hyperparams": {
        "batch_size": 2,
        "learning_rate_multiplier": 0.1,
        "n_epochs": 4,
        "prompt_loss_weight": 0.01
      },
      "id": "ft-Dqfs8SCMajV1cRM3iXRa2kJx",
      "model": "ada",
      "object": "fine-tune",
      "organization_id": "org-KHJN7p7cHaNp2fv38lGmyzBW",
      "result_files": [
        {
          "bytes": 105982,
          "created_at": 1674573434,
          "filename": "compiled_results.csv",
          "id": "file-vn3AOy8uMnqoJnixuCwYwhTE",
          "object": "file",
          "purpose": "fine-tune-results",
          "status": "processed",
          "status_details": null
        }
      ],
      "status": "succeeded",
      "training_files": [
        {
          "bytes": 184440,
          "created_at": 1674571612,
          "filename": "file",
          "id": "file-vtv

## Text Embeddings

In [None]:
# Create an embedding for one input sentence, print the length of the
# returned vector, and display the vector itself as the cell output.
input_text = "The flight was on time, and the food was delicious."
response = openai.Embedding.create(
  model = "text-embedding-ada-002",
  input = input_text
)

print(len(response['data'][0]['embedding']))
response['data'][0]['embedding']

### Train Machine Learning Classifiers with GPT-3 Embeddings

In [87]:
## code taken from: https://beta.openai.com/docs/guides/embeddings/use-cases

def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding vector of ``text`` produced by ``model``.

    Newlines in ``text`` are replaced with spaces before the request is made.
    One API request is sent per call.
    """
    single_line = text.replace("\n", " ")
    reply = openai.Embedding.create(input=single_line, model=model)
    first_item = reply['data'][0]
    return first_item['embedding']
 

In [88]:
# Only the first 42 tweets are embedded: one request is sent per row, and
# processing more than 42 records produced the following error.
#
# RateLimitError: Rate limit reached for default-global-with-image-limits in organization org-TnYTFIlbYtYrctiFaodPkXpw
# on requests per min. Limit: 60.000000 / min. Current: 70.000000 / min. Contact support@openai.com
# if you continue to have issues. Please add a payment method to your account
# to increase your rate limit. Visit https://beta.openai.com/account/billing to add a payment method.

import pandas as pd

# Reload the raw tweets, keep text + label, and truncate to 42 rows.
dataset = (
    pd.read_csv(PATH + "Tweets.csv")
    .filter(["text", "airline_sentiment"])
    .head(42)
)

# Add one embedding vector per tweet.
dataset['ada_embedding'] = dataset['text'].apply(
    lambda tweet: get_embedding(tweet, model='text-embedding-ada-002')
)

In [89]:
# Preview the first rows, now including the "ada_embedding" column.
dataset.head()

Unnamed: 0,text,airline_sentiment,ada_embedding
0,@VirginAmerica What @dhepburn said.,neutral,"[-0.014258579351007938, 0.0046490998938679695,..."
1,@VirginAmerica plus you've added commercials t...,positive,"[-0.008435415104031563, -0.02436021715402603, ..."
2,@VirginAmerica I didn't today... Must mean I n...,neutral,"[-0.017355043441057205, -0.03201999142765999, ..."
3,@VirginAmerica it's really aggressive to blast...,negative,"[-0.013376902788877487, -0.022220859304070473,..."
4,@VirginAmerica and it's a really big bad thing...,negative,"[-0.002071293303743005, -0.017142634838819504,..."


In [90]:
# Feature matrix: a plain list holding one embedding vector per tweet.
X = dataset['ada_embedding'].tolist()

In [91]:
# Encode the sentiment strings as integer class ids; `le` is kept so the ids
# can be mapped back to the original labels.
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
y = le.fit_transform(dataset['airline_sentiment'])

In [92]:
# Hold out 25% of the rows for testing; random_state fixes the split so it is
# the same on every run.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [93]:
from sklearn.ensemble import RandomForestClassifier

# Train a 100-tree random forest on the embedding vectors. random_state is
# fixed (same seed as the train/test split) so that re-running the notebook
# reproduces the same model and therefore the same metrics.
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Predicted class ids and per-class probabilities for the held-out rows.
preds = clf.predict(X_test)
probas = clf.predict_proba(X_test)

In [94]:
# Per-class precision/recall/F1 on the held-out rows. The classes appear as
# the integer ids produced by the label encoder.
# NOTE(review): accuracy_score is imported but not used in this cell.
from sklearn.metrics import classification_report, accuracy_score
report = classification_report(y_test, preds)
print(report)

              precision    recall  f1-score   support

           0       0.60      0.60      0.60         5
           1       0.00      0.00      0.00         2
           2       0.60      0.75      0.67         4

    accuracy                           0.55        11
   macro avg       0.40      0.45      0.42        11
weighted avg       0.49      0.55      0.52        11



### Zero-Shot Classification with GPT-3 Embeddings

In [97]:
# label_embeddings = []

# labels = ['positive', 'negative', 'neutral']

# for label in labels:
#     embeddings = get_embedding(label, model='text-embedding-ada-002')
#     label_embeddings.append(embeddings)

In [99]:
!pip install plotly

Collecting plotly
  Downloading plotly-5.13.0-py2.py3-none-any.whl (15.2 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.2/15.2 MB[0m [31m56.2 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
[?25hCollecting tenacity>=6.2.0
  Downloading tenacity-8.1.0-py3-none-any.whl (23 kB)
Installing collected packages: tenacity, plotly
Successfully installed plotly-5.13.0 tenacity-8.1.0


In [100]:
from openai.embeddings_utils import cosine_similarity
 

# Candidate labels and one embedding per label, in the same order. The order
# matters: scores are mapped back to labels by position.
labels = ['positive', 'negative', 'neutral']
label_embeddings = [get_embedding(label, model = "text-embedding-ada-002") for label in labels]
 
def label_score(review_text, label_embeddings):
    """Score a review against each label embedding by cosine similarity.

    Args:
        review_text: Text to classify; it is embedded with ``get_embedding``
            (one API request per call).
        label_embeddings: Sequence of embedding vectors, one per candidate label.

    Returns:
        A list of cosine similarities, one per entry of ``label_embeddings``
        and in the same order.
    """
    review_embedding = get_embedding(review_text, model='text-embedding-ada-002')

    # Iterate over every label instead of indexing [0], [1], [2], so the
    # function works with any number of candidate labels.
    return [cosine_similarity(review_embedding, label_embedding)
            for label_embedding in label_embeddings]
 

In [101]:
# Score a clearly negative review against the three label embeddings and
# display the similarities (same order as `labels`).
label_scores = label_score("The flight was aweful, and the food was really bad", label_embeddings)
label_scores

[0.7414636070480866, 0.7738899955402466, 0.7379892312618058]

In [102]:
# Pick the position of the highest similarity (the first one wins on ties)
# and map it back to its label name.
max_index = max(range(len(label_scores)), key=label_scores.__getitem__)
label = labels[max_index]
print(label)

negative


In [103]:
# Repeat the zero-shot classification for a clearly positive review: score it,
# take the position of the highest similarity (first one wins on ties) and
# print the matching label.
label_scores = label_score("The flight was ahead of time, and the food was delicious", label_embeddings)
max_index = max(range(len(label_scores)), key=label_scores.__getitem__)
label = labels[max_index]
print(label)

positive


## Content Moderation with OpenAI’s Content Policy

In [104]:
# Send a sample sentence to the moderation endpoint and display the full
# response, which contains per-category booleans, per-category scores and an
# overall "flagged" value.
input_text = "I want to exterminate their generations"

response = openai.Moderation.create(
  input=input_text,
)

response

<OpenAIObject id=modr-6cFVIv3v0EXXVtzl4qJlnk09Dk5ji at 0x7f8d1c420f90> JSON: {
  "id": "modr-6cFVIv3v0EXXVtzl4qJlnk09Dk5ji",
  "model": "text-moderation-004",
  "results": [
    {
      "categories": {
        "hate": true,
        "hate/threatening": true,
        "self-harm": false,
        "sexual": false,
        "sexual/minors": false,
        "violence": true,
        "violence/graphic": false
      },
      "category_scores": {
        "hate": 0.9725502729415894,
        "hate/threatening": 0.8815467953681946,
        "self-harm": 1.0459393351425206e-09,
        "sexual": 2.269284635758595e-07,
        "sexual/minors": 1.2673774030247387e-08,
        "violence": 0.9950388073921204,
        "violence/graphic": 6.219015062924882e-07
      },
      "flagged": true
    }
  ]
}