# About

* DALL-E
* TTS
* Embeddings
* GPT4 Vision

# Import all required instances

In [32]:
import os
from dotenv import load_dotenv

from openai import OpenAI
from pathlib import Path

In [8]:
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

client = OpenAI(
   api_key=OPENAI_API_KEY,
)

# Load data

In [None]:
# TBD

# Modeling

## DALL-E

### Creating images from scratch based on a text prompt (DALL-E 3)

In [17]:
response = client.images.generate(
  model="dall-e-3",
  prompt="create the most picturesque place in Zhytomyr",
  size="1024x1024",
  quality="standard",
  n=1,  # quantity of pictures at once
)

image_url = response.data[0].url

In [18]:
image_url

'https://oaidalleapiprodscus.blob.core.windows.net/private/org-2mJwDcRHaepEpF8QgnRYHQKk/user-fRrD3h3UnTHKJwkj76dPS8Ej/img-nSsHAp8vOGlwjOofXXqlYf5v.png?st=2024-03-18T16%3A17%3A12Z&se=2024-03-18T18%3A17%3A12Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2024-03-17T22%3A52%3A24Z&ske=2024-03-18T22%3A52%3A24Z&sks=b&skv=2021-08-06&sig=14qikBg4Ju7D1PyUcw%2BtL9R84fLJioy1aS5X%2BqpGTwc%3D'

### Creating edited versions of images by having the model replace some areas of a pre-existing image, based on a new text prompt (DALL-E 2)

In [26]:
response = client.images.edit(
  model="dall-e-2",
  image=open("C:/Users/maks1/Downloads/picture-1.png", "rb"),
  mask=open("C:/Users/maks1/Downloads/mask-1.png", "rb"),
  prompt="Picture with a funicular",
  n=1,
  size="1024x1024"
)

In [27]:
image_url = response.data
image_url

[Image(b64_json=None, revised_prompt=None, url='https://oaidalleapiprodscus.blob.core.windows.net/private/org-2mJwDcRHaepEpF8QgnRYHQKk/user-fRrD3h3UnTHKJwkj76dPS8Ej/img-qU8zNC66IYwjBtHVYTfUlQAF.png?st=2024-03-18T16%3A54%3A18Z&se=2024-03-18T18%3A54%3A18Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2024-03-18T06%3A43%3A16Z&ske=2024-03-19T06%3A43%3A16Z&sks=b&skv=2021-08-06&sig=SlWUXklVUUiIRLsMzS5vDxSl0RZAcTsK8%2BtvKSkIq7k%3D')]

### Creating variations of an existing image (DALL-E 2)

In [30]:
response = client.images.create_variation(
  image=open("C:/Users/maks1/Downloads/picture-3.png", "rb"),
  n=2,
  size="1024x1024"
)

image_url = response.data
image_url

[Image(b64_json=None, revised_prompt=None, url='https://oaidalleapiprodscus.blob.core.windows.net/private/org-2mJwDcRHaepEpF8QgnRYHQKk/user-fRrD3h3UnTHKJwkj76dPS8Ej/img-P2Bsei8Os0n1iyvkHH7lcr6Z.png?st=2024-03-18T17%3A05%3A37Z&se=2024-03-18T19%3A05%3A37Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2024-03-18T09%3A48%3A27Z&ske=2024-03-19T09%3A48%3A27Z&sks=b&skv=2021-08-06&sig=26RcRKTMTzz8/pNgtB2or03qgO4iiIF3/v7W5dXj87s%3D'),
 Image(b64_json=None, revised_prompt=None, url='https://oaidalleapiprodscus.blob.core.windows.net/private/org-2mJwDcRHaepEpF8QgnRYHQKk/user-fRrD3h3UnTHKJwkj76dPS8Ej/img-GgjJvV208tb89l0IuXfZG7Hi.png?st=2024-03-18T17%3A05%3A38Z&se=2024-03-18T19%3A05%3A38Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2024-03-18T09%3A48%3A27Z&ske=2024-03-19T09%3A48%3A27Z&sks=b&skv=2021-08-06&sig=eE

## Vision

In [31]:
response = client.chat.completions.create(
  model="gpt-4-vision-preview",
  messages=[
    {
      "role": "user",
      "content": [
        {"type": "text", "text": "Explain the content of the image."},
        {
          "type": "image_url",
          "image_url": {
            "url": "https://imgproxy.amomama.es/oH3C5-YZcQPNppTOds0-2FqdZr2_Lb6rK3NHPutaJJ0/rs:fill:1200:0:1/g:no/aHR0cHM6Ly9jZG4uYW1vbWFtYS5jb20vYTdmNzA3YjkwMmUwNDBiZDQ2NWZlZDdiZjNkYTc1YzQyOTc5NTUxNTY4OTk2NTkyLmpwZw.jpg",
          },
        },
      ],
    }
  ],
  max_tokens=300,
)

print(response.choices[0])

Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='This image features three individuals, two women and one man, likely from a promotional or editorial piece. The man is in the center with a direct gaze towards the camera. He has medium-length dark hair and a beard. The woman on the left is blonde with her hair styled in loose curls, smiling with a slight head tilt. The woman on the right has long, straight brown hair, with a subtle smile, appearing poised and serene. The background is plain, making the individuals the focal point of the composition. The image appears to be designed to showcase the individuals, perhaps for a marketing campaign, movie, or television show promotion.', role='assistant', function_call=None, tool_calls=None))


## Text to speech

In [42]:
response = client.audio.speech.create(
  model="tts-1",
  voice="alloy",
  input="Я тебе кохаю, але не зовсім, Я розбила серце не ходи тут босим, Було добре влітку, але зараз осінь, І я втомилась від сумних пісень.")

response.stream_to_file("output.mp3")

  response.stream_to_file("output.mp3")
