In [1]:
import pandas as pd
import json


### Read data and config

In [2]:
# Directory holding the paraphrase dataset, plus the names of the data
# file and of its companion config file inside that directory.
json_path = '../data/paraphrase/text_files'
data_file_name, conf_file_name = (
    'txt_dataset_paraphrase.json',
    'txt_dataset_paraphrase_config.json',
)

In [3]:
# Load the dataset; orient='index' means the JSON object is keyed by row label.
data = pd.read_json(f'{json_path}/{data_file_name}', orient='index')

# The save cell at the end of this notebook writes the config as UTF-8 with
# ensure_ascii=False, so read it back with the same explicit encoding instead
# of relying on the platform default (which is not UTF-8 everywhere).
with open(f'{json_path}/{conf_file_name}', encoding='utf-8') as f:
    conf_json = json.load(f)

# Quick look at the first rows to confirm the load worked.
data.head(3)

Unnamed: 0,text,char_count,notion,grammarly,current_rephraser
0,Great job on the new website design! I love th...,69,The new website design looks fantastic! I real...,Great work on the new website design! I really...,1. Awesome work on the new website! The new co...
1,Just reviewed the social media campaign tasks ...,91,I have just reviewed the tasks for the social ...,I have just reviewed the social media campaign...,1. Checked out the social media stuff and it's...
2,I received some positive feedback from custome...,97,The marketing team has done an excellent job a...,I received positive feedback about the new sur...,1. Customers are loving the new survey! Great ...


In [4]:
# Print the full value of the 'grammarly' column for the row at position 5
# (the head() preview above truncates long strings).
print(data['grammarly'].iloc[5])

Great news! We have scheduled a client meeting this week to discuss their upcoming project requirements. Let's prepare a detailed agenda to ensure that we cover all their needs and pain points. This is a fantastic opportunity for us to demonstrate our expertise and build a strong relationship with the client. We can discuss the project scope, timeline, and potential challenges during the meeting. Do you have any suggestions on specific areas we should focus on?


# Building new prompts

In [6]:
import openai
import os
from dotenv import load_dotenv
from modules.openai.OpenaiModels import OpenaiChatCompletionRephraser, OpenaiChatCompletionCurrentRephraser

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

azure_openai_key = os.environ.get('AZURE_OPENAI_KEY')
azure_openai_endpoint = os.environ.get('AZURE_OPENAI_ENDPOINT')

# Fail here, with the variable names spelled out, rather than assigning None to
# the client settings and getting an opaque error on the first API call.
missing_settings = [
    env_name
    for env_name, env_value in (
        ('AZURE_OPENAI_KEY', azure_openai_key),
        ('AZURE_OPENAI_ENDPOINT', azure_openai_endpoint),
    )
    if not env_value
]
if missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_settings)
        + ". Define them in the environment or in a .env file."
    )

# Point the openai client at the Azure deployment.
openai.api_key = azure_openai_key
openai.api_base = azure_openai_endpoint
openai.api_type = "azure"

In [27]:
# Disabled: construction of the baseline `current_rephraser`. The values below
# match the 'current_rephraser' entry shown when `conf_json` is displayed later
# in this notebook.
# text = data.iloc[0].text
# 
# localization_prompt_template = """Consider the language used in the following input text: {input_text}. Please provide your response in the same language as the one used in this input text. Your response may involve a variety of data types, such as JSON, XML, HTML, and other programming or markup formats. It's important to note, though, that any code or markup language should remain untouched during translation; only translate plain text elements"""
# 
# system_prompt = ""
# 
# user_prompt = """{language_input}. Paraphrase the following text into a more {style} style, return numbered list of strings contains 3 new paraphrased texts:
# 
#   {input_text}"""
# 
# current_rephraser = OpenaiChatCompletionCurrentRephraser(model_name="gpt-3-5-turbo",
#                                                          api_version="2023-05-15",
#                                                          temperature=0.7,
#                                                          max_tokens=256,
#                                                          user_prompt_template=user_prompt,
#                                                          localization_prompt_template=localization_prompt_template,
#                                                          style='casual')

In [38]:
# Disabled: would print the input text followed by the result of calling the
# baseline `current_rephraser` on it.
# print(text+'\n\n')
# print(current_rephraser.call_llm(text))

In [15]:
# Prompt pieces for the second rephraser version. The {placeholders} are left
# unfilled here; presumably the rephraser class substitutes them - confirm in
# OpenaiChatCompletionRephraser.
localization_prompt_template = """Consider the language used in the following input text: {input_text}. Please provide your response in the same language as the one used in this input text. Your response may involve a variety of data types, such as JSON, XML, HTML, and other programming or markup formats. It's important to note, though, that any code or markup language should remain untouched during translation; only translate plain text elements"""

system_prompt = """You are a professional assistant. Using your deep linguistic understanding and knowledge, Rephrase the text provided by the user while retaining the core ideas, crucial details, and significant context of it. Please fix any grammar mistakes and typos. {language_input}. The rephrased style should be {style}. Return numbered list of strings containing {n_paraphrase} new paraphrased texts."""

user_prompt = """Please rephrase the following text: {input_text}"""

# Gather every constructor argument in one place so the configuration reads as
# a single table, then build the rephraser from it.
version_2_settings = {
    "model_name": "gpt-3-5-turbo",
    "api_version": "2023-05-15",
    "temperature": 0.7,
    "max_tokens": 512,
    "system_prompt_template": system_prompt,
    "user_prompt_template": user_prompt,
    "localization_prompt_template": localization_prompt_template,
    "style": 'neutral',
    "n_paraphrase": 3,
}
version_2_rephraser = OpenaiChatCompletionRephraser(**version_2_settings)

In [18]:
# Use the 'text' value of the row at position 5 as the working example,
# and echo it as the cell output.
text = data['text'].iloc[5]
text

"Exciting news! We secured a client meeting this week to discuss their upcoming project requirements. Let's prepare a detailed agenda to ensure we cover all their needs and pain points. This is a great opportunity to showcase our expertise and build a strong relationship with the client. We can discuss the scope, timeline, and any potential challenges during the meeting. Any suggestions on specific areas we should focus on?"

In [19]:
# Run the new rephraser on the working example and print what it returns.
rephrased_sample = version_2_rephraser.call_llm(text)
print(rephrased_sample)

1. We have some thrilling news to share! We have successfully scheduled a meeting with a client this week to talk about their upcoming project needs. To make sure we address all their requirements and concerns, let's create a comprehensive agenda. This meeting is a fantastic chance for us to demonstrate our proficiency and establish a robust rapport with the client. We can delve into the project's scope, timeline, and potential obstacles during the meeting. Do you have any proposals on specific topics we should emphasize?

2. We've got some exciting news to share! We've managed to secure a client meeting this week to discuss their project requirements for the future. To make sure we address all their needs and pain points, let's prepare a detailed agenda. This meeting is an excellent opportunity for us to showcase our expertise and develop a solid relationship with the client. During the meeting, we can discuss the project's scope, timeline, and any potential challenges. Would you like

In [20]:
# Same example again, this time also keeping the second return value (`res`),
# whose "usage" entry is inspected in the next cell.
output, res = version_2_rephraser.call_llm_with_res(text)

In [21]:
# Display the "usage" entry of the response (prompt / completion / total token counts).
res["usage"]

<OpenAIObject at 0x16233af90> JSON: {
  "prompt_tokens": 257,
  "completion_tokens": 289,
  "total_tokens": 546
}

In [1]:
# NOTE(review): disabled summarization experiment. `OpenaiCompletionWordLimit`
# is not imported in any cell visible here, so re-enabling this would need an
# import first.
# version_2_template = """Using your deep linguistic understanding and knowledge, condense the essence of the provided content.\n Retain the core ideas, crucial details, and significant context while omitting any extraneous information.\n Please deliver a concise and coherent summary of the following text.\n The summary MUST be shorter or equal to {max_words} words.\n The output MUST be a JSON object with the following schema:
# {{"title":<the title>,"body":<the summary>}}\n----------------------\n\nThe text to summarize: {input_text}\n The output:\n"""
# 
# version_2_sum = OpenaiCompletionWordLimit(
#     model_name="text-davinci-003",
#     api_version="2023-05-15",
#     temperature=0.0,
#     max_tokens=512,
#     prompt_template=version_2_template,
#     max_word_ratio=0.75
# )

In [22]:
# Hebrew input - presumably a check that the reply comes back in the input's
# language, as the localization prompt requests.
hebrew_sample = "שלום קוראים לי דוד ואני מהנדס תוכנה בכיר, אני מתרגש להתחיל לעבוד אתכם."
print(version_2_rephraser.call_llm(hebrew_sample))

1. שמי הוא דוד ואני מקצוען בעולם התוכנה כבר שנים רבות. אני מצפה להתחיל את העבודה איתכם בתור מהנדס תוכנה בכיר.
2. אני משתף פעולה כמה שנים כבר עם חברות רבות ומאמין שאוכל לתרום לצוות שלכם כמה שיותר. אני מצפה להתחיל את העבודה כמהנדס תוכנה בכיר.
3. דוד הינו מהנדס תוכנה ומחפש אתגר חדש ומרתק בתחום התוכנה. אני מצפה להתחיל לעבוד יחד עם צוות המוכשר שלכם.


### Apply to the dataset

In [23]:
# Label for this rephraser version: used below as the new DataFrame column name
# and as the key of its entry in the config JSON.
model_name = "version_2_rephraser"

In [24]:
# Rephrase every row's 'text' with the new rephraser (one call per row) and
# store the results in a column named after the model.
data[model_name] = [
    version_2_rephraser.call_llm(source_text) for source_text in data['text']
]

In [81]:
# Original text of the row at position 5, for comparison with its paraphrases.
print(data['text'].iloc[5])

Exciting news! We secured a client meeting this week to discuss their upcoming project requirements. Let's prepare a detailed agenda to ensure we cover all their needs and pain points. This is a great opportunity to showcase our expertise and build a strong relationship with the client. We can discuss the scope, timeline, and any potential challenges during the meeting. Any suggestions on specific areas we should focus on?


In [25]:
# The new rephraser's stored output for the same row (position 5).
print(data[model_name].iloc[5])

1. We have some thrilling news! We have successfully secured a client meeting this week to discuss their project requirements. To ensure that we address all their needs and challenges, let's prepare a comprehensive agenda. This is an excellent chance to demonstrate our proficiency and establish a solid rapport with the client. During the meeting, we can delve into the project's scope, timeline, and any possible obstacles. Do you have any specific recommendations on areas we should prioritize?

2. Our team has secured a client meeting this week to discuss their upcoming project requirements, and we couldn't be more excited! To ensure that we cover all their needs and pain points, let's put together a detailed agenda. This is a fantastic opportunity to showcase our expertise and develop a strong relationship with the client. We plan to discuss the scope, timeline, and any potential obstacles during the meeting. Are there any specific areas that you believe we should focus on?

3. We have

### Update config

In [26]:
# Record this rephraser's configuration under its model name, replacing any
# existing entry with the same key.
conf_json[model_name] = version_2_rephraser.get_config()

In [27]:
# Display the full config dict to check the newly added entry.
conf_json

{'current_rephraser': {'model_name': 'gpt-3-5-turbo',
  'api_version': '2023-05-15',
  'temperature': 0.7,
  'max_tokens': 256,
  'system_prompt_template': '',
  'user_prompt_template': '{language_input}. Paraphrase the following text into a more {style} style, return numbered list of strings contains 3 new paraphrased texts:\n\n  {input_text}',
  'top_p': 1,
  'n': 1,
  'localization_prompt_template': "Consider the language used in the following input text: {input_text}. Please provide your response in the same language as the one used in this input text. Your response may involve a variety of data types, such as JSON, XML, HTML, and other programming or markup formats. It's important to note, though, that any code or markup language should remain untouched during translation; only translate plain text elements",
  'style': 'casual'},
 'version_2_rephraser': {'model_name': 'gpt-3-5-turbo',
  'api_version': '2023-05-15',
  'temperature': 0.7,
  'max_tokens': 512,
  'system_prompt_templ

## Save data file and config JSON

In [28]:
# Write the augmented dataset and the updated config back to the same paths
# they were loaded from at the top of the notebook (both files are overwritten).
data_out_path = f"{json_path}/{data_file_name}"
conf_out_path = f'{json_path}/{conf_file_name}'

data.to_json(data_out_path, orient='index', indent=4, force_ascii=True)

with open(conf_out_path, 'w', encoding='utf-8') as conf_file:
    json.dump(conf_json, conf_file, ensure_ascii=False, indent=4)