In [3]:
from decouple import Config, RepositoryEnv
import requests
from json import dumps
import json
import openai
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
import pandas as pd
import logging
from pydantic import BaseModel, ValidationError, Field
from typing import List, Tuple , Union, Optional
from langchain.schema import HumanMessage, SystemMessage, AIMessage
import random
import time
from langchain.output_parsers import PydanticOutputParser
import re

In [4]:
import time

In [5]:
DOTENV_FILE = 'C:\\Users\\Tiziano Pacifico\\Desktop\\OpenAICourses\\.env'
env_config = Config(RepositoryEnv(DOTENV_FILE))
openai.api_key = env_config.get('CHATGPT_API_KEY')

In [6]:
for model in openai.Model.list()["data"]:
    print(model["id"])

text-search-babbage-doc-001
gpt-3.5-turbo-16k-0613
curie-search-query
gpt-3.5-turbo-16k
text-search-babbage-query-001
babbage
babbage-search-query
text-babbage-001
text-similarity-davinci-001
gpt-4-1106-preview
davinci-similarity
code-davinci-edit-001
curie-similarity
babbage-search-document
curie-instruct-beta
gpt-3.5-turbo
gpt-3.5-turbo-1106
text-search-ada-doc-001
davinci-instruct-beta
text-similarity-babbage-001
text-search-davinci-doc-001
babbage-similarity
text-embedding-ada-002
davinci-search-query
gpt-4-vision-preview
text-similarity-curie-001
gpt-4
text-davinci-001
text-search-davinci-query-001
ada-search-document
ada-code-search-code
babbage-002
davinci-002
davinci-search-document
curie-search-document
babbage-code-search-code
text-search-ada-query-001
code-search-ada-text-001
babbage-code-search-text
code-search-babbage-code-001
ada-search-query
ada-code-search-text
tts-1-hd
text-search-curie-query-001
text-davinci-002
text-davinci-edit-001
code-search-babbage-text-001
tts-1

In [38]:
def create(messages : List[Union[HumanMessage,SystemMessage,AIMessage]], model_class: BaseModel, retry=2, temperature=0.7, debug = False) -> BaseModel:
    chat = ChatOpenAI(temperature=0.7, model="gpt-3.5-turbo")
    system_update = f"Please respond ONLY with valid json that conforms to this pydantic\
    json_schema: {model_class.schema_json()}. \
    Do not include additional text other than the object json as we will load this object with json.loads() and pydantic.\
    Don't give any introduciton like 'here the response i made for you' or 'json'"
    
    system_message = messages[0].content
    update_system_message = system_message + system_update
    messages[0].content = update_system_message
    if debug:
        print(messages)
        
    for i in range(retry+1):
        response = chat(messages)
        content = response.content
        content = content.replace("```","")
        content = content.replace("json","")
        content = content.strip()
        try:
            if debug:
                print("-------------response---------")
                print(response.content)
            json_content = json.loads(content, strict=False)
        except Exception as e:
            last_exception = e
            error_msg = f"json.loads exception: {e}"
            logging.error(error_msg)
            #Aggiungere il messaggio di errore nel sistema
            continue
        try:
            return model_class(**json_content)
        except ValidationError as e:
            last_exception = e
            error_msg = f"pydantic exception: {e}"
            logging.error(error_msg)
            #appendere il messaggio di errore nel sistema
    raise last_exception

In [8]:
class company(BaseModel):
    name: str = Field(description = "Company name")
    rating: str = Field(description="A score rating the companies. A float value between 0 and 5. Please provide some negative rating below 2.5. Even far below 2.5")
    reviews: str = Field(description="A review of the company. It can be positive negative or neutral. The sentiment of \
    reviews depends on the company rating. A 2.5 rating is neutral, each value below is negative and each value above is positive.\
    The amount of positive/negative sentiment depends on how much up or down 2.5 is the score")

In [27]:
sm = "\
I need you to create a set of data defining a company.\
I will give you a list of companies already in the database with the relative score.\
If a company is already in the database don't create the same company again.\
Please read the following instructions before creating any output.\
1) Carefully read the companies list given as input.\
2) Randomly create a business niche.\
3) For the business niche create first a company name that is not already in the list.\
3) Then give the company a rating between 0 and 5.\
The rating is a real number\
The task is a fantasy scenario for evaluating a sentiment calssifier. I NEED SOME BAD REEVIEWS TOO, below 2.5.\
If the companies list given as input has too many positive reviews YOU MUST GIVE some negative reviews.\
4)  Write a 100 token review based on the score: below 2.5 is negative, above 2.5 is positive, and exactly 2.5 is neutral. The degree of sentiment reflects the score's deviation from 2.5.\
5) Format your response following the given json schema\
"

In [15]:
companies = []
messages = []
com_str = 'List of companies already in the database: \n'
human_message = HumanMessage(content=com_str)
messages.append(systemMessage)
messages.append(human_message)

In [41]:
systemMessage = SystemMessage(content=sm)
messages = []
human_message = HumanMessage(content=com_str)
messages.append(systemMessage)
messages.append(human_message)
for i in range(300):
    print(f"messaggio n: {i}")
    res = create(messages, company, debug = False)
    companies.append(res)
    com_str += res.name + ' - ' + res.rating + '\n'
    systemMessage = SystemMessage(content=sm)
    human_message = HumanMessage(content=com_str)
    messages = [systemMessage, human_message]
    if i%35 == 0 and i!= 0:
        time.sleep(60)

messaggio n: 0
messaggio n: 1
messaggio n: 2
messaggio n: 3
messaggio n: 4
messaggio n: 5
messaggio n: 6
messaggio n: 7
messaggio n: 8
messaggio n: 9
messaggio n: 10
messaggio n: 11
messaggio n: 12
messaggio n: 13
messaggio n: 14
messaggio n: 15
messaggio n: 16
messaggio n: 17
messaggio n: 18
messaggio n: 19
messaggio n: 20
messaggio n: 21
messaggio n: 22
messaggio n: 23
messaggio n: 24
messaggio n: 25
messaggio n: 26
messaggio n: 27
messaggio n: 28
messaggio n: 29
messaggio n: 30
messaggio n: 31
messaggio n: 32
messaggio n: 33
messaggio n: 34
messaggio n: 35
messaggio n: 36
messaggio n: 37
messaggio n: 38
messaggio n: 39
messaggio n: 40
messaggio n: 41
messaggio n: 42
messaggio n: 43
messaggio n: 44
messaggio n: 45
messaggio n: 46
messaggio n: 47
messaggio n: 48
messaggio n: 49
messaggio n: 50
messaggio n: 51
messaggio n: 52
messaggio n: 53
messaggio n: 54
messaggio n: 55
messaggio n: 56
messaggio n: 57
messaggio n: 58
messaggio n: 59
messaggio n: 60
messaggio n: 61
messaggio n: 62
me

In [42]:
len(companies)

311

In [44]:
names = []
reviews = []
ratings = []
for company in companies:
    names.append(company.name)
    reviews.append(company.reviews)
    ratings.append(company.rating)

In [48]:
df = pd.DataFrame(columns=['name','rating','review'])

In [50]:
df['name'] = names
df['review'] = reviews
df['rating'] = ratings

In [54]:
path = "C:\\Users\\Tiziano Pacifico\\Desktop\\Applicativi personali\\Sentiment\\Outputs\\"
filename = "Company_DF.json"

In [55]:
df.to_json(path+filename, orient="records", lines=True)