In [1]:
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from enum import Enum, auto
from typing import List
from langchain.chat_models import ChatOpenAI
# from langchain.llms import ChatOpenAI
from langchain.chains import create_tagging_chain
# Read settings from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm.
load_dotenv()
# Step 1: Create an instance of the language model.
# temperature=0 makes sampling as deterministic as the API allows, so tagging
# the same text twice is much less likely to yield different labels.
llm = ChatOpenAI(temperature=0)

In [2]:
# Overall sentiment label with three string-valued choices.
# Mixing in ``str`` makes every member compare equal to its plain string value.
# Used as the type of ``Tags.sentiment``.
class Sentiment(str, Enum):
    positive = "positive"
    neutral = "neutral"
    negative = "negative"

# Tone label for a statement; a finer-grained vocabulary than ``Sentiment``.
# Mixing in ``str`` makes every member compare equal to its plain string value.
# Used as the type of ``Tags.tone``.
class Tone(str, Enum):
    # These three values also exist in ``Sentiment``.
    positive = "positive"
    negative = "negative"
    neutral = "neutral"
    mixed = "mixed"
    # Specific emotional or attitudinal tones.
    happy = "happy"
    sad = "sad"
    angry = "angry"
    surprised = "surprised"
    fearful = "fearful"
    confident = "confident"
    uncertain = "uncertain"
    excited = "excited"
    disgusted = "disgusted"
    hopeful = "hopeful"
    cautious = "cautious"

# Language label for a piece of text; member values are lowercase English
# language names. Used as the type of ``Tags.language``.
# Lookup by value tolerates different casing and surrounding whitespace
# (``Language("English")`` gives ``Language.english``), because the outputs
# recorded in this notebook contain both "arabic" and "Arabic"/"English".
class Language(str, Enum):
    spanish = "spanish"
    english = "english"
    french = "french"
    german = "german"
    italian = "italian"
    dutch = "dutch"
    portuguese = "portuguese"
    russian = "russian"
    chinese = "chinese"
    japanese = "japanese"
    arabic = "arabic"
    hindi = "hindi"
    korean = "korean"
    turkish = "turkish"

    @classmethod
    def _missing_(cls, value):
        """Resolve a value that failed exact lookup by comparing it case-insensitively.

        Called by ``Enum`` only after the exact value lookup has failed.
        Returns the matching member, or ``None`` so that ``Enum`` raises its
        usual ``ValueError`` for values that are genuinely unknown.
        """
        if isinstance(value, str):
            normalized = value.strip().lower()
            for member in cls:
                if member.value == normalized:
                    return member
        return None

# Schema of the labels to extract for one piece of text.
# Each field passes ``...`` as its default and carries a human-readable
# description; the field types are the enums declared above.
class Tags(BaseModel):
    sentiment: Sentiment = Field(
        ...,
        description="Describes the sentiment of the statement.",
    )
    tone: Tone = Field(
        ...,
        description="Describes the tone of the statement.",
    )
    language: Language = Field(
        ...,
        description="Specifies the language of the text.",
    )

In [3]:
from langchain.prompts.chat import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.schema import SystemMessage, HumanMessage

# Prompt for the tagging chain: a fixed system instruction, the tweet itself
# (filled in through the ``tweet`` template variable) and a closing reminder.
prompt_messages = [
    SystemMessage(
        content=(
            # Adjacent string literals are joined with no separator, so the
            # first sentence must end with its own punctuation and space.
            "You are a world class algorithm to identify sentiment of tweets. "
            "Answer in the specific format of sentiment, tone and language."
        )
    ),
    HumanMessagePromptTemplate.from_template("tweet: {tweet}"),
    HumanMessage(
        content="Tips: Make sure to answer in the correct format. Return at least one field from the schema."
    ),
]
chain_prompt = ChatPromptTemplate(messages=prompt_messages)


In [4]:
# Step 3: Create a tagging chain based on the schema
# The chain gets the JSON schema generated from ``Tags``, the chat model and
# the custom prompt built earlier.
# NOTE(review): outputs recorded in this notebook contain values that are not
# enum members (e.g. 'English'); confirm that the installed LangChain version
# carries the enum constraints from ``Tags.schema()`` through to the model.
chain = create_tagging_chain(schema=Tags.schema(), llm=llm, prompt=chain_prompt)

In [5]:
# Step 4: Process the text and extract the properties
# The sample sentence is passed under ``tweet``, the prompt's only template variable.
text = "The chicken was good but staff was lazy."
result = chain.run(tweet=text)

In [6]:
result

{'sentiment': 'positive', 'tone': 'neutral', 'language': 'English'}

In [7]:
# Pull the three tags out of the result dict, then show them one per line.
sentiment, tone, language = (
    result[field] for field in ("sentiment", "tone", "language")
)

print(sentiment, tone, language, sep="\n")

positive
neutral
English


# Testing

In [5]:
from get_sentiment import get_sentiment
import random
import pandas as pd

In [10]:
# Try the imported helper on the sample sentence held in `text`; the return
# value is displayed as the cell output.
get_sentiment(text)

('positive', 'mixed', 'English')

In [10]:
# Load the collected comments; the path is relative to the working directory.
df = pd.read_csv('data/comments.csv')

In [11]:
def random_result(text):
    """Return a list of three independent random draws from 1, 2, 3, 4.

    ``text`` is accepted but never read; the result depends only on the
    state of the ``random`` module.
    """
    choices = (1, 2, 3, 4)
    return [random.choice(choices) for _ in range(3)]
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,tweet_id,name,username,author_id,key_term,text,possibly_sensitive,lang,created_at,retweet_count,reply_count,like_count,quote_count,bookmark_count,impression_count,conversation_id
0,1696918408015585758,Mohamed ali,Mohamed29446432,1692233493017714688,1696910426548621479,@ibahzad @M___R1212 @AlAmeenService @DigitalDu...,False,ar,2023-08-30T16:10:50.000Z,0,0,0,0,0,6,1696910426548621479
1,1696913269259116594,Asmaa mohamed Ali,asmaamohamed313,3700192701,1696910426548621479,@ibahzad @AlAmeenService @DigitalDubai الله يج...,False,ar,2023-08-30T15:50:25.000Z,0,0,1,0,0,54,1696910426548621479
2,1696911322724626644,AAAA/O/T,AlaaTurky55511,3386035469,1696910426548621479,@ibahzad @AlAmeenService @DigitalDubai ربنا يس...,False,ar,2023-08-30T15:42:41.000Z,0,0,1,0,0,49,1696910426548621479
3,1697203079102222381,Digital Dubai دبي الرقمية,DigitalDubai,79970951,1697203026480464104,"The ""04"" platform connects you directly to the...",False,en,2023-08-31T11:02:01.000Z,1,0,2,0,0,479,1697203026480464104
4,1697978812309372936,🏅Salaam Fashion,salaamfashion,1112560015079030784,1696909628536131934,@DigitalDubai @AlAmeenService @DIALA_ALI ❤️ FR...,False,en,2023-09-02T14:24:30.000Z,0,0,0,0,0,19,1696909628536131934


In [12]:
# Keep a single row per tweet_id. The result is re-assigned instead of using
# inplace=True, so the cell reads as an explicit transformation of `df`.
df = df.drop_duplicates(subset=['tweet_id'])

In [13]:
# Keep only the named tweet columns, in this order; any other column in the
# frame is dropped. The frame's (rows, columns) shape is the cell output.
df = df.loc[:, [
    'tweet_id',
    'name',
    'username',
    'author_id',
    'key_term',
    'text',
    'possibly_sensitive',
    'lang',
    'created_at',
    'retweet_count',
    'reply_count',
    'like_count',
    'quote_count',
    'bookmark_count',
    'impression_count',
    'conversation_id',
]]
df.shape

(24, 16)

In [None]:
import time

# Pause between consecutive get_sentiment calls, in seconds.
# NOTE(review): presumably there to stay under an API rate limit — confirm.
PAUSE_SECONDS = 1.5

# Column-oriented accumulator: one list per output column.
dict_sentiment = dict(
    id=[],
    sentiment=[],
    tone=[],
    language=[]
)

# Columns are read by name rather than by position, so the loop does not
# depend on column order. The loop variables avoid the names `id` (a builtin)
# and `text` (the sample sentence used by earlier cells).
for tweet_id, tweet_text in zip(df['tweet_id'], df['text']):
    sentiment, tone, language = get_sentiment(tweet_text)
    # Append only after the call has returned, so the four lists always have
    # the same length even if a call raises part-way through the run.
    dict_sentiment['id'].append(tweet_id)
    dict_sentiment['sentiment'].append(sentiment)
    dict_sentiment['tone'].append(tone)
    dict_sentiment['language'].append(language)
    print(sentiment, tone, language)
    time.sleep(PAUSE_SECONDS)

# One row per processed tweet, keyed by `id`.
sentiment_df = pd.DataFrame(dict_sentiment)

negative pleading arabic
negative neutral arabic
positive neutral arabic
positive informative English
positive emotional English
positive appreciative Arabic
neutral informative English
fear distrust arabic
neutral informative arabic
positive informative arabic


In [33]:
# Save the per-tweet tags on their own; index=False leaves the row index out of the file.
sentiment_df.to_csv('data/sentiment_comment.csv',index=False)

In [36]:
# Attach the tags to their tweets (inner join of tweet_id with id), then keep
# the listed columns only, which leaves out `lang` and the join key `id`.
final_df = (
    df.merge(sentiment_df, how='inner', left_on='tweet_id', right_on='id')
    .loc[:, [
        'tweet_id',
        'name',
        'username',
        'author_id',
        'key_term',
        'text',
        'possibly_sensitive',
        'created_at',
        'retweet_count',
        'reply_count',
        'like_count',
        'quote_count',
        'bookmark_count',
        'impression_count',
        'conversation_id',
        'sentiment',
        'tone',
        'language',
    ]]
)

In [38]:
# Save the merged tweets-plus-tags table; index=False leaves the row index out of the file.
final_df.to_csv('data/comments_final.csv', index=False)

In [39]:
# Run the upload script as a shell command (IPython `!` syntax).
# NOTE(review): presumably uploads CSV files from the data directory — confirm
# against send_file_to_cloud.py which files it actually picks up.
!python send_file_to_cloud.py

/home/fasih/tweeter_api/data [] ['user.csv', 'sentiment.csv', 'final.csv', 'tweet.csv']
