In [1]:
from pathlib import Path

import pandas as pd

### Merging the datasets into one CSV

In [2]:
def merge_dfs(
    df_list: list[pd.DataFrame],
    out_path="./datasets/merged_reviews.csv",
) -> pd.DataFrame:
    """Concatenate review frames, de-duplicate on ``content`` and save the result.

    Args:
        df_list: Frames to stack vertically; each needs a ``content`` column.
        out_path: Destination CSV (str or path-like). Defaults to the file the
            notebook reads the merged data back from. Missing parent
            directories are created. Pass ``None`` to skip writing.

    Returns:
        A new frame holding the first row for every distinct, non-null
        ``content`` value. Row labels are the positions in the concatenated
        input, so they may contain gaps; the same labels are written to the
        CSV as the ``id`` column.

    Raises:
        ValueError: If ``df_list`` is empty (raised by ``pd.concat``).
        KeyError: If the ``content`` column is missing.
    """
    # Chained, non-in-place calls: the intermediate frames are never mutated,
    # so re-running the cell always starts from the same inputs.
    merged_df = (
        pd.concat(df_list, ignore_index=True)
        .drop_duplicates(subset=["content"])
        .dropna(subset=["content"])
    )

    if out_path is not None:
        out_file = Path(out_path)
        # to_csv does not create directories; do it here so a fresh checkout works.
        out_file.parent.mkdir(parents=True, exist_ok=True)
        merged_df.to_csv(out_file, index_label="id")

    return merged_df

In [3]:
# Per-app filtered review CSVs; the loaded frames are the input for merge_dfs().
review_csv_paths = (
    "./datasets/Al_Quran_filtered_reviews.csv",
    "./datasets/Muslim_Bangla_filtered_reviews.csv",
    "./datasets/Muslim_Day_filtered_reviews.csv",
)
df_list = [pd.read_csv(csv_path) for csv_path in review_csv_paths]

In [4]:
# Reuse the cached merge when it exists; otherwise build it once from df_list.
# merge_dfs() writes ./datasets/merged_reviews.csv, so both paths end with the
# same read and produce the same columns (including the saved "id" column).
MERGED_CSV_PATH = Path("./datasets/merged_reviews.csv")
if not MERGED_CSV_PATH.exists():
    merge_dfs(df_list)
merged_df = pd.read_csv(MERGED_CSV_PATH)
merged_df.shape

(74285, 5)

### LLM prompting

#### Output schema

In [5]:
from pydantic import BaseModel
from typing import Literal
import json


# Closed set of sentiment labels, shared by Aspect.polarity and Output.overall_sentiment.
sentiment_types = Literal["positive", "negative", "neutral"]


# Schema entry pairing an aspect string with one sentiment label.
# NOTE: documented with `#` comments on purpose. Pydantic copies a model's class
# docstring into the generated JSON schema as "description", which would change
# `output_schema`.
class Aspect(BaseModel):
    aspect: str  # free-text aspect term (required)
    polarity: sentiment_types  # required; one of "positive" / "negative" / "neutral"


# Top-level structured-output schema: an optional translation, one overall
# sentiment label and a list of per-aspect labels.
# NOTE: documented with `#` comments on purpose. Pydantic copies a model's class
# docstring into the generated JSON schema as "description", which would change
# `output_schema`.
class Output(BaseModel):
    translation: str = ""  # defaults to "", so it is not listed under "required" in the schema
    overall_sentiment: sentiment_types  # required; one of the three sentiment labels
    aspects: list[Aspect]  # required; no minimum length is declared, so [] validates


# Build the JSON schema for Output and show it pretty-printed for inspection.
output_schema = Output.model_json_schema()
schema_text = json.dumps(output_schema, indent=2)
print(schema_text)

{
  "$defs": {
    "Aspect": {
      "properties": {
        "aspect": {
          "title": "Aspect",
          "type": "string"
        },
        "polarity": {
          "enum": [
            "positive",
            "negative",
            "neutral"
          ],
          "title": "Polarity",
          "type": "string"
        }
      },
      "required": [
        "aspect",
        "polarity"
      ],
      "title": "Aspect",
      "type": "object"
    }
  },
  "properties": {
    "translation": {
      "default": "",
      "title": "Translation",
      "type": "string"
    },
    "overall_sentiment": {
      "enum": [
        "positive",
        "negative",
        "neutral"
      ],
      "title": "Overall Sentiment",
      "type": "string"
    },
    "aspects": {
      "items": {
        "$ref": "#/$defs/Aspect"
      },
      "title": "Aspects",
      "type": "array"
    }
  },
  "required": [
    "overall_sentiment",
    "aspects"
  ],
  "title": "Output",
  "type": "object"
}


#### Prompting

In [13]:
from tqdm import tqdm
from utils import get_sentiment, clean_text
from dotenv import load_dotenv
import os
from cerebras.cloud.sdk import Cerebras

# Read key=value pairs from a .env file into the process environment
# (presumably where CEREBRAS_API_KEY_0 is defined — confirm).
load_dotenv()

True

In [14]:
# Cerebras API client; the key is taken from the CEREBRAS_API_KEY_0 environment
# variable (None is passed through when it is unset).
client = Cerebras(api_key=os.environ.get("CEREBRAS_API_KEY_0"))

In [None]:
# Load and normalise every line of the contents file up front, then close it.
# UTF-8 is requested explicitly: reviews can contain non-ASCII text and the
# platform default encoding is not guaranteed to decode it.
with open("./datasets/contents.csv", "r", encoding="utf-8") as f:
    reviews = [clean_text(line) for line in f]

# NOTE: mode "w" truncates any previous output, so a re-run starts from scratch.
with open("./out/apc_dataset.txt", "w", encoding="utf-8") as fw:
    for review in tqdm(reviews, desc="Processing reviews"):
        # Lines that are empty after cleaning carry nothing to analyse.
        if not review:
            continue

        llm_output = get_sentiment(client, review)

        # The experimentation run shows "aspects" can be None (e.g. for "best").
        aspects = llm_output["aspects"] if llm_output else None
        if not aspects:
            continue

        # NOTE(review): layout inferred from the output file name and the
        # text/aspect/polarity items (with a `$T$` placeholder) seen in the
        # experimentation output: three lines per aspect. Confirm this is the
        # format the downstream APC tooling expects.
        for item in aspects:
            fw.write(f"{item['text']}\n{item['aspect']}\n{item['polarity']}\n")

        # Flush per review so results already paid for survive a mid-run
        # failure such as an API rate-limit error.
        fw.flush()
            

In [11]:
# Rate-limit errors observed while prompting (HTTP 429):
#   RateLimitError: code 'token_quota_exceeded'
#   RateLimitError: code 'request_quota_exceeded'
#   Error code: 429 - {'message': 'Requests per hour limit exceeded - too many requests sent.', 'type': 'too_many_requests_error', 'param': 'quota', 'code': 'request_quota_exceeded'}

#### Experimentation

In [2]:
# Small sample of raw review strings that test() runs through clean_text() and
# get_sentiment().
# NOTE: this rebinds `reviews`, which the prompting loop in the previous section
# also assigns.
reviews = [
    "Very good application to learn Al Quran word by word.",
    "This app is the best and beautiful made. Thanks to those who made it. It really shows that you guys are doing your work for Allah ﷻ. And may Allah ﷻ reward you all.",
    "Best",
    "First of all, I thank Allah that we can read the Quran through apps.However, this app is different from other apps.In this app, you can read the Quran and learn about every sentence along with its meaning. I would like to thank the person who made this app.Above all, I pray that you may be engaged in all the works of Allah Almighty for the benefit of His servants, and that Allah may accept you as a responsible person.",
    "Wonderful quran app with a very clear translation and tafsir of the Quran.",
]

In [9]:
from cerebras.cloud.sdk import Cerebras
from pprint import pprint
from dotenv import load_dotenv
import os
from utils import get_sentiment, clean_text

# Read key=value pairs from a .env file into the process environment
# (presumably where CEREBRAS_API_KEY_0 is defined — confirm).
load_dotenv()

True

In [10]:
# Cerebras client for the experimentation cells; the key is read from the
# CEREBRAS_API_KEY_0 environment variable.
client = Cerebras(
    api_key=os.environ.get("CEREBRAS_API_KEY_0"),
)

In [11]:
def test():
    for review in reviews:
        text = clean_text(review)
        if not text:
            continue
        pprint(f"Review: {text}")
        pprint(f"ABSA output: {get_sentiment(client, text)['aspects']}")
        print("-------------------------------------------------------------------------------")

# Run the experiment over the sample `reviews` list; each non-empty review is
# passed to get_sentiment() with the Cerebras client.
test()

'Review: very good application to learn al quran word by word'
("ABSA output: [{'text': 'very good $T$ to learn al quran word by word', "
 "'aspect': 'application', 'polarity': 'Positive'}]")
-------------------------------------------------------------------------------
('Review: this app is the best and beautiful made thanks to those who made it '
 'it really shows that you guys are doing your work for allah  and may allah  '
 'reward you all')
("ABSA output: [{'text': 'this $T$ is the best and beautiful made thanks to "
 'those who made it it really shows that you guys are doing your work for '
 "allah  and may allah  reward you all', 'aspect': 'app', 'polarity': "
 "'Positive'}]")
-------------------------------------------------------------------------------
'Review: best'
'ABSA output: None'
-------------------------------------------------------------------------------
('Review: first of all i thank allah that we can read the quran through '
 'appshowever this app is different f