In [40]:
!pip install llama-index==0.10.18 llama-index-llms-groq==0.1.3 groq==0.4.2 llama-index-embeddings-huggingface==0.2.0



In [41]:
!pip install spacy textblob transformers torch



In [42]:
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    ServiceContext,
    load_index_from_storage
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.groq import Groq
import warnings
import spacy
from textblob import TextBlob
from transformers import MarianMTModel, MarianTokenizer

warnings.filterwarnings('ignore')

In [43]:
from google.colab import userdata
GROQ_API_KEY = "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"


In [44]:
nlp_spacy = spacy.load("en_core_web_sm")

translation_model_name = "Helsinki-NLP/opus-mt-en-ur"
translator_model = MarianMTModel.from_pretrained(translation_model_name)
translator_tokenizer = MarianTokenizer.from_pretrained(translation_model_name)

In [45]:
def translate_text(text, source_lang, target_lang):
    tokenizer = translator_tokenizer
    model = translator_model
    inputs = tokenizer(text, return_tensors="pt", padding=True)
    translated = model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)

In [46]:
def analyze_sentiment(text):
    return TextBlob(text).sentiment

reader = SimpleDirectoryReader(input_files=["/content/constitution.pdf"])
documents = reader.load_data()

text_splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=200)
nodes = text_splitter.get_nodes_from_documents(documents, show_progress=True)

embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
llm = Groq(model="llama-3.1-8b-instant", api_key=GROQ_API_KEY)
service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=llm)

vector_index = VectorStoreIndex.from_documents(documents, show_progress=True, service_context=service_context, node_parser=nodes)
vector_index.storage_context.persist(persist_dir="./storage_mini")
storage_context = StorageContext.from_defaults(persist_dir="./storage_mini")
index = load_index_from_storage(storage_context, service_context=service_context)
query_engine = index.as_query_engine(service_context=service_context)

Parsing nodes:   0%|          | 0/224 [00:00<?, ?it/s]

Parsing nodes:   0%|          | 0/224 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/224 [00:00<?, ?it/s]

In [47]:
def process_query(query, use_translation=False, source_lang="en", target_lang="ur"):
    if use_translation:
        query = translate_text(query, source_lang, target_lang)
    doc = nlp_spacy(query)
    sentiment = analyze_sentiment(query)

    print("\nPOS Tags:")
    for token in doc:
        print(f"{token.text} -> {token.pos_}")

    print("\nNamed Entities:")
    for ent in doc.ents:
        print(f"{ent.text} -> {ent.label_}")

    print("\nSentiment Analysis:")
    print(f"Polarity: {sentiment.polarity}, Subjectivity: {sentiment.subjectivity}")

    response = query_engine.query(query)
    return response.response

In [51]:
# @title Default title text
query = "What is the role of the judiciary in ensuring fundamental rights?"
response = process_query(query, use_translation=False)
print("\nChatbot Response:")
print(response)


POS Tags:
What -> PRON
is -> AUX
the -> DET
role -> NOUN
of -> ADP
the -> DET
judiciary -> NOUN
in -> ADP
ensuring -> VERB
fundamental -> ADJ
rights -> NOUN
? -> PUNCT

Named Entities:

Sentiment Analysis:
Polarity: 0.0, Subjectivity: 0.0

Chatbot Response:
The judiciary plays a crucial role in upholding and protecting the fundamental rights of citizens. It ensures that the government and other institutions do not infringe upon these rights, and that individuals are treated fairly and justly under the law. The judiciary's role is to interpret the law and make decisions that promote justice and equality. In doing so, it acts as a check on the power of the other branches of government, ensuring that they do not abuse their authority and that the rights of citizens are protected.


In [54]:
from sacrebleu import corpus_bleu

# Example test cases for translation (English to Roman Urdu)
test_cases = [
    ("What are the fundamental rights guaranteed by the Constitution?", "Aain ke mutabiq bunyadi haqooq kya hain?"),
    ("Who is responsible for enforcing the laws?", "Qanoon nafiz karne ka zimmedar kaun hai?"),
    ("What is the role of the judiciary in Pakistan?", "Pakistan mein adliya ka kya kirdar hai?"),
]

# Evaluate BLEU score
def evaluate_translation(test_cases):
    references = [ref for _, ref in test_cases]  # Reference translations
    predictions = []

    for source, _ in test_cases:
        translated = translate_text(source, source_lang="en", target_lang="ur")
        predictions.append(translated)
        print(f"Source: {source}")
        print(f"Translated: {translated}")

    # Calculate BLEU score
    bleu = corpus_bleu(predictions, [references])
    print("\nBLEU Score:", bleu.score)

# Run evaluation
evaluate_translation(test_cases)

Source: What are the fundamental rights guaranteed by the Constitution?
Translated: بنیادی حقوق کس چیز کی ضمانت ہے ؟
Source: Who is responsible for enforcing the laws?
Translated: قوانین کو ذمہ‌دار بنانے کا ذمہ‌دار کون ہے ؟
Source: What is the role of the judiciary in Pakistan?
Translated: پاکستان میں نسلیاتی امتیاز کا کیا کردار ہے ؟

BLEU Score: 0.0


In [53]:
!pip install sacrebleu

Collecting sacrebleu
  Downloading sacrebleu-2.4.3-py3-none-any.whl.metadata (51 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/51.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting portalocker (from sacrebleu)
  Downloading portalocker-3.0.0-py3-none-any.whl.metadata (8.5 kB)
Collecting colorama (from sacrebleu)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Downloading sacrebleu-2.4.3-py3-none-any.whl (103 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.0/104.0 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Downloading portalocker-3.0.0-py3-none-any.whl (19 kB)
Installing collected packages: portalocker, colorama, sacrebleu
Successfully installed colorama-0.4.6 portalocker-3.0.0 sacrebleu-2.4.3


In [55]:
from sklearn.metrics import accuracy_score, classification_report

# Ground truth sentiment labels and predicted labels
true_labels = ["positive", "negative", "neutral"]
queries = [
    "The Constitution ensures women's rights.",
    "The government has failed to protect minorities.",
    "What is the role of the judiciary?"
]
predicted_labels = [analyze_sentiment(query).polarity for query in queries]

# Map polarity to sentiment labels
def polarity_to_label(polarity):
    if polarity > 0:
        return "positive"
    elif polarity < 0:
        return "negative"
    else:
        return "neutral"

predicted_labels = [polarity_to_label(p) for p in predicted_labels]

# Evaluate accuracy
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Accuracy: {accuracy}")
print(classification_report(true_labels, predicted_labels))

Accuracy: 0.6666666666666666
              precision    recall  f1-score   support

    negative       1.00      1.00      1.00         1
     neutral       0.50      1.00      0.67         1
    positive       0.00      0.00      0.00         1

    accuracy                           0.67         3
   macro avg       0.50      0.67      0.56         3
weighted avg       0.50      0.67      0.56         3



In [56]:
from sklearn.metrics import precision_score

# Sample user queries and ground truth relevant documents
queries = [
    "What are the fundamental rights?",
    "What is the role of the judiciary?",
    "What are the powers of the executive?"
]
true_documents = [
    ["Fundamental Rights Section"],
    ["Judiciary Section"],
    ["Executive Powers Section"]
]

# Retrieve documents using the query engine
retrieved_documents = [query_engine.query(query).response for query in queries]

# Check if retrieved documents are relevant
def is_relevant(retrieved, true):
    return any(doc in true for doc in retrieved)

relevance = [is_relevant([retrieved], true) for retrieved, true in zip(retrieved_documents, true_documents)]

# Calculate precision
precision = sum(relevance) / len(relevance)
print(f"Precision@1: {precision}")

Precision@1: 0.0
