In [1]:
!pip install transformers faiss-cpu datasets

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m41.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading datasets-3.5.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
from datasets import load_dataset
from transformers import GPT2Tokenizer, GPT2LMHeadModel


# Download the "kshitij230/Indian-Law" dataset from the Hugging Face Hub.
# A later cell reads the 'Instruction' and 'Response' fields of its 'train' split.
dataset = load_dataset("kshitij230/Indian-Law")



# Load the pretrained tokenizer and causal language model for the same checkpoint name.
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Indian-Law.csv:   0%|          | 0.00/15.2M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25607 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [None]:
import faiss
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer


# Corpus: one text per training example, the instruction followed by its response.
texts = [f"{example['Instruction']} {example['Response']}" for example in dataset['train']]

# Fit TF-IDF directly in float32. FAISS stores float32 vectors, so producing a
# float64 dense matrix first would need twice the memory only to be converted.
vectorizer = TfidfVectorizer(dtype=np.float32)
tfidf_sparse = vectorizer.fit_transform(texts)

# IndexFlatL2 takes a dense array, so the sparse matrix is densified here.
# The sparse result keeps its own name instead of being overwritten in place.
tfidf_vectors = tfidf_sparse.toarray()

# Build FAISS index (exact L2 search over every document vector)
dimension = tfidf_vectors.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(tfidf_vectors)

In [None]:
def retrieve_documents(query, top_k=5):
    """Return the corpus texts closest to ``query`` in TF-IDF space.

    Args:
        query: The user's question as a plain string.
        top_k: Maximum number of documents to return.

    Returns:
        A list of at most ``top_k`` entries of ``texts``, nearest first.
        Empty when ``top_k`` is not positive.
    """
    if top_k <= 0:
        return []

    # FAISS works on C-contiguous float32 arrays; make that explicit instead of
    # relying on the wrapper converting whatever dtype the vectorizer produced.
    query_vector = np.ascontiguousarray(
        vectorizer.transform([query]).toarray(), dtype=np.float32
    )

    _distances, indices = index.search(query_vector, top_k)

    # When the index holds fewer than top_k vectors FAISS pads the result with
    # -1. Indexing texts[-1] would silently return the last document, so drop them.
    return [texts[idx] for idx in indices[0] if idx >= 0]

def generate_response(query, retrieved_responses, max_new_tokens=700):
    """Generate text from the query and retrieved documents, wrapped to 50 columns.

    The prompt is ``"Query: ..."`` followed by the retrieved documents joined by
    newlines. It is truncated to 200 tokens before generation. The decoded
    output sequence is re-wrapped with ``splitter``.

    Args:
        query: The user's question.
        retrieved_responses: Iterable of document strings used as context.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded text as a single string with lines joined by ``"\\n"``.
    """
    context = "\n".join(retrieved_responses)
    input_text = f"Query: {query}\n\nContext:\n{context}"

    inputs = tokenizer(input_text, return_tensors="pt", max_length=200, truncation=True)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,  # Control the length of the generated text
        num_return_sequences=1,         # Generate only one response
        no_repeat_ngram_size=2,         # Avoid repeating phrases
        # Set the padding id explicitly; otherwise generate() falls back to the
        # EOS id on its own and logs a warning on every call.
        pad_token_id=tokenizer.eos_token_id,
        # `early_stopping` was dropped: it is a beam-search option and no
        # `num_beams` is passed here, so it had no effect.
    )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Re-wrap the text so that no line exceeds 50 characters where possible.
    response_lines = splitter(response, max_line_length=50)
    return "\n".join(response_lines)

def splitter(text, max_line_length):
    """Greedily wrap ``text`` into lines of at most ``max_line_length`` characters.

    Words are taken from ``text.split()`` and joined with single spaces. A word
    longer than ``max_line_length`` is never broken; it gets a line of its own.

    Args:
        text: The string to wrap.
        max_line_length: Maximum number of characters per line.

    Returns:
        A list of non-empty lines; empty when ``text`` contains no words.
    """
    lines = []
    current_line = ""

    for word in text.split():
        if not current_line:
            # The first word of a line is always accepted. No separating space is
            # added, so none is counted, and an empty line is never emitted when
            # the word alone reaches the limit.
            current_line = word
        elif len(current_line) + 1 + len(word) <= max_line_length:
            current_line = f"{current_line} {word}"
        else:
            lines.append(current_line)
            current_line = word

    if current_line:
        lines.append(current_line)

    return lines

def rag_pipeline(query, top_k=5):
    """Retrieve the ``top_k`` closest documents for ``query`` and generate a response.

    Args:
        query: The user's question.
        top_k: Number of documents passed to ``retrieve_documents``.

    Returns:
        The string produced by ``generate_response`` for the retrieved documents.
    """
    retrieved_docs = retrieve_documents(query, top_k)
    return generate_response(query, retrieved_docs)

In [5]:
!pip install groq

Collecting groq
  Downloading groq-0.22.0-py3-none-any.whl.metadata (15 kB)
Downloading groq-0.22.0-py3-none-any.whl (126 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.7/126.7 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq
Successfully installed groq-0.22.0


In [None]:


import os
from groq import Groq

# Initialize Groq client.
# The API key is read from the GROQ_API_KEY environment variable so that no
# credential is stored in the notebook; set it before running this cell.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

def rag_with_groq(query):
    """Answer ``query`` with a Groq chat model, using the RAG output as context.

    ``rag_pipeline(query)`` is run first. Its text is embedded both in the system
    prompt and in the user message of a single chat-completion request.

    Args:
        query: The user's question.

    Returns:
        The message content of the first choice in the completion.
    """
    context = rag_pipeline(query)

    system_prompt = (
        f"You are a highly knowledgeable legal assistant specializing in Indian law.summarize and filter important point if you dont found relevant points from the context then use your own knowledge make sure to specify the laws and articles,the input context : {context} , dont answer anything out indian law domain "
    )
    user_prompt = f"### Question: {query}\n\n### Relevant Context:\n{context}\n\n### Answer:"

    completion = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=500,
        temperature=0.5,
    )
    return completion.choices[0].message.content

In [7]:
# Raw string: the banner is drawn with backslashes. In a normal string literal
# a backslash at the end of a line joins it with the next line (which merged two
# rows of the art), and the other backslashes are invalid escape sequences.
print(r"""
                      ___.   .__                         __
  ___________    _____\_ |__ |  |______________    _____/  |______
 /  ___/\__  \  /     \| __ \|  |  \_  __ \__  \  /    \   __\__  \
 \___ \  / __ \|  Y Y  \ \_\ \   Y  \  | \// __ \|   |  \  |  / __ \_
/____  >(____  /__|_|  /___  /___|  /__|  (____  /___|  /__| (____  /
     \/      \/      \/    \/     \/           \/     \/          \/ """)




                      ___.   .__                         __
  ___________    _____\_ |__ |  |______________    _____/  |______
 /  ___/\__  \  /     \| __ \|  |  \_  __ \__  \  /    \   __\__   \___ \  / __ \|  Y Y  \ \_\ \   Y  \  | \// __ \|   |  \  |  / __ \_
/____  >(____  /__|_|  /___  /___|  /__|  (____  /___|  /__| (____  /
     \/      \/      \/    \/     \/           \/     \/          \/ 


In [8]:

# Interactive loop: read a question, answer it with rag_with_groq, repeat until
# the user enters 'q'. A query containing `key` additionally prints the
# intermediate RAG text before the final answer.
key = ".god"
while True:
    # Strip surrounding whitespace so that "q " still quits.
    query = input("Ok tell me what happend  : ").strip()
    if query.lower() == 'q':
        break

    # Blank input carries no question; do not run the pipeline for it.
    if not query:
        continue

    if key in query:
        rag_response = rag_pipeline(query)
        print("RAG Response:", rag_response)
        response = rag_with_groq(query)
        print("Mr.wakeel sahab:\n", response)
    else:
        response = rag_with_groq(query)
        print("Mr.wakeel sahab :\n", response)

Ok tell me what happend  : my wife is having a physical affair with another man


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Mr.wakeel sahab :
 **Adultery under Indian Law**

As per Section 497 of the Indian Penal Code (IPC), adultery is defined as sexual intercourse between a person and the wife of another person, without the consent or connivance of the husband. The punishment for adultery can include imprisonment for up to five years, or a fine, or both.

**Key Points:**

1. **Section 497**: Covers the offense of adultery, where a person has sexual intercourse with the wife of another person without the husband's consent.
2. **Punishment**: Imprisonment for up to five years, or a fine, or both.
3. **Wife's Liability**: The wife is not punishable as an abettor in such cases.
4. **Definition of 'Man' and 'Woman'**: Section 10 of the IPC defines 'Man' as a male human being of any age and 'Woman' as a female human being of any age.

**Relevant Sections:**

1. **Section 498**: Deals with enticing, taking away, or detaining a married woman with criminal intent.
2. **Section 496**: Relates to the offense of rape