In [4]:
!pip install -q git+https://github.com/huggingface/transformers.git@main
!pip install -q accelerate bitsandbytes

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for transformers (pyproject.toml) ... [?25l[?25hdone


In [1]:
## Installation of libraris
!pip install bs4

Collecting bs4
  Downloading bs4-0.0.2-py2.py3-none-any.whl.metadata (411 bytes)
Downloading bs4-0.0.2-py2.py3-none-any.whl (1.2 kB)
Installing collected packages: bs4
Successfully installed bs4-0.0.2


In [2]:
## Import libraries

import requests
from bs4 import BeautifulSoup
import csv

# List of maximum verses for each chapter
max_slokas = [
    47, 72, 43, 42, 29, 47, 30, 28, 34, 42, 55, 20, 35, 27, 20, 24, 28, 78
]

# Function to scrape Bhagavad Gita verses and save in CSV
def scrape_bhagavad_gita(chapter):
    base_url = "https://www.gitasupersite.iitk.ac.in/srimad"

    # Get the max verses for the specified chapter
    max_verse = max_slokas[chapter - 1]  # Get the max verses for the current chapter

    # Open CSV file to write the data
    with open(f'chapter_{chapter}_translation.csv', 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['Verse Number', 'Translation']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        # Write header row
        writer.writeheader()

        # Loop through each verse in the chapter
        for verse in range(1, max_verse + 1):
            params = {
                'language': 'dv',
                'field_chapter_value': chapter,
                'field_nsutra_value': verse,
                'etsiva': '1',
                'choose': '1'
            }

            # Send GET request to fetch the page content
            response = requests.get(base_url, params=params)

            if response.status_code != 200:
                print(f"Failed to retrieve Chapter {chapter}, Verse {verse}.")
                continue

            # Parse the content with BeautifulSoup
            soup = BeautifulSoup(response.content, 'html.parser')

            # Find the translation content (looking for the div with class 'views-field views-field-field-etsiva')
            verse_div = soup.find('div', class_='views-field views-field-field-etsiva')

            if verse_div:
                # Extract the translation text
                verse_text = verse_div.get_text(separator="\n", strip=True)

                # Remove unwanted text like "English Translation By Swami Sivananda"
                if "English Translation By Swami Sivananda" in verse_text:
                    verse_text = verse_text.split("English Translation By Swami Sivananda")[1]

                # Remove the verse number at the start (e.g., "1.10")
                verse_text = ' '.join(verse_text.split()[1:])

                # Remove any double quotes from the translation
                verse_text = verse_text.replace('"', '')

                # Remove the verse number at the end (e.g., "।।1.10।।")
                if "।।" in verse_text:
                    verse_text = verse_text.split("।।")[0]

                # Write the verse number and translation to the CSV file
                writer.writerow({'Verse Number': verse, 'Translation': verse_text.strip()})
                print(f"Chapter {chapter}, Verse {verse} saved successfully.")
            else:
                print(f"No verse content found for Chapter {chapter}, Verse {verse}.")

    print(f"Scraping for Chapter {chapter} completed.")

# Ask the user which chapter to scrape
try:
    chapter_number = int(input("Enter the chapter number you want to scrape (1-18): "))
    if chapter_number < 1 or chapter_number > 18:
        print("Invalid chapter number! Please enter a number between 1 and 18.")
    else:
        # Start scraping the specified chapter
        scrape_bhagavad_gita(chapter_number)
except ValueError:
    print("Invalid input! Please enter a valid number.")


Enter the chapter number you want to scrape (1-18): 1
Chapter 1, Verse 1 saved successfully.
Chapter 1, Verse 2 saved successfully.
Chapter 1, Verse 3 saved successfully.
Chapter 1, Verse 4 saved successfully.
Chapter 1, Verse 5 saved successfully.
Chapter 1, Verse 6 saved successfully.
Chapter 1, Verse 7 saved successfully.
Chapter 1, Verse 8 saved successfully.
Chapter 1, Verse 9 saved successfully.
Chapter 1, Verse 10 saved successfully.
Chapter 1, Verse 11 saved successfully.
Chapter 1, Verse 12 saved successfully.
Chapter 1, Verse 13 saved successfully.
Chapter 1, Verse 14 saved successfully.
Chapter 1, Verse 15 saved successfully.
Chapter 1, Verse 16 saved successfully.
Chapter 1, Verse 17 saved successfully.
Chapter 1, Verse 18 saved successfully.
Chapter 1, Verse 19 saved successfully.
Chapter 1, Verse 20 saved successfully.
Chapter 1, Verse 21 saved successfully.
Chapter 1, Verse 22 saved successfully.
Chapter 1, Verse 23 saved successfully.
Chapter 1, Verse 24 saved successfu

In [6]:
from huggingface_hub import login
login('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')

In [7]:
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from transformers import AutoTokenizer

# base = "mistralai/Mistral-7B-v0.1"
# base = 'meta-llama/Meta-Llama-3-8B'
base = 'meta-llama/Meta-Llama-3-8B-Instruct'


quant_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_compute_dtype = torch.bfloat16,
    bnb_4bit_use_double_quant = True,
    bnb_4bit_quant_type = "nf4"
)

model = AutoModelForCausalLM.from_pretrained(
    base,
    quantization_config=quant_config,
    use_cache = False,
    device_map = "auto"
)

tokenizer = AutoTokenizer.from_pretrained(
  base,
  padding_side="right",
  add_bos_token = True,
  add_eos_token=True
)
tokenizer.pad_token = tokenizer.eos_token

config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

In [10]:
from transformers import pipeline
llm = pipeline(
    model = model,
    tokenizer=tokenizer,
    task='text-generation',
    temperature=0.1,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
    max_new_tokens=200,  # mex number of tokens to generate in the output
    # repetition_penalty=1.4  # without this output begins repeating
)

Device set to use cuda:0


In [11]:
import pandas as pd

# Function to combine all translations into a single string
def combine_translations(csv_file_path):
    # Load the CSV data into a DataFrame
    df = pd.read_csv(csv_file_path, header=None, names=['Verse Number', 'Translation'])

    # Combine all translations into a single string, separated by spaces or newlines
    combined_text = " ".join(df['Translation'].astype(str))

    return combined_text

# Example usage
csv_file_path = 'chapter_1_translation.csv'  # Path to your CSV file
combined_data = combine_translations(csv_file_path)




In [13]:
template = \
f'''
Following are the verses of a particular chapter of Shreemad Bhagvad Gita.
You being a good AI assistant is asked to summarize the following :

Verses : {combined_data}
'''

res = llm(template)

In [15]:
print(res[0]['generated_text'])


Following are the verses of a particular chapter of Shreemad Bhagvad Gita. 
You being a good AI assistant is asked to summarize the following :

Verses : Translation Dhritarashtra said What did my people and the sons of Pandu do when they had assembled together eager for battle on the holy plain of Kurukshetra, O Sanjaya. Sanjaya said Having seen the army of the Pandavas drawn up in battle-array, King Duryodhana then approached his teacher (Drona) and spoke these words. Behold, O Teacher! this mighty army of the sons of Pandu, arrayed by the son of Drupada, thy wise disciple. Here are heroes, mighty archers, eal in battle to Bhima and Arjuna, Yoyudhana (Satyaki), Virata and Drupada, of the great car (mighty warriors). Dhrishtaketu, chekitana and the valiant king of Kasi, Purujit and Kuntibhoja and Saibya, the best men. The strong Yodhamanyu and the brave Uttamaujas, the son of Subhadra (Abhimanyu, the son of Subhadra and Arjuna), and the sons of Draupadi, all of great chariots (great 