# Bank of Maharashtra Loan Data Scraper POC

**Data Scraping**: It contains a Python script to scrape loan product information from the official Bank of Maharashtra website.

In [1]:
import requests
from bs4 import BeautifulSoup
import re
from src.utils.Logger import Logger
from src.config.ConfigHelper import ConfigHelper
from src.helpers.PromptTemplate import PromptTemplate
from src.helpers.OpenAIHelper import AIHelper
from src.models.FaissClient import FaissClient
__config = ConfigHelper().config
__loggerObj = Logger()
prompt = PromptTemplate(__loggerObj)
ai = AIHelper(__loggerObj, __config)
faiss = FaissClient(logger=__loggerObj,config=__config)

In [2]:
loan_product_url_json = {
    "home-loan": "https://bankofmaharashtra.in/personal-banking/loans/home-loan",
    "super-flexi-housing-loan": "https://bankofmaharashtra.in/maha-super-flexi-housing-loan-scheme",
    "awas-yojana":"https://bankofmaharashtra.in/pradhan-mantri-awas-yojana-2",
    "car-loan": "https://bankofmaharashtra.in/personal-banking/loans/car-loan",
    "two-wheelers": "bankofmaharashtra.in/mahabank-vehicle-loan-scheme-for-two-wheelers-loans",
    "second-hand-car": "bankofmaharashtra.in/mahabank-vehicle-loan-scheme-for-second-hand-car",
    "topup-home-loan":"https://bankofmaharashtra.in/topup-home-loan",
    "education-loan": "https://bankofmaharashtra.in/educational-loans",
    "gold-loan": "https://bankofmaharashtra.in/gold-loan",
    "personal-loan": "https://bankofmaharashtra.in/personal-banking/loans/personal-loan",
    "salary-gain-scheme": "https://bankofmaharashtra.in/salary-gain-scheme",
    "property-loan": "https://bankofmaharashtra.in/loan-against-property",
    "maha-adhaar-loan": "https://bankofmaharashtra.in/maha-adhaar-loan",
    "green-financing-scheme":"https://bankofmaharashtra.in/mahabank-green-financing-scheme",
    "rooftop-solar-panel":"https://bankofmaharashtra.in/mahabank-rooftop-solar-panel-loan",
    "deposit": "bankofmaharashtra.in/lad"
}

In [3]:
def scrape_page_content(url):
    """
    Scrapes the main content from a given URL.
    
    Args:
        url (str): The URL of the loan product page.
        
    Returns:
        str: The cleaned text content of the page, or None if scraping fails.
    """
    try:
        print(f"Fetching {url}...")
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status() # Raises an exception for bad status codes (4xx or 5xx)
        
        soup = BeautifulSoup(response.content, 'html.parser')
        
        # The main content is within a div with class 'inner-page-content'
        content_div = soup.find('div', class_='maincontent')
        
        if not content_div:
            __loggerObj.critical("Warning: Could not find the 'maincontent' div in the HTML.")

        apply_button = content_div.find('a', class_='applybtnBig')
        if apply_button:
            apply_button.decompose() # This removes the tag and its content


        text = content_div.get_text(separator=' ', strip=True)

        cleaned_text = re.sub(r'\s+', ' ', text).strip()
        return cleaned_text

    except requests.exceptions.RequestException as e:
        __loggerObj.error(f"Error fetching {url}: {e}")
        return None

In [4]:
all_loan_data = []

for type,url in loan_product_url_json.items():
    print(url)
    content = scrape_page_content(url)
    if content is None:
        print("Unable to Scrape the data")
        continue
    summarize_text = ai.genrate_from_prompt(model=__config['openai']['models']['answer_relevancy'],prompt=prompt.summarization_prompt(content))
    data_json = {
        "text": summarize_text,
        "category": type}
    if summarize_text == "":
        print(f"Unable to summarize text for {type}: {url}")
        continue
    all_loan_data.append(data_json)

faiss.insert_texts(all_loan_data)

https://bankofmaharashtra.in/personal-banking/loans/home-loan
Fetching https://bankofmaharashtra.in/personal-banking/loans/home-loan...
https://bankofmaharashtra.in/maha-super-flexi-housing-loan-scheme
Fetching https://bankofmaharashtra.in/maha-super-flexi-housing-loan-scheme...
https://bankofmaharashtra.in/pradhan-mantri-awas-yojana-2
Fetching https://bankofmaharashtra.in/pradhan-mantri-awas-yojana-2...
https://bankofmaharashtra.in/personal-banking/loans/car-loan
Fetching https://bankofmaharashtra.in/personal-banking/loans/car-loan...
bankofmaharashtra.in/mahabank-vehicle-loan-scheme-for-two-wheelers-loans
Fetching bankofmaharashtra.in/mahabank-vehicle-loan-scheme-for-two-wheelers-loans...
Unable to Scrape the data
bankofmaharashtra.in/mahabank-vehicle-loan-scheme-for-second-hand-car
Fetching bankofmaharashtra.in/mahabank-vehicle-loan-scheme-for-second-hand-car...
Unable to Scrape the data
https://bankofmaharashtra.in/topup-home-loan
Fetching https://bankofmaharashtra.in/topup-home-lo

True