# In [1]:
import requests
from bs4 import BeautifulSoup
import json
from fuzzywuzzy import process, fuzz

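# NOTE: fuzzywuzzy uses the optional python-Levenshtein speedup automatically
# when that package is installed; the two imports below only repeat the import above.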
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

# Function to fetch HTML data from a URL
def fetch_data(url, timeout=10):
    """Download a page and return its body as text.

    Args:
        url: Address of the page to request.
        timeout: Value passed to ``requests.get`` as ``timeout`` (seconds).
            Without it a stalled server would block the call indefinitely.

    Returns:
        The response body as decoded text (``response.text``).

    Raises:
        requests.exceptions.HTTPError: If the server replies with an error
            status (raised by ``raise_for_status``).
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout``.
    """
    # A browser-like User-Agent is sent with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.text

# Function to parse the company names of the top losers from the HTML
def parse_top_losers(html):
    """Extract up to ten company names from the top-losers table.

    Args:
        html: Markup of the page as a string.

    Returns:
        A list with at most 10 company names, in the order their rows
        appear in the table. Rows without a company-name link are skipped.

    Raises:
        ValueError: If no ``<table class="tb10Table">`` is present.
    """
    soup = BeautifulSoup(html, 'html.parser')
    table = soup.find('table', {'class': 'tb10Table'})

    if table is None:
        raise ValueError("Table not found on the page")

    # Not every table carries an explicit <tbody>, and html.parser does not
    # add one; search the table itself in that case instead of crashing.
    body = table.find('tbody')
    if body is None:
        body = table

    company_names = []
    for row in body.find_all('tr'):
        link = row.find('a', {'class': 'mtp438CompanyName'})
        if link:
            company_names.append(link.get_text(strip=True))

    return company_names[:10]  # Return only the top 10

# Function to load JSON data and create a mapping from company names to tickers
def load_ticker_mapping(json_file):
    """Build a company-name -> symbol dictionary from a JSON file.

    Args:
        json_file: Path to a JSON file containing a list of objects, each
            with the keys ``'NAME OF COMPANY'`` and ``'SYMBOL'``.

    Returns:
        A dict mapping every ``'NAME OF COMPANY'`` value to its ``'SYMBOL'``.
        If a name occurs more than once, the last record wins.

    Raises:
        KeyError: If a record lacks one of the two required keys.
    """
    # Read as UTF-8 explicitly so non-ASCII company names decode the same
    # way on every platform instead of depending on the locale default.
    with open(json_file, 'r', encoding='utf-8') as file:
        records = json.load(file)
    return {record['NAME OF COMPANY']: record['SYMBOL'] for record in records}

# Function to convert company names to tickers with filtering using fuzzy matching
def convert_to_tickers(company_names, ticker_map, min_score=50):
    """Translate company names into ticker symbols by fuzzy name matching.

    Each name is compared against the keys of ``ticker_map`` with
    ``fuzz.ratio``; the best-scoring key supplies the symbol.

    Args:
        company_names: Iterable of company names to resolve.
        ticker_map: Dict mapping known company names to ticker symbols.
        min_score: Lowest match score that is still accepted. Defaults to
            50, the threshold previously hard-coded here.

    Returns:
        A list with one entry per input name: the matched symbol, or the
        string ``'Ticker Not Found'`` when nothing scores at least
        ``min_score`` (or when ``ticker_map`` is empty).
    """
    not_found = 'Ticker Not Found'

    # Materialize the candidates once instead of on every iteration.
    choices = list(ticker_map)
    if not choices:
        # With no candidates extractOne has nothing to return, and unpacking
        # its result would fail; every name is simply unresolved.
        return [not_found for _ in company_names]

    filtered_tickers = []
    for name in company_names:
        match = process.extractOne(name, choices, scorer=fuzz.ratio)
        if match is not None and match[1] >= min_score:
            filtered_tickers.append(ticker_map.get(match[0], not_found))
        else:
            filtered_tickers.append(not_found)
    return filtered_tickers

# Page listing the top losers (the index is selected through the query string)
top_losers_url = 'https://groww.in/markets/top-losers?index=GIDXNIFTY500'
# Location of the JSON file holding the company-name/symbol records
json_file_path = './nse.json'  # Update this path

# Download the page, then pull the company names out of its table
top_losers_html = fetch_data(url=top_losers_url)
top_losers_names = parse_top_losers(html=top_losers_html)

# Build the company-name -> symbol lookup from the JSON file
ticker_mapping = load_ticker_mapping(json_file=json_file_path)

# Resolve every scraped name to a symbol through fuzzy matching
top_losers_tickers = convert_to_tickers(
    company_names=top_losers_names,
    ticker_map=ticker_mapping,
)

# Serialize the symbols as indented JSON and show both names and result
result = json.dumps(dict(top_losers_tickers=top_losers_tickers), indent=4)
print("Top Losers Names:", top_losers_names)
print("Result:", result)


# Output:
# Top Losers Names: ['MMTC', 'Cyient', 'Route Mobile', 'Mankind Pharma', 'Rail Vikas Nigam', 'Ramkrishna Forgings', 'Tata Teleservices (Maharashtra)', 'United Breweries', 'Sobha', 'Federal Bank']
# Result: {
#     "top_losers_tickers": [
#         "MMTC",
#         "CYIENT",
#         "ROUTE",
#         "MANKIND",
#         "RVNL",
#         "RKFORGE",
#         "TTML",
#         "UBL",
#         "SOBHA",
#         "FEDERALBNK"
#     ]
# }
