In [6]:
import requests
from bs4 import BeautifulSoup
import json
from fuzzywuzzy import process, fuzz

# Function to fetch HTML data from a URL
def fetch_data(url, timeout=10):
    """Download *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; without it a stalled connection
            would block the script indefinitely.

    Returns:
        The decoded response body (``response.text``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status
            (raised by ``raise_for_status``).
        requests.RequestException: For connection failures and timeouts.
    """
    # Browser-like User-Agent; presumably the site rejects the default
    # python-requests agent -- TODO confirm.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.text

# Function to parse stock names from the HTML
def parse_top_gainers(html):
    """Extract company names from the top-gainers table in *html*.

    Args:
        html: Markup of the top-gainers page.

    Returns:
        A list of at most ten company names, in the order their rows appear
        in the table. Rows without a company-name link are ignored.

    Raises:
        ValueError: If the page contains no ``<table class="tb10Table">``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    table = soup.find('table', {'class': 'tb10Table'})

    if table is None:
        raise ValueError("Table not found on the page")

    # find() returns None when the markup has no explicit <tbody>; in that
    # case search the rows directly under the table instead of crashing.
    body = table.find('tbody')
    if body is None:
        body = table

    company_names = []
    for row in body.find_all('tr'):
        # The company name is the text of the link carrying this class;
        # header or spacer rows do not have one and are skipped.
        link = row.find('a', {'class': 'mtp438CompanyName'})
        if link is not None:
            company_names.append(link.get_text(strip=True))

    return company_names[:10]  # Return only the top 10

# Function to load JSON data and create a mapping from company names to tickers
def load_ticker_mapping(json_file):
    """Read a JSON list of listing records and map company names to symbols.

    Each usable record is a JSON object with the keys ``'NAME OF COMPANY'``
    and ``'SYMBOL'``. Records that are not objects, or that lack one of the
    two keys, are reported on stdout and skipped.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        A dict mapping each company name to its symbol. If a name occurs
        more than once, the last record wins.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        TypeError: If the top-level JSON value is not a list.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(json_file, 'r', encoding='utf-8') as file:
        try:
            data = json.load(file)
        except json.JSONDecodeError as e:
            print(f"JSONDecodeError: {e}. Ensure the JSON file is correctly formatted.")
            raise

    if not isinstance(data, list):
        raise TypeError(
            f"Expected a JSON list of records, got {type(data).__name__}"
        )

    # Print the first two items so the record structure can be inspected.
    print("Sample data:", data[:2])

    ticker_map = {}
    for item in data:
        if not isinstance(item, dict):
            # Same policy as for records with missing keys: report and skip.
            print(f"Skipping non-object entry: {item!r}")
            continue
        try:
            company_name = item['NAME OF COMPANY']
            symbol = item['SYMBOL']
        except KeyError as e:
            print(f"KeyError: {e} in item: {item}")
            continue
        ticker_map[company_name] = symbol

    return ticker_map

# Function to convert company names to tickers with fuzzy matching
def convert_to_tickers(company_names, ticker_map, threshold=50):
    """Translate company names to ticker symbols using fuzzy name matching.

    Each name is compared against the keys of *ticker_map* with
    ``fuzz.ratio``; the best-scoring key is accepted when its score reaches
    *threshold*.

    Args:
        company_names: Iterable of company names to look up.
        ticker_map: Dict mapping known company names to ticker symbols.
        threshold: Minimum match score for a match to be accepted.
            Defaults to 50.

    Returns:
        A list with one entry per input name, in input order: the matched
        symbol, or the string ``'Ticker Not Found'`` when nothing scored at
        least *threshold*.
    """
    # With nothing to match against, extractOne returns None and unpacking
    # it would raise; every name is simply unmatched.
    if not ticker_map:
        return ['Ticker Not Found' for _ in company_names]

    # Build the candidate list once instead of on every iteration. Pass the
    # names as a list, not the dict: given a dict, fuzzywuzzy matches against
    # its values instead of its keys.
    choices = list(ticker_map)

    filtered_tickers = []
    for name in company_names:
        match = process.extractOne(name, choices, scorer=fuzz.ratio)
        if match is not None and match[1] >= threshold:
            # match[0] is one of the candidate names, so the lookup cannot miss.
            filtered_tickers.append(ticker_map[match[0]])
        else:
            filtered_tickers.append('Ticker Not Found')
    return filtered_tickers

# Inputs: the local JSON file of listing records and the page to scrape.
json_file_path = './nse.json'  # Update this path
top_gainers_url = 'https://groww.in/markets/top-gainers?index=GIDXNIFTY500'

# Step 1: download the page and pull the company names out of its table.
top_gainers_html = fetch_data(top_gainers_url)
top_gainers_names = parse_top_gainers(top_gainers_html)

# Step 2: build the company-name -> symbol lookup from the JSON file.
ticker_mapping = load_ticker_mapping(json_file_path)

# Step 3: fuzzy-match each scraped name against the lookup.
top_gainers_tickers = convert_to_tickers(top_gainers_names, ticker_mapping)

# Step 4: serialise the symbols as pretty-printed JSON and show both lists.
result = json.dumps(dict(top_gainers_tickers=top_gainers_tickers), indent=4)
print("Top Gainers Names:", top_gainers_names)
print("Result:", result)


Sample data: [{'SYMBOL': '20MICRONS', 'NAME OF COMPANY': '20 Microns Limited', 'SERIES': 'EQ', 'DATE OF LISTING': '06-OCT-2008', 'PAID UP VALUE': 5, 'MARKET LOT': 1, 'ISIN NUMBER': 'INE144J01027', 'FACE VALUE': 5}, {'SYMBOL': '21STCENMGM', 'NAME OF COMPANY': '21st Century Management Services Limited', 'SERIES': 'BE', 'DATE OF LISTING': '03-MAY-1995', 'PAID UP VALUE': 10, 'MARKET LOT': 1, 'ISIN NUMBER': 'INE253B01015', 'FACE VALUE': 10}]
Top Gainers Names: ['New India Assurance Company', 'General Insurance Corporation of India', 'Paytm - One97 Communications Ltd', 'Shriram Finance', 'Network18 Media & Investments', 'Solar Industries India', 'Amara Raja Energy & Mobility', 'Mphasis', 'Ashok Leyland', 'Bharat Forge']
Result: {
    "top_gainers_tickers": [
        "NIACL",
        "GICRE",
        "PAYTM",
        "SHRIRAMFIN",
        "NETWORK18",
        "SOLARINDS",
        "ARE&M",
        "MPHASIS",
        "ASHOKLEY",
        "BHARATFORG"
    ]
}
