In [3]:
import yfinance as yf
import pandas as pd
import requests
from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration, RagTokenForGeneration
from datasets import Dataset
from bs4 import BeautifulSoup
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
'''{
    }'''
company_tickers = {
    'HDFC Bank Ltd.': 'HDFCBANK.NS',
    'Reliance Industries Ltd.': 'RELIANCE.NS',
    'ICICI Bank Ltd.': 'ICICIBANK.NS',
    'Infosys Ltd.': 'INFY.NS',
    'Larsen & Toubro Ltd.': 'LT.NS',
    'Tata Consultancy Services Ltd.': 'TCS.NS',
    'ITC Ltd.': 'ITC.NS',
    'Bharti Airtel Ltd.': 'BHARTIARTL.NS',
    'Axis Bank Ltd.': 'AXISBANK.NS',
    'State Bank of India': 'SBIN.NS',
    'Mahindra & Mahindra Ltd.': 'M&M.NS',
    'Kotak Mahindra Bank Ltd.': 'KOTAKBANK.NS',
    'Hindustan Unilever Ltd.': 'HINDUNILVR.NS',
    'Bajaj Finance Ltd.': 'BAJFINANCE.NS',
    'NTPC Ltd.': 'NTPC.NS',
    'Tata Motors Ltd.': 'TATAMOTORS.NS',
    'Sun Pharmaceutical Industries Ltd.': 'SUNPHARMA.NS',
    'Maruti Suzuki India Ltd.': 'MARUTI.NS',
    'HCL Technologies Ltd.': 'HCLTECH.NS',
    'Power Grid Corporation of India Ltd.': 'POWERGRID.NS',
    'Tata Steel Ltd.': 'TATASTEEL.NS',
    'Titan Company Ltd.': 'TITAN.NS',
    'UltraTech Cement Ltd.': 'ULTRACEMCO.NS',
    'Asian Paints Ltd.': 'ASIANPAINT.NS',
    'Adani Ports and Special Economic Zone Ltd.': 'ADANIPORTS.NS',
    'Coal India Ltd.': 'COALINDIA.NS',
    'Oil & Natural Gas Corporation Ltd.': 'ONGC.NS',
    'Bajaj Auto Ltd.': 'BAJAJ-AUTO.NS',
    'Hindalco Industries Ltd.': 'HINDALCO.NS',
    'Grasim Industries Ltd.': 'GRASIM.NS',
    'IndusInd Bank Ltd.': 'INDUSINDBK.NS',
    'Nestle India Ltd.': 'NESTLEIND.NS',
    'Tech Mahindra Ltd.': 'TECHM.NS',
    'JSW Steel Ltd.': 'JSWSTEEL.NS',
    'Bajaj Finserv Ltd.': 'BAJAJFINSV.NS',
    'Adani Enterprises Ltd.': 'ADANIENT.NS',
    'Shriram Finance Ltd.': 'SHRIRAMFIN.NS',
    'Cipla Ltd.': 'CIPLA.NS',
    'Dr. Reddy\'s Laboratories Ltd.': 'DRREDDY.NS',
    'Hero MotoCorp Ltd.': 'HEROMOTOCO.NS',
    'Wipro Ltd.': 'WIPRO.NS',
    'Tata Consumer Products Ltd.': 'TATACONSUM.NS',
    'SBI Life Insurance Company Ltd.': 'SBILIFE.NS',
    'Britannia Industries Ltd.': 'BRITANNIA.NS',
    'Eicher Motors Ltd.': 'EICHERMOT.NS',
    'Apollo Hospitals Enterprise Ltd.': 'APOLLOHOSP.NS',
    'HDFC Life Insurance Company Ltd.': 'HDFCLIFE.NS',
    'Bharat Petroleum Corporation Ltd.': 'BPCL.NS',
    'Divi\'s Laboratories Ltd.': 'DIVISLAB.NS',
    'LTIMindtree Ltd.': 'LTIM.NS'
}


In [85]:
for i in company_tickers.values():
    print(i)

HDFCBANK.NS
RELIANCE.NS
ICICIBANK.NS
INFY.NS
LT.NS
TCS.NS
ITC.NS
BHARTIARTL.NS
AXISBANK.NS
SBIN.NS
M&M.NS
KOTAKBANK.NS
HINDUNILVR.NS
BAJFINANCE.NS
NTPC.NS
TATAMOTORS.NS
SUNPHARMA.NS
MARUTI.NS
HCLTECH.NS
POWERGRID.NS
TATASTEEL.NS
TITAN.NS
ULTRACEMCO.NS
ASIANPAINT.NS
ADANIPORTS.NS
COALINDIA.NS
ONGC.NS
BAJAJ-AUTO.NS
HINDALCO.NS
GRASIM.NS
INDUSINDBK.NS
NESTLEIND.NS
TECHM.NS
JSWSTEEL.NS
BAJAJFINSV.NS
ADANIENT.NS
SHRIRAMFIN.NS
CIPLA.NS
DRREDDY.NS
HEROMOTOCO.NS
WIPRO.NS
TATACONSUM.NS
SBILIFE.NS
BRITANNIA.NS
EICHERMOT.NS
APOLLOHOSP.NS
HDFCLIFE.NS
BPCL.NS
DIVISLAB.NS
LTIM.NS


In [None]:
def get_news_from_yahoo_finance(ticker):
    url = f'https://finance.yahoo.com/quote/{ticker}/news/'
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
        }
        response = requests.get(url, headers=headers)
        if response.status_code != 200:
            print(f"Failed to retrieve data for {ticker}. Status code: {response.status_code}")
            return []

        soup = BeautifulSoup(response.content, 'html.parser')

        # Find all div elements with class "content yf-1044anq"
        news_items = soup.find_all('div', class_='content yf-1044anq')

        news_data = []
        for item in news_items:
            title = item.find('h3', class_='clamp yf-1044anq')
            news = item.find('p', class_='clamp yf-1044anq')

            if title and news:
                news_data.append({
                    'title': title.get_text(strip=True),
                    'news': news.get_text(strip=True)
                })

        return news_data

    except Exception as e:
        print(f"An error occurred for {ticker}: {str(e)}")
        return []

In [None]:
def analyze_sentiment(text):
    blob = TextBlob(text)
    return blob.sentiment.polarity


In [None]:

# Loop through each ticker
for company_name, ticker in company_tickers.items():
    print(f"Collecting news for {company_name} ({ticker})...")
    news_data = get_news_from_yahoo_finance(ticker)
    
    for news_item in news_data:
        title = news_item['title']
        news = news_item['news']
        sentiment = analyze_sentiment(news)
        
        print(f"Title: {title}")
        print(f"News: {news}")
        print(f"Sentiment: {'Positive' if sentiment > 0 else 'Negative' if sentiment < 0 else 'Neutral'}")
        print("\n")

In [5]:
# working

import requests
from bs4 import BeautifulSoup
import time


news_data = []

for ticker in company_tickers.values():
    print(ticker)
    news_url = f"https://finance.yahoo.com/quote/{ticker}/news/"
    # Headers to mimic browser behavior
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    # Step 1: Request the main news page for Reliance Industries
    response = requests.get(news_url, headers=headers)
    time.sleep(5)
    # Step 2: Parse the page with BeautifulSoup
    soup = BeautifulSoup(response.content, "html.parser")

    # Step 3: Find all <div> tags with the relevant class and extract the links
    news_links = soup.find_all('div', class_="content yf-1e4au4k")
    print(news_links)
    for link in news_links:
        # Step 4: Find the 'a' tag and get its 'href' attribute
        title = link.find('a')['title']
        relative_url = link.find('a')['href']
        
        # Step 5: Prepend the base URL to the relative URL
        full_content_url = relative_url
        
        # Request the content of the article
        inner_response = requests.get(full_content_url, headers=headers)
        inner_soup = BeautifulSoup(inner_response.content, "html.parser")
        
        # Extract the datetime
        datetime = inner_soup.find('time')['datetime']
        
        # Extract the news paragraphs and concatenate them
        news = inner_soup.find_all('p')
        news_text = " ".join([p.get_text(strip=True) for p in news])
        data = {}
        data['datetime'] = datetime
        data['title'] = title
        data['news'] = news_text
        news_data.append(data)

    

HEROMOTOCO.NS
[<div class="content yf-1e4au4k"><a aria-label="How to invest in the Indian stock market" class="subtle-link fin-size-small titles noUnderline yf-13p9sh2" data-ylk="elm:hdln;elmt:link;itc:0;ct:story;g:9e768014-a8e1-4730-b31f-81742bf0afc6;slk:How%20to%20invest%20in%20the%20Indian%20stock%20market;sec:qsp-news;subsec:all;cpos:1" href="https://finance.yahoo.com/news/how-invest-indian-stock-market-092356666.html" title="How to invest in the Indian stock market"><h3 class="clamp yf-1e4au4k">How to invest in the Indian stock market</h3> <p class="clamp yf-1e4au4k">India’s stock market capitalisation topped $4.3tn to overtake Hong Kong as the world’s fourth-largest market.</p> </a> <div class="footer yf-1e4au4k"> <div class="publishing font-condensed yf-da5pxu">Yahoo Finance UK <i aria-hidden="true" data-svelte-h="svelte-sg6bkb">•</i> 18 days ago</div> <div class="taxonomy-links yf-13tszyc"><a aria-label="COALINDIA.NS" class="ticker x-small hover2 border streaming yf-ravs5v" dat

In [28]:
import yfinance as yf
msft=yf.Ticker("tcs.ns")
print(msft.quarterly_financials)

                                                             2024-03-31  \
Tax Effect Of Unusual Items                            546954715.413378   
Tax Rate For Calcs                                             0.257998   
Normalized EBITDA                                        181090000000.0   
Total Unusual Items                                        2120000000.0   
Total Unusual Items Excluding Goodwill                     2120000000.0   
Net Income From Continuing Operation Net Minori...       124340000000.0   
Reconciled Depreciation                                   12460000000.0   
Reconciled Cost Of Revenue                               257370000000.0   
EBITDA                                                   183210000000.0   
EBIT                                                     170750000000.0   
Net Interest Income                                       35550000000.0   
Interest Expense                                           2260000000.0   
Interest Income          

In [12]:
import pandas as pd
df = pd.DataFrame(news_data)
df.to_csv('news_data6.csv',index=False)

In [55]:
# Adjust ticker symbols for NSE
company_tickers = [
    "HDFCBANK.NS", "RELIANCE.NS", "ICICIBANK.NS", "INFY.NS", "LT.NS",
    "TCS.NS", "ITC.NS", "BHARTIARTL.NS", "AXISBANK.NS", "SBIN.NS",
    "M&M.NS", "KOTAKBANK.NS", "HINDUNILVR.NS", "BAJFINANCE.NS", "NTPC.NS",
    "TATAMOTORS.NS", "SUNPHARMA.NS", "MARUTI.NS", "HCLTECH.NS", "POWERGRID.NS",
    "TATASTEEL.NS", "TITAN.NS", "ULTRACEMCO.NS", "ASIANPAINT.NS", "ADANIPORTS.NS",
    "COALINDIA.NS", "ONGC.NS", "BAJAJ-AUTO.NS", "HINDALCO.NS", "GRASIM.NS",
    "INDUSINDBK.NS", "NESTLEIND.NS", "TECHM.NS", "JSWSTEEL.NS", "BAJAJFINSV.NS",
    "ADANIENT.NS", "SHRIRAMFIN.NS", "CIPLA.NS", "DRREDDY.NS", "HEROMOTOCO.NS",
    "WIPRO.NS", "TATACONSUM.NS", "SBILIFE.NS", "BRITANNIA.NS", "EICHERMOT.NS",
    "APOLLOHOSP.NS", "HDFCLIFE.NS", "BPCL.NS", "DIVISLAB.NS", "LTIM.NS"
]

In [49]:
import yfinance as yf
financial_data={}
for i in company_tickers:
    stock = yf.Ticker(f'{i}')
    financial_data[i]=stock.financials

In [52]:
import yfinance as yf



financial_data = {}

for ticker in company_tickers:
    stock = yf.Ticker(ticker)
    financial_data[ticker] = stock.financials

print(financial_data)


{'HDFCBANK.NS':                                                          2024-03-31  \
Tax Effect Of Unusual Items                                     0.0   
Tax Rate For Calcs                                         0.145257   
Total Unusual Items                                             0.0   
Total Unusual Items Excluding Goodwill                          0.0   
Net Income From Continuing Operation Net Minori...   640620400000.0   
Reconciled Depreciation                               30920800000.0   
Net Interest Income                                 1295104700000.0   
Interest Expense                                    1541385500000.0   
Interest Income                                     2836490200000.0   
Normalized Income                                    640620400000.0   
Net Income From Continuing And Discontinued Ope...   640620400000.0   
Diluted Average Shares                                 7117213643.0   
Basic Average Shares                                   708494

In [67]:
import pandas as pd

# Prepare list of DataFrames
data_frames = []
for ticker, data in financial_data.items():
    if not data.empty:
        # Reset the index and keep it as a column
        data_reset = data.reset_index()
        data_reset['Ticker'] = ticker
        data_frames.append(data_reset)

# Combine all DataFrames
if data_frames:
    combined_df = pd.concat(data_frames, axis=0, ignore_index=True)
    combined_df.to_csv('financial_data.csv', index=False)
else:
    print("No financial data to combine.")


In [64]:
df.to_csv('financial_data.csv')

In [7]:
# working

import requests
from bs4 import BeautifulSoup
import time


news_data = []

for ticker in company_tickers.values():
    
    ticker = ticker[:-3]
    news_url = f"https://in.tradingview.com/symbols/NSE-{ticker}/news/"
    # Headers to mimic browser behavior
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    # Step 1: Request the main news page for Reliance Industries
    response = requests.get(news_url, headers=headers)
    # Step 2: Parse the page with BeautifulSoup
    soup = BeautifulSoup(response.content, "html.parser")

    # Step 3: Find all <div> tags with the relevant class and extract the links
    sub_link = soup.find('a')['href']
    print(sub_link)
    '''
    for link in news_links:
        # Step 4: Find the 'a' tag and get its 'href' attribute
        title = link.find('a')['title']
        relative_url = link.find('a')['href']
        
        # Step 5: Prepend the base URL to the relative URL
        full_content_url = relative_url
        
        # Request the content of the article
        inner_response = requests.get(full_content_url, headers=headers)
        inner_soup = BeautifulSoup(inner_response.content, "html.parser")
        
        # Extract the datetime
        datetime = inner_soup.find('time')['datetime']
        
        # Extract the news paragraphs and concatenate them
        news = inner_soup.find_all('p')
        news_text = " ".join([p.get_text(strip=True) for p in news])
        data = {}
        data['datetime'] = datetime
        data['title'] = title
        data['news'] = news_text
        news_data.append(data)

    '''

/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/
/


KeyboardInterrupt: 

In [54]:
import requests
import pandas as pd
from bs4 import BeautifulSoup


tvnewsdata = []
# URL and headers from the curl command
url = "https://news-headlines.tradingview.com/v2/view/headlines/symbol?client=web&lang=en&section=&streaming=true&symbol=NSE%3AITC"
headers = {
    "sec-ch-ua": '"Chromium";v="128", "Not;A=Brand";v="24", "Google Chrome";v="128"',
    "Referer": "https://in.tradingview.com/",
    "sec-ch-ua-mobile": "?0",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
    "sec-ch-ua-platform": '"Windows"'
}

# Send the GET request
response = requests.get(url, headers=headers)

# Correctly call the json() method
data = response.json()

# Extract and store all storyPath values
story_paths = [item['storyPath'] for item in data['items']]

# Print the story paths
for path in story_paths:
    iurl = "https://in.tradingview.com" + path
    response2 = requests.get(iurl, headers=headers)
    soup = BeautifulSoup(response2.content, "html.parser")
    articles = soup.find('div', class_="content-kp3krhyk single-kp3krhyk")
    title_element = soup.find('h1')
    title = title_element.get_text() if title_element else ""
    time_element = articles.find('time')
    date = time_element['datetime'] if time_element else ""
    news_content = ""
    for span in articles.find_all('span'):
        for p in span.find_all('p'):
            news_content += p.get_text(separator="\n")
    data={}
    data['date']= date
    data['title']= title
    data['news']= news_content
    tvnewsdata.append(data)
df = pd.DataFrame(tvnewsdata)
df= df.dropna()
df.to_csv('tvnewsdata.csv')

In [22]:
import yfinance as yf
stock = yf.Ticker('HDFCBANK.NS')
print(stock.get_news())

[{'uuid': '0e36e44e-c91b-3c68-8dc4-95abcb915b0a', 'title': 'Form 8.3 - Equals Group PLC', 'publisher': 'GlobeNewswire', 'link': 'https://finance.yahoo.com/news/form-8-3-equals-group-090300096.html', 'providerPublishTime': 1726131780, 'type': 'STORY'}, {'uuid': 'a0810102-c723-3c6a-a39e-fb7e97c84c1f', 'title': 'Air-taxi maker Vertical completes phase one testing of prototype', 'publisher': 'Reuters', 'link': 'https://finance.yahoo.com/news/air-taxi-maker-vertical-completes-090234765.html', 'providerPublishTime': 1726131754, 'type': 'STORY'}, {'uuid': '01d09d7a-3625-3d79-8441-5e036def8d8b', 'title': 'FRN Variable Rate Fix', 'publisher': 'Business Wire', 'link': 'https://finance.yahoo.com/news/frn-variable-rate-fix-090000748.html', 'providerPublishTime': 1726131600, 'type': 'STORY'}, {'uuid': '0c29a251-b5d3-3e22-9822-20c853cbe899', 'title': 'Helen Badger joins Critical IoT Connectivity leaders CSL Group as Marketing Director', 'publisher': 'PR Newswire', 'link': 'https://finance.yahoo.com/