# NEWS SCRAPING

### GNews Articles

In [13]:
from datetime import datetime, timedelta, date
import pandas as pd
import time
from gnews import GNews
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def get_days_from_period(period: str) -> int:
    """
    Converts a period string (e.g., '7d', '30d', '365d') to the number of days.

    Args:
    - period (str): The period string.

    Returns:
    - int: The number of days corresponding to the given period.
    """
    period_unit = period[-1]  # 'd' for days
    period_value = int(period[:-1])  # Extract the numeric part

    if period_unit == 'd':
        return period_value
    else:
        raise ValueError("Invalid period format. Use 'd' for days.")

def fetch_news_for_interval(company_name: str, interval_start: str, interval_end: str, max_results: int = 100):
    """
    Fetch news articles for a specific time interval.

    Args:
    - company_name (str): The name of the company to search for.
    - interval_start (str): The start date in 'YYYY-MM-DD' format.
    - interval_end (str): The end date in 'YYYY-MM-DD' format.
    - max_results (int): The maximum number of results for this interval.

    Returns:
    - articles (list): List of dictionaries containing news articles for the interval.
    """
    logger.info(f"Fetching articles for the period: {interval_start} to {interval_end}")
    google_news = GNews(language='en', country='IN', max_results=max_results, start_date=date.fromisoformat(interval_start), end_date=date.fromisoformat(interval_end))
    search_results = google_news.get_news(company_name)
    articles = []
    for result in search_results:
        try:
            link = result["url"]
            date_published = datetime.strptime(result["published date"], '%a, %d %b %Y %H:%M:%S GMT').strftime('%Y-%m-%d')
            title = result["title"]
            if not title:
                raise ValueError("Title empty.")

            article = {
                "Title": title,
                "Link": link,
                "Published_Date": date_published
            }
            articles.append(article)
        except Exception as e:
            logger.error(f"Error : {e}")

    return articles

def news_scraper(company_name: str, period: str, max_results: int = 100):
    """
    Scrapes news articles from Google News for a given company over a split time period.

    Args:
    - company_name (str): The name of the company to search for.
    - period (str): The time period selected from the frontend (e.g., '7d', '30d', '365d', '730d').
    - max_results (int): The maximum number of results per interval (default: 100).

    Returns:
    - df (pd.DataFrame): A DataFrame containing the scraped news articles.
    """

    # Convert the period to the total number of days
    total_days = get_days_from_period(period)

    # Get the current date as the end date (only date, no time)
    end_dt = datetime.now().date()

    # Calculate the start date based on the total days
    start_dt = end_dt - timedelta(days=total_days)

    # Split the date range into 5 equal intervals to get more results
    num_intervals = 5
    interval_days = total_days // num_intervals

    intervals = []
    for i in range(num_intervals):
        interval_start = (start_dt + timedelta(days=i * interval_days)).strftime('%Y-%m-%d')
        interval_end = (start_dt + timedelta(days=(i + 1) * interval_days)).strftime('%Y-%m-%d')
        if i == num_intervals - 1:  # Ensure the last interval goes up to the end date
            interval_end = end_dt.strftime('%Y-%m-%d')

        intervals.append((interval_start, interval_end))

    # Prepare a DataFrame to store all the results
    full_articles_df = pd.DataFrame()

    # Use ThreadPoolExecutor to fetch results in parallel
    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(fetch_news_for_interval, company_name, start, end, max_results) for start, end in intervals]
        
        for future in as_completed(futures):
            articles = future.result()
            interval_df = pd.DataFrame(articles).reset_index(drop=True)
            full_articles_df = pd.concat([full_articles_df, interval_df], ignore_index=True)

    # Sort the final DataFrame by the published date
    full_articles_df = full_articles_df.sort_values(by="Published_Date").reset_index(drop=True)

    return full_articles_df

# Example usage
df = news_scraper("Gujarat Toolroom", "730d", 100)

11/21/2024 08:18:00 PM - Fetching articles for the period: 2022-11-22 to 2023-04-17
11/21/2024 08:18:00 PM - Fetching articles for the period: 2023-04-17 to 2023-09-10
11/21/2024 08:18:00 PM - Fetching articles for the period: 2023-09-10 to 2024-02-03
11/21/2024 08:18:00 PM - Fetching articles for the period: 2024-02-03 to 2024-06-28
11/21/2024 08:18:00 PM - Fetching articles for the period: 2024-06-28 to 2024-11-21


In [38]:
df.columns

Index(['Title', 'Link', 'Published_Date'], dtype='object')

In [39]:
df

Unnamed: 0,Title,Link,Published_Date
0,Orient Green okays expansion plan in green ene...,https://news.google.com/rss/articles/CBMiowFBV...,2023-01-04
1,Sheezan Khan Gets Replaced by Abhishek Nigam i...,https://news.google.com/rss/articles/CBMi-AFBV...,2023-01-13
2,Kerala-Based Jeweller Joyalukkas Withdraws IPO...,https://news.google.com/rss/articles/CBMiswFBV...,2023-02-21
3,Not Being Treated With Respect: Air India Pilo...,https://news.google.com/rss/articles/CBMi8AFBV...,2023-04-26
4,Sunny Leone Oozes Oomph in Scorching Hot Green...,https://news.google.com/rss/articles/CBMizgFBV...,2023-06-04
...,...,...,...
132,Q2FY25 results effect: Smallcap stock below ₹5...,https://news.google.com/rss/articles/CBMi9AFBV...,2024-11-12
133,Guj Toolroom Standalone September 2024 Net Sal...,https://news.google.com/rss/articles/CBMi4wFBV...,2024-11-12
134,Penny Stock Under Rs 15: Company Reports Stell...,https://news.google.com/rss/articles/CBMilgJBV...,2024-11-12
135,Gujarat Toolroom Q2 PAT jumps to Rs 27 crore -...,https://news.google.com/rss/articles/CBMijgFBV...,2024-11-13


## RSS link to Actual Link Extraction

#### Playwright Solution (async programming)

In [None]:
import asyncio
from playwright.async_api import async_playwright
import logging
import time
import pandas as pd

class GoogleNewsLinkResolver:
    def __init__(self, max_concurrent_tasks=10, timeout=5, max_wait=5):
        """
        Initialize the link resolver with configurable parameters.
        
        :param max_concurrent_tasks: Maximum number of concurrent browser tasks
        :param timeout: Page load timeout in seconds
        :param max_wait: Maximum wait time for redirection in seconds
        """
        self.max_concurrent_tasks = max_concurrent_tasks
        self.timeout = timeout * 1000  # Convert to milliseconds for Playwright
        self.max_wait = max_wait
        self.logger = logging.getLogger(__name__)
        logging.basicConfig(level=logging.INFO)

    async def _fetch_single_link(self, link):
        """
        Fetch the original link from a Google News URL.
        
        :param link: Google News link to resolve
        :return: Resolved original link or None if failed
        """
        browser = None
        try:
            async with async_playwright() as p:
                browser = await p.chromium.launch(headless=True)
                page = await browser.new_page()
                await page.goto(link, timeout=self.timeout)
                
                redirected_url = page.url
                start_time = asyncio.get_event_loop().time()
                
                while redirected_url.startswith('https://news.google.com'):
                    await asyncio.sleep(0.5)
                    redirected_url = page.url
                    
                    if asyncio.get_event_loop().time() - start_time > self.max_wait:
                        break
                
                await browser.close()
                return redirected_url
        except Exception as e:
            self.logger.error(f"Error resolving link: {e}")
            return None
        finally:
            if browser:
                await browser.close()

    async def resolve_links(self, links):
        """
        Resolve multiple links concurrently.
        
        :param links: List of Google News links
        :return: Dictionary of rss links: resolved links
        """
        semaphore = asyncio.Semaphore(self.max_concurrent_tasks)
        
        async def bounded_fetch(link):
            async with semaphore:
                return await self._fetch_single_link(link)
        
        tasks = [bounded_fetch(link) for link in links]
        results = await asyncio.gather(*tasks)
        
        return dict(zip(links, results))

    def resolve_links_sync(self, links):
        """
        Synchronous wrapper for resolving links.
        
        :param links: List of Google News links
        :return: Dictionary of original links
        """
        return asyncio.run(self.resolve_links(links))

# Example usage
async def main():
    df = pd.read_excel(r"E:\Intern\Minerva\LLM API\TATA Motors.xlsx")
    links = list(df["Link"])
    
    start_time = time.time()
    resolver = GoogleNewsLinkResolver(max_concurrent_tasks=20)
    resolved_links = await resolver.resolve_links(links)
    
    for original_link, resolved_link in resolved_links.items():
        print(f"Resolved: {resolved_link}")
    print("Total time taken: ", time.time()-start_time)
# If running as a script
if __name__ == "__main__":
    asyncio.run(main())

  asyncio.wait(main())
  asyncio.wait(main())


## Resolved links to Articles text

In [16]:
from gnews import GNews

df = pd.read_excel(r"E:\Intern\Minerva\Web Scrapping\TATA Motors_LinksResolved.xlsx")
df = df.dropna()

google_news = GNews()

url = df["ResolvedLink"][1]

article = google_news.get_full_article(url)
print(article.text)

The breakup of Volkswagen’s proposed joint venture for making budget cars with Tata Motors in 2017 was seen as the end of the road for German car maker’s India plans by analysts. However, five years later the wheels have turned for the company with new product line-up and sales numbers looking up.

Between January-August of the current calendar this year, Skoda (part of Volkswagen Group) sold more cars than it did in the whole of 2012 which was the best year for the company in India. As for Volkswagen, the current year, is likely to be the best since its debut in the country in 2007.

Both VW and Skoda have together rolled out six models, including four SUVs using the MQB (Modularer Querbaukasten meaning modular transversal toolkit) platform in the last three years. While none of these launches were in the budget category, these introductions happened just when the Indian car market was undergoing yet another pivotal shift.

Also Read Q2 Result 2024: HAL, Hero Motocorp, Muthoot Finance

### Trafilatura attempt

* Trafilatura was found to be faster and more suitable for async calls

In [21]:
import pandas as pd
import asyncio
import aiohttp
import time
from trafilatura import extract

async def fetch_url_async(session, url):
    """Fetch the content of the URL asynchronously using aiohttp."""
    async with session.get(url) as response:
        return await response.text()

async def extract_content(url, session):
    """Fetch and extract content from a URL asynchronously."""
    try:
        # Fetch the page content asynchronously
        page_content = await fetch_url_async(session, url)
        
        # Extract content using trafilatura (this is still blocking)
        tflr_content = extract(page_content)
        
        # Check if content was extracted
        if not tflr_content:
            raise ValueError("Trafilatura failed to extract content")
        
        print(f"Extracted content for {url}:\n{len(tflr_content)}")
        
    except Exception as e:
        print(f"Error URL: {url}, Error: {str(e)}")

# Set up the main logic for the Jupyter Notebook
async def process_urls():
    # Read the DataFrame containing the URLs
    df = pd.read_excel(r"E:\Intern\Minerva\Web Scrapping\TATA Motors_LinksResolved.xlsx")
    df = df.dropna()

    # Set up an aiohttp session
    async with aiohttp.ClientSession() as session:
        # Create a list of tasks for URL processing
        tasks = []
        for url in df["ResolvedLink"]:
            if not url.startswith("https://news.google.com"):
                tasks.append(extract_content(url, session))
        
        # Run the tasks concurrently
        await asyncio.gather(*tasks)

# Run the process in Jupyter
start_time = time.time()
await process_urls()
print("Time taken: ", time.time() - start_time)


Extracted content for https://www.business-standard.com/article/companies/tata-motors-signs-pact-with-ride-hailing-company-uber-to-supply-25-000-evs-123022000923_1.html:
317
Extracted content for https://www.telegraphindia.com/business/tata-motors-to-increase-prices-of-commercial-vehicle/cid/1903713:
271
Extracted content for https://indianexpress.com/article/explained/tata-motors-new-york-stock-exchange-indian-markets-8259994/:
2199
Extracted content for https://economictimes.indiatimes.com/industry/renewables/tata-motors-back-on-funding-street-seeks-around-600m-for-electric-vehiles-business/articleshow/97155362.cms?from=mdr:
5951
Extracted content for https://economictimes.indiatimes.com/news/company/corporate-trends/tata-motors-vp-rajan-amba-appointed-as-md-of-jaguar-land-rover-india/articleshow/98015757.cms?from=mdr:
3396
Extracted content for https://economictimes.indiatimes.com/industry/auto/auto-news/tata-motors-target-of-achieving-net-zero-automotive-debt-by-fy24-on-track-co-gr

11/22/2024 03:45:49 AM - parsed tree length: 0, wrong data type or not valid HTML
11/22/2024 03:45:49 AM - empty HTML tree: None
11/22/2024 03:45:49 AM - discarding data: None


Extracted content for https://economictimes.indiatimes.com/industry/renewables/tata-motors-lines-up-suitors-for-stake-in-ev-business/articleshow/98164751.cms?from=mdr:
2765
Extracted content for https://economictimes.indiatimes.com/industry/renewables/cummins-inc-and-tata-motors-collaborate-to-offer-solutions-in-hydrogen-powered-commercial-vehicle-space/articleshow/95510364.cms?from=mdr:
4506
Extracted content for https://www.businesstoday.in/markets/company-stock/story/tata-motors-loss-narrows-to-rs-945-crore-in-q2-revenue-up-30-yoy-352305-2022-11-09:
2427
Error URL: https://www.mobilityoutlook.com/features/for-tata-motors-disruption-is-the-mantra-in-passenger-vehicles/, Error: Trafilatura failed to extract content
Extracted content for https://www.thehindubusinessline.com/companies/tata-motors-to-drive-in-a-host-of-alternative-fuel-trucks-and-buses-towards-2045-net-zero-goals/article66365628.ece:
2670
Extracted content for https://www.thehindubusinessline.com/companies/tata-motors-ge

11/22/2024 03:45:50 AM - parsed tree length: 1, wrong data type or not valid HTML
11/22/2024 03:45:50 AM - empty HTML tree: None
11/22/2024 03:45:50 AM - discarding data: None


Extracted content for https://www.ndtvprofit.com/markets/tata-motors-bets-big-on-ev-disruption-strategy-to-drive-growth:
6367
Extracted content for https://www.fortuneindia.com/enterprise/ratan-tata-the-man-who-made-tata-motors-a-carmaker-of-consequence-turns-85/110929:
4979
Extracted content for https://www.indiatoday.in/auto/cars/story/tata-motors-sells-over-500000-pvs-in-2022-beats-hyundai-in-december-sales-2316261-2023-01-02:
3110
Error URL: https://www.tatamotors.com/press-releases/tata-motors-delivers-218-winger-veterinary-vans-to-the-government-of-west-bengal/, Error: Trafilatura failed to extract content
Extracted content for https://www.business-standard.com/article/companies/tata-motors-subsidiary-completes-acquisition-of-ford-india-s-sanand-plant-122123001062_1.html:
316


11/22/2024 03:45:51 AM - parsed tree length: 1, wrong data type or not valid HTML
11/22/2024 03:45:51 AM - empty HTML tree: None
11/22/2024 03:45:51 AM - discarding data: None
11/22/2024 03:45:51 AM - parsed tree length: 1, wrong data type or not valid HTML
11/22/2024 03:45:51 AM - empty HTML tree: None
11/22/2024 03:45:51 AM - discarding data: None


Extracted content for https://www.reuters.com/business/autos-transportation/indias-tata-motors-posts-first-profit-two-years-demand-surge-2023-01-25/:
43
Extracted content for https://www.cnbctv18.com/business/companies/tata-motors-completes-acquisition-of-ford-india-manufacturing-plant-gujarat-15631761.htm:
3799
Error URL: https://www.tatamotors.com/press-releases/tata-motors-partners-with-graphic-era-hill-university-to-upskill-its-workforce-cultivate-in-demand-technical-expertise/, Error: Trafilatura failed to extract content
Extracted content for https://www.thehindubusinessline.com/companies/tata-motors-sees-increased-demand-from-rural-areas/article66281669.ece:
1770
Extracted content for https://www.team-bhp.com/news/tata-says-fit-finish-problems-are-normal-characteristics-my-safari:
3095
Extracted content for https://www.tatamotors.com/press-releases/tata-tiago-nrg-indias-first-toughroader-cng/:
60
Extracted content for https://www.ndtvprofit.com/business/tata-motors-brings-500-00

11/22/2024 03:45:51 AM - parsed tree length: 1, wrong data type or not valid HTML
11/22/2024 03:45:51 AM - empty HTML tree: None
11/22/2024 03:45:51 AM - discarding data: None
11/22/2024 03:45:51 AM - parsed tree length: 1, wrong data type or not valid HTML
11/22/2024 03:45:51 AM - empty HTML tree: None
11/22/2024 03:45:51 AM - discarding data: None


Extracted content for https://www.tatamotors.com/press-releases/tata-motors-ends-introductory-pricing-for-tiago-ev/:
2965
Extracted content for https://www.tatamotors.com/press-releases/moving-india-forward-at-autoexpo-2023/:
5554
Error URL: https://www.tatamotors.com/press-releases/tata-motors-and-uber-sign-an-mou-for-xpres-t-evs/, Error: Trafilatura failed to extract content
Extracted content for https://www.tataelxsi.com/news-and-events/tata-elxsi-s-tether-paves-the-way-for-tata-motors-connected-vehicle-platform-to-scale-new-heights:
4414
Error URL: https://www.tatamotors.com/press-releases/tata-motors-launches-re-wi-re-its-first-registered-vehicle-scrapping-facility/, Error: Trafilatura failed to extract content
Extracted content for https://www.financialexpress.com/market/tata-motors-shares-plunge-over-4-after-ubs-maintains-sell-with-target-of-rs-825-3607672/:
2084
Extracted content for https://www.autocarpro.in/news-national/tata-motors-records-best-ever-annual-sales-in-2022-5267

11/22/2024 03:45:52 AM - parsed tree length: 1, wrong data type or not valid HTML
11/22/2024 03:45:52 AM - empty HTML tree: None
11/22/2024 03:45:52 AM - discarding data: None


Extracted content for https://www.tatamotors.com/press-releases/tata-motors-bags-an-order-for-delivering-2000-xpres-t-evs-to-evera/:
7330
Extracted content for https://www.autocarpro.in/news-national/tata-motors-to-launch-10-new-products-in-24-36-months-113824:
2882
Extracted content for https://www.tatamotors.com/press-releases/tata-motors-launches-re-wi-re-its-first-registered-vehicle-scrapping-facility/:
5136
Extracted content for https://www.tatamotors.com/press-releases/tata-motors-partners-with-icici-bank-to-offer-financing-for-electric-vehicle-dealers/:
4899
Extracted content for https://www.tatamotors.com/press-releases/go-ev-with-the-all-new-tiago-ev-bookings-open-from-10th-october-12-noon-onwards/:
3252
Error URL: https://www.tatamotors.com/blog/relooking-at-digitization-in-the-automotive-sector-working-toward-a-connected-mobility-universe-2/, Error: Trafilatura failed to extract content
Extracted content for https://www.tatamotors.com/press-releases/tata-tiago-ev-receives-a-

11/22/2024 03:45:52 AM - parsed tree length: 0, wrong data type or not valid HTML
11/22/2024 03:45:52 AM - empty HTML tree: None
11/22/2024 03:45:52 AM - discarding data: None


Error URL: https://www.mobilityoutlook.com/features/tata-motors-takes-design-philosophy-to-the-next-level/, Error: Trafilatura failed to extract content
Extracted content for https://www.indiatoday.in/business/story/tata-motors-shares-decline-for-6th-straight-day-is-it-a-good-time-to-buy-2595008-2024-09-06:
2479
Extracted content for https://www.team-bhp.com/forum/indian-car-dealerships/265307-pathetic-tata-nexon-buying-experience-matrix-style-bullet-dodging-personified.html:
39489
Extracted content for https://www.businesstoday.in/markets/company-stock/story/maruti-suzuki-india-vs-tata-motors-which-automobile-stock-can-deliver-better-returns-375439-2023-03-30:
3522
Extracted content for https://asia.nikkei.com/Business/Automobiles/Tata-Motors-goes-all-out-to-defend-EV-lead-in-India:
460
Extracted content for https://www.thehindubusinessline.com/companies/tata-motors-launches-5-star-safety-rated-new-avatars-of-suvs-safari-harrier/article67430493.ece:
3505
Extracted content for https://

11/22/2024 03:45:58 AM - parsed tree length: 0, wrong data type or not valid HTML


Extracted content for https://www.moneycontrol.com/news/technology/auto/tata-motors-to-invest-2-billion-roll-out-6-7-passenger-vehicles-by-2027-10764111.html:
3023
Extracted content for https://www.moneycontrol.com/news/business/markets/even-as-tata-motors-keeps-outperforming-samir-arora-says-he-does-not-like-the-stock-11728091.html:
2945
Extracted content for https://www.businesstoday.in/markets/company-stock/story/tata-motors-shares-rise-record-high-multibagger-returns-buy-sell-or-hold-406074-2023-11-17:
3105
Extracted content for https://www.businesstoday.in/markets/company-stock/story/tata-technologies-shares-to-list-today-all-eyes-on-tata-motors-stock-heres-why-407653-2023-11-30:
2497


11/22/2024 03:45:58 AM - empty HTML tree: None
11/22/2024 03:45:58 AM - discarding data: None


Error URL: https://www.mobilityoutlook.com/conversations/aspirations-growing-in-the-rural-passenger-car-segment-tata-motors/, Error: Trafilatura failed to extract content
Extracted content for https://www.autocarpro.in/news/tata-motors-to-cut-carbon-emissions-by-30-by-2030-114472:
9798
Extracted content for https://www.autocarpro.in/video/in-conversation-with-tata-motors-mohan-savarkar-117528:
1931
Extracted content for https://economictimes.indiatimes.com/markets/stocks/news/tata-motors-to-sell-9-9-stake-in-tata-technologies-ahead-of-global-engineering-firms-ipo/articleshow/104407004.cms?from=mdr:
4337
Extracted content for https://www.cnbctv18.com/market/tata-motors-share-price-may-fall-another-20-pc-due-to-jlr-indian-passenger-vehicle-warns-ubs-19474314.htm:
4042
Extracted content for https://www.autocarpro.in/analysis-report/tata-motors-aims-to-capture-pv-ev-cv-demand-in-fy2024-amid-growing-competition-115407:
11797
Extracted content for https://www.tatamotors.com/press-releases/ta

11/22/2024 03:45:59 AM - parsed tree length: 1, wrong data type or not valid HTML
11/22/2024 03:45:59 AM - empty HTML tree: None
11/22/2024 03:45:59 AM - discarding data: None


Extracted content for https://www.moneycontrol.com/news/business/markets/tata-motors-shares-tumble-as-september-sales-slip-15-stock-down-12-in-a-month-12834645.html:
2258
Extracted content for https://www.businesstoday.in/markets/company-stock/story/tata-motors-shares-why-motilal-downgraded-stock-post-demerger-news-what-others-say-420084-2024-03-05:
2995
Extracted content for https://www.autocarpro.in/news/tata-motors-signs-mou-with-govt-of-tamil-nadu-to-invest-rs-9000-crore-in-five-years--119595:
3141
Extracted content for https://www.thehindu.com/business/tata-motors-reports-q4-net-profit-of-rs-5408-crore/article66843474.ece:
2884
Error URL: https://www.tatamotors.com/press-releases/celebrating-world-environment-day-tata-motors-introduces-evolve-an-exclusive-customer-engagement-programme-for-tata-ev-owners/, Error: Trafilatura failed to extract content
Extracted content for https://www.reuters.com/world/india/indias-tata-motors-slumps-ubs-warns-significantly-weaker-results-2024-09-11

11/22/2024 03:45:59 AM - parsed tree length: 0, wrong data type or not valid HTML
11/22/2024 03:45:59 AM - empty HTML tree: None
11/22/2024 03:45:59 AM - discarding data: None


Extracted content for https://www.tatamotors.com/press-releases/tiago500000family/:
3410
Extracted content for https://www.business-standard.com/companies/news/tata-motors-to-set-up-manufacturing-unit-in-tamil-nadu-invest-rs-9-000-cr-124031300859_1.html:
313
Extracted content for https://www.livemint.com/companies/news/supreme-court-dismisses-tata-motors-plea-challenging-best-s-decision-to-award-e-bus-contract-to-olectra-greentech-s-evey-trans-11684520446761.html:
1471
Extracted content for https://www.business-standard.com/companies/news/tata-motors-receives-tax-demand-of-nearly-rs-25-cr-plans-to-file-appeal-124050100450_1.html:
311
Error URL: https://www.carwale.com/news/tata-motors-to-hike-prices-in-india-from-17-july/, Error: Trafilatura failed to extract content
Extracted content for https://economictimes.indiatimes.com/markets/stocks/news/fundamental-radar-why-is-tata-motors-poised-to-see-consensus-earnings-upgrade/articleshow/105028764.cms?from=mdr:
857
Extracted content for htt

11/22/2024 03:46:00 AM - parsed tree length: 1, wrong data type or not valid HTML
11/22/2024 03:46:00 AM - empty HTML tree: None
11/22/2024 03:46:00 AM - discarding data: None


Extracted content for https://www.livemint.com/market/stock-market-news/tata-motors-becomes-16th-most-valued-automaker-beats-kia-corporation-mcap-11688991650852.html:
2116
Extracted content for https://www.tatamotors.com/press-releases/tata-motors-launches-high-performance-technologically-advanced-range-of-gensets/:
4113
Error URL: https://www.tatamotors.com/press-releases/tata-motors-joins-hands-with-south-indian-bank-offers-exclusive-financing-program-for-authorized-passenger-ev-dealers/, Error: Trafilatura failed to extract content
Extracted content for https://the-ken.com/story/tata-motors-makes-fleet-comeback-with-evs-now-it-has-to-avoid-cab-car-tag/:
13888
Extracted content for https://www.tatamotors.com/press-releases/tata-motors-ties-up-with-kendriya-police-kalyan-bhandar-to-offer-its-range-of-passenger-electric-vehicles/:
3550
Extracted content for https://www.livemint.com/market/tata-motors-proposes-cancellation-of-a-ordinary-shares-what-does-it-mean-for-investors-15169034954

11/22/2024 03:46:00 AM - parsed tree length: 1, wrong data type or not valid HTML
11/22/2024 03:46:00 AM - empty HTML tree: None
11/22/2024 03:46:00 AM - discarding data: None


Extracted content for https://timesofindia.indiatimes.com/auto/cars/tata-motors-joins-top-10-global-auto-firms-with-51-billion-market-cap/articleshow/112797948.cms:
4262
Extracted content for https://www.moneycontrol.com/news/business/tata-motors-shares-hsbc-hold-muted-upside-12815740.html:
2010
Error URL: https://www.tatamotors.com/press-releases/delhi-charges-ahead-with-400-tata-motors-starbus-evs/, Error: Trafilatura failed to extract content
Extracted content for https://www.livemint.com/market/live-blog/tata-motors-share-price-live-blog-for-07-jul-2023-11688697001091.html:
3769
Extracted content for https://timesofindia.indiatimes.com/city/mumbai/tata-motors-to-split-into-two-listed-entities-cv-and-pv-arm-to-be-separated/articleshow/108219882.cms:
3098
Extracted content for https://www.autocarpro.in/news/tata-motors-slashes-ev-penetration-target-from-50-to-30-by-2030--120964:
4224


11/22/2024 03:46:01 AM - parsed tree length: 0, wrong data type or not valid HTML
11/22/2024 03:46:01 AM - empty HTML tree: None
11/22/2024 03:46:01 AM - discarding data: None


Extracted content for https://economictimes.indiatimes.com/markets/ipos/fpos/tata-tech-ipo-tata-motors-to-take-home-rs-2300-crore-against-rs-34-crore-investment/articleshow/105402544.cms?from=mdr:
4203
Extracted content for https://economictimes.indiatimes.com/industry/auto/auto-news/tata-motors-commences-sales-of-commercial-vehicles-in-thailand/articleshow/105390690.cms?from=mdr:
1066
Extracted content for https://economictimes.indiatimes.com/industry/renewables/blusmart-mobility-taps-tata-motors-mg-motor-india-for-3800-e-cars/articleshow/105911565.cms?from=mdr:
3181
Extracted content for https://economictimes.indiatimes.com/industry/auto/auto-news/tata-motors-board-approves-demerging-cv-and-pv-business-into-two-separate-listed-companies/articleshow/108206224.cms?from=mdr:
2516
Error URL: https://www.carwale.com/news/tata-motors-sells-44044-cars-in-march-2023/, Error: Trafilatura failed to extract content
Extracted content for https://www.autocarpro.in/analysis-sales/electric-cars-and

11/22/2024 03:46:02 AM - parsed tree length: 1, wrong data type or not valid HTML
11/22/2024 03:46:02 AM - empty HTML tree: None
11/22/2024 03:46:02 AM - discarding data: None


Extracted content for https://economictimes.indiatimes.com/markets/stocks/live-blog/tata-motors-q1-results-live-updates-tata-motors-q1-earnings-pat-revenue-profit-loss-fy25-latest-news-01-august-2024/liveblog/112185987.cms?from=mdr:
383
Error URL: https://www.tatamotors.com/press-releases/tata-motors-delivers-first-of-its-kind-hydrogen-fuel-cell-powered-buses-to-indian-oil/, Error: Trafilatura failed to extract content
Extracted content for https://www.businesstoday.in/markets/stocks/story/tata-motors-shares-turn-green-can-the-multibagger-recover-from-correction-blues-445696-2024-09-13:
3579
Extracted content for https://www.tatamotors.com/press-releases/srinagar-smart-city-takes-the-green-route-with-tata-motors-ultra-ev-electric-buses/:
6993


11/22/2024 03:46:02 AM - parsed tree length: 1, wrong data type or not valid HTML
11/22/2024 03:46:02 AM - empty HTML tree: None
11/22/2024 03:46:02 AM - discarding data: None


Extracted content for https://www.tatamotors.com/press-releases/tata-motors-marks-the-advent-of-a-new-era-in-suv-excellence/:
3731
Extracted content for https://www.businesstoday.in/markets/company-stock/story/tata-motors-sales-fall-in-september-2024-shares-react-448491-2024-10-03:
1530
Extracted content for https://www.livemint.com/market/stock-market-news/stock-market-today-tata-motors-m-m-to-bajaj-auto-why-are-auto-stocks-rising-after-stellar-gdp-of-india-11709270105777.html:
3667
Error URL: https://www.tatamotors.com/press-releases/tata-motors-launches-new-avatars-of-safari-and-harrier/, Error: Trafilatura failed to extract content
Extracted content for https://www.tatamotors.com/press-releases/standard-chartered-bank-drives-in-financing-access-for-tata-motors-passenger-electric-vehicle-dealers/:
2374
Extracted content for https://www.tatamotors.com/press-releases/tata-motors-launches-truck-utsav-a-unique-customer-engagement-programme-to-showcase-its-advanced-mobility-solutions/:
5

11/22/2024 03:46:02 AM - parsed tree length: 1, wrong data type or not valid HTML
11/22/2024 03:46:02 AM - empty HTML tree: None
11/22/2024 03:46:02 AM - discarding data: None


Error URL: https://www.tatamotors.com/press-releases/gadget-on-wheels-for-the-evolved-launched-new-nexon-ev-the-game-changer-ev-redefines-advanced-mobility/, Error: Trafilatura failed to extract content
Extracted content for https://www.businesstoday.in/markets/company-stock/story/tata-motors-shares-in-bear-grip-fall-record-high-price-targets-outlook-technicals-448654-2024-10-04:
3717
Extracted content for https://www.tatamotors.com/press-releases/tata-motors-launches-the-new-gen-nexon-with-premium-design-best-in-class-tech-features-2/:
6651
Extracted content for https://www.business-standard.com/companies/results/tata-motors-q4-results-profit-grows-three-fold-to-rs-17-528-crore-124051001050_1.html:
308
Extracted content for https://www.tatamotors.com/press-releases/tata-motors-unveils-state-of-the-art-facilities-for-development-of-hydrogen-propulsion-technologies/:
5235


11/22/2024 03:46:03 AM - parsed tree length: 1, wrong data type or not valid HTML
11/22/2024 03:46:03 AM - empty HTML tree: None
11/22/2024 03:46:03 AM - discarding data: None


Extracted content for https://www.livemint.com/market/stock-market-news/tata-motors-share-price-skyrockets-4-5-on-demerger-plan-crosses-rs-1-000-for-first-time-11709610215767.html:
3770
Extracted content for https://www.tatamotors.com/press-releases/making-ev-charging-more-accessible-bharat-petroleum-and-tata-passenger-electric-mobility-collaborate-to-setup-7000-chargers/:
6321
Extracted content for https://www.business-standard.com/companies/results/tata-motors-q1-fy25-results-net-profit-jumps-74-to-rs-5-566-crore-124080101282_1.html:
308
Error URL: https://www.tatamotors.com/press-releases/tata-motors-and-hdfc-bank-sign-mou-for-seamless-digital-financing-solutions/, Error: Trafilatura failed to extract content
Extracted content for https://www.livemint.com/opinion/online-views/tata-motors-demerger-is-backed-by-a-dual-lane-strategy-11709650040812.html:
3872
Extracted content for https://www.livemint.com/companies/mint-explainer-is-the-tata-motors-demerger-a-strategic-gambit-or-a-nonev

11/22/2024 03:46:03 AM - parsed tree length: 1, wrong data type or not valid HTML
11/22/2024 03:46:03 AM - empty HTML tree: None
11/22/2024 03:46:03 AM - discarding data: None


Error URL: https://www.tatamotors.com/press-releases/tata-motors-and-inchcape-plc-usher-in-a-new-era-of-commercial-vehicle-excellence-in-thailand/, Error: Trafilatura failed to extract content
Extracted content for https://www.tatamotors.com/press-releases/tata-motors-launches-all-new-intra-v70-pickup-intra-v20-gold-pickup-and-ace-ht/:
7000
Extracted content for https://www.tatamotors.com/press-releases/new-nexon-continues-legacy-of-safety-scores-5-stars-in-gncap-rating/:
60
Extracted content for https://www.livemint.com/companies/news/tata-sons-doubles-royalty-fee-for-group-firms-heres-how-much-tata-steel-tcs-tata-motors-will-pay-11715350263903.html:
2541
Extracted content for https://www.livemint.com/companies/tata-motors-says-demerger-will-allow-all-businesses-to-unlock-potential-11719407978129.html:
3437
Extracted content for https://www.tatamotors.com/careers/life-at-tml/:
60
Extracted content for https://www.livemint.com/market/stock-market-news/tata-motors-stock-jumps-5-today-sh

11/22/2024 03:46:04 AM - parsed tree length: 1, wrong data type or not valid HTML
11/22/2024 03:46:04 AM - empty HTML tree: None
11/22/2024 03:46:04 AM - discarding data: None
11/22/2024 03:46:04 AM - parsed tree length: 1, wrong data type or not valid HTML
11/22/2024 03:46:04 AM - empty HTML tree: None
11/22/2024 03:46:04 AM - discarding data: None


Extracted content for https://www.thehindu.com/business/Industry/tata-motors-board-approves-scheme-for-separation-of-passenger-vehicle-and-commercial-vehicle-businesses/article68472748.ece:
1816
Error URL: https://www.tatamotors.com/press-releases/tata-motors-amrutdhara-program-brings-hope-to-water-deprived-communities-in-uttarakhand/, Error: Trafilatura failed to extract content
Error URL: https://www.tatamotors.com/press-releases/tata-motors-flags-off-its-next-gen-eco-friendly-fleet-of-commercial-vehicles-to-tata-steel/, Error: Trafilatura failed to extract content
Extracted content for https://www.businesstoday.in/markets/company-stock/story/tata-motors-shares-trading-lower-clsa-downgrades-419512-2024-02-29:
1992
Extracted content for https://www.business-standard.com/markets/capital-market-news/tata-motors-total-sales-drop-11-yoy-in-sept-24-124100100769_1.html:
299
Extracted content for https://www.thehindu.com/business/Industry/tata-motors-to-demerge-passenger-commercial-business-

11/22/2024 03:46:05 AM - parsed tree length: 1, wrong data type or not valid HTML
11/22/2024 03:46:05 AM - empty HTML tree: None
11/22/2024 03:46:05 AM - discarding data: None


Extracted content for https://www.livemint.com/market/stock-market-news/stocks-to-watch-tata-motors-vedanta-ril-federal-bank-iifl-finance-dlf-11710381451564.html:
6107
Extracted content for https://www.livemint.com/companies/news/tata-motors-q4-update-jaguar-land-rovers-retail-sales-surge-81-strongest-performance-since-2009-11712749559712.html:
2142
Extracted content for https://www.livemint.com/auto-news/tata-motors-festive-season-suv-passenger-vehicles-safari-nexon-imd-rainfall-cash-offers-rural-sales-manufacturers-11725890801177.html:
2132
Error URL: https://www.tatamotors.com/press-releases/magenta-mobility-deepens-collaboration-with-tata-motors/, Error: Trafilatura failed to extract content
Extracted content for https://www.livemint.com/market/stock-market-news/tata-motors-vs-mahindra-mahindra-which-auto-major-should-you-pick-for-the-long-term-stock-market-investments-11711045132828.html:
8132
Extracted content for https://www.livemint.com/market/stock-market-news/tata-motors-to-d

11/22/2024 03:46:07 AM - parsed tree length: 1, wrong data type or not valid HTML
11/22/2024 03:46:07 AM - empty HTML tree: None
11/22/2024 03:46:07 AM - discarding data: None
11/22/2024 03:46:07 AM - parsed tree length: 1, wrong data type or not valid HTML
11/22/2024 03:46:07 AM - empty HTML tree: None
11/22/2024 03:46:07 AM - discarding data: None
11/22/2024 03:46:07 AM - parsed tree length: 1, wrong data type or not valid HTML
11/22/2024 03:46:07 AM - empty HTML tree: None
11/22/2024 03:46:07 AM - discarding data: None
11/22/2024 03:46:07 AM - parsed tree length: 1, wrong data type or not valid HTML
11/22/2024 03:46:07 AM - empty HTML tree: None
11/22/2024 03:46:07 AM - discarding data: None


Error URL: https://www.tatamotors.com/press-releases/tata-motors-launches-vidyadhan-and-utkarsha-programs-to-facilitate-higher-education-of-technicians-children/, Error: Trafilatura failed to extract content
Extracted content for https://www.tatamotors.com/press-releases/tata-motors-launches-its-mid-suv-curvv-at-a-starting-price-of-rs-9-99-lakh/:
7046
Extracted content for https://www.tatamotors.com/press-releases/tata-motors-registered-total-sales-of-215034-units-in-q2-fy25/:
4702
Error URL: https://www.tatamotors.com/press-releases/tata-motors-announces-price-increase-of-its-commercial-vehicles-from-july-2024/, Error: Trafilatura failed to extract content
Error URL: https://www.tatamotors.com/press-releases/tata-motors-group-global-wholesales-at-329847-in-q1-fy25/, Error: Trafilatura failed to extract content
Extracted content for https://www.tatamotors.com/press-releases/tata-motors-registered-total-sales-of-71693-units-in-august-2024/:
1673
Error URL: https://www.tatamotors.com/pre

11/22/2024 03:46:08 AM - parsed tree length: 1, wrong data type or not valid HTML
11/22/2024 03:46:08 AM - empty HTML tree: None
11/22/2024 03:46:08 AM - discarding data: None


Extracted content for https://www.tatamotors.com/press-releases/tata-motors-launches-festival-of-cars-with-incredible-prices-for-its-cars-suvs/:
2025
Error URL: https://www.tatamotors.com/press-releases/tata-motors-celebrates-900000th-vehicle-rollout-from-its-lucknow-facility/, Error: Trafilatura failed to extract content
Extracted content for https://www.tatamotors.com/press-releases/20-lakh-suvs-strong-tata-motors-indias-leading-suv-manufacturer-celebrates-a-historic-milestone-of-its-suv-legacy/:
2830
Extracted content for https://www.tatamotors.com/press-releases/tata-motors-signs-mou-with-esaf-small-finance-bank-for-commercial-vehicle-financing/:
2182
Extracted content for https://www.tatamotors.com/press-releases/tata-power-renewable-energy-and-tata-motors-sign-a-mou-to-set-up-200-fast-charging-stations/:
3462
Extracted content for https://www.tatamotors.com/press-releases/tata-motors-registered-total-sales-of-77521-units-in-april-2024/:
1652
Extracted content for https://www.tata

In [1]:
# Gemini

In [1]:
pip install -q -U google-generativeai

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [7]:
!where python

e:\Intern\Minerva\Final_Dashboard_GIT\venv\Scripts\python.exe
C:\Users\Sahal\AppData\Local\Programs\Python\Python312\python.exe
C:\Users\Sahal\AppData\Local\Microsoft\WindowsApps\python.exe


In [1]:
import google.generativeai as genai
import os

genai.configure(api_key=os.environ["GEMINI_API_KEY"])

model = genai.GenerativeModel("gemini-1.5-flash")
response = model.generate_content("Write a story about a magic backpack. in 5 lines")
print(response.text)

  from .autonotebook import tqdm as notebook_tqdm


Elara found a worn leather backpack in the attic; it hummed faintly.  Inside, she discovered shimmering silks, transforming into anything she imagined.  A castle appeared for tea parties, a dragon for imaginary battles.  But one day, a mischievous imp emerged, demanding a wish in exchange for its silence.  Elara, clever and brave, outwitted the imp, keeping her magical backpack and its secrets.



In [1]:
a= [3,4]
print(a*3)

[3, 4, 3, 4, 3, 4]


In [2]:
c_years = ["Freshman", "Sophomore", 'Junior', 'Senior']
list(enumerate(c_years, 2019))

[(2019, 'Freshman'), (2020, 'Sophomore'), (2021, 'Junior'), (2022, 'Senior')]

In [3]:
def count_rec(n=1):
    if n>3:
        return
    print(n)

    count_rec(n+1)
count_rec()

1
2
3


In [2]:
x  = (1,2,3,4)
try: x.append(5)
    print(x)
except:
    print(x)
finally:
    print("End")

SyntaxError: expected 'except' or 'finally' block (3433672171.py, line 3)

In [3]:
'raining'.find('z')

-1

In [2]:
import itertools

# Numbers from 1 to 11
numbers = list(range(1, 12))

# All possible combinations of '+' and '-'
operations = itertools.product(['+', '-'], repeat=10)

# Function to evaluate expression with given operations
def evaluate_expression(ops):
    expression = str(numbers[0])
    for i, op in enumerate(ops):
        expression += f" {op} {numbers[i + 1]}"
    return eval(expression)

# Count valid expressions that equal 42
count = 0
for ops in operations:
    if evaluate_expression(ops) == 42:
        count += 1

print("Number of ways to get 42:", count)


Number of ways to get 42: 7


In [1]:
[1] + [2,3]

[1, 2, 3]