In [1]:
from pytube import Playlist
from youtube_transcript_api import YouTubeTranscriptApi 

In [2]:
# Stock / company names of interest; the summarization prompt is focused on these.
STOCK_NAMES = [
    "Reliance",
    "TCS",
    "HDFC",
    "Bharti Airtel",
    "ICICI",
    "Infosys",
    "HUL",
    "SBI",
    "ITC",
    "LIC India",
]

In [3]:
# One pytube Playlist per news channel.
NDTV_Profit = Playlist(
    "https://www.youtube.com/playlist?list=PLHQtHjXocftcYYP0b3fd8gUFwCGZsE6C4"
)
CNBC_TV_18 = Playlist(
    "https://www.youtube.com/playlist?list=PLjq9mRS1PfGCFov0hYIP-5XwQoAGHt2TR"
)

# Every playlist that gets scanned for transcripts.
Lists = [NDTV_Profit, CNBC_TV_18]

In [12]:
from datetime import datetime, timedelta
from pytube import Playlist, YouTube
from youtube_transcript_api import YouTubeTranscriptApi

def filter_videos_by_date(playlist, days_old=1, max_videos=10):
    """Return the URLs of playlist videos published within the last ``days_old`` days.

    Only the first ``max_videos`` entries of ``playlist`` are inspected, so the
    result may contain fewer than ``max_videos`` URLs (``max_videos`` caps the
    number of videos examined, not the number returned).

    Args:
        playlist: Iterable of video URLs (e.g. a pytube ``Playlist``).
        days_old: Maximum age in days for a video to count as recent.
        max_videos: Number of leading playlist entries to examine.

    Returns:
        list: URLs of the examined videos whose publish date is on or after
        ``now - days_old``, in playlist order. Videos that cannot be processed
        or have no publish date are reported via ``print`` and skipped.
    """
    recent_videos = []
    cutoff = datetime.now() - timedelta(days=days_old)

    for idx, video_url in enumerate(playlist):
        if idx >= max_videos:
            break  # Stop once the maximum number of videos has been examined

        try:
            published = YouTube(video_url).publish_date

            if published is None:
                # Report explicitly instead of relying on a TypeError from the
                # comparison below.
                print(f"Error processing video {video_url}: publish date unavailable")
                continue

            if published.tzinfo is not None:
                # ``cutoff`` is naive local time; an aware datetime cannot be
                # compared with it, so convert to local time and drop tzinfo.
                published = published.astimezone().replace(tzinfo=None)

            if published >= cutoff:
                recent_videos.append(video_url)
        except Exception as e:
            # Best effort: a single failing video must not abort the whole scan.
            print(f"Error processing video {video_url}: {e}")

    return recent_videos

def _extract_video_id(video_url):
    """Return the value of the ``v`` query parameter of a YouTube watch URL."""
    from urllib.parse import parse_qs, urlparse

    ids = parse_qs(urlparse(video_url).query).get("v")
    if ids:
        return ids[0]
    # No ``v`` query parameter: fall back to the plain string split.
    return video_url.split("v=")[-1]


def get_relevant_transcripts(playlist_list, stock_names, max_videos_per_playlist=10):
    """Fetch the transcripts of the recent videos of every playlist.

    Args:
        playlist_list: Iterable of playlists; each is passed to
            ``filter_videos_by_date`` to select its recent videos.
        stock_names: Accepted for interface compatibility. It is currently NOT
            used: every transcript that can be retrieved is returned, without
            filtering by stock name.
        max_videos_per_playlist: Forwarded to ``filter_videos_by_date`` as
            ``max_videos``.

    Returns:
        list[str]: One string per video (its transcript segments joined with
        spaces). A video ID that occurs more than once across the playlists is
        fetched only once. Videos whose transcript cannot be retrieved are
        reported via ``print`` and skipped.
    """
    transcripts = []
    seen_video_ids = set()

    for playlist in playlist_list:
        recent_videos = filter_videos_by_date(playlist, max_videos=max_videos_per_playlist)
        for video_url in recent_videos:
            # Parse the query string so extra parameters (e.g. "&list=...")
            # do not end up inside the video ID.
            video_id = _extract_video_id(video_url)
            if video_id in seen_video_ids:
                continue  # Same video already handled via another playlist entry
            seen_video_ids.add(video_id)

            try:
                transcript = YouTubeTranscriptApi.get_transcript(video_id)
                transcripts.append(" ".join(segment["text"] for segment in transcript))
            except Exception as e:
                print(f"No transcript found for video ID {video_id}: {e}")

    return transcripts

In [13]:
# Note: despite the name, this holds the transcripts of every playlist in `Lists`.
NDTV_Transcript = get_relevant_transcripts(
    playlist_list=Lists,
    stock_names=STOCK_NAMES,
)


No transcript found for video ID vOPPUnEMadQ: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=vOPPUnEMadQ! This is most likely caused by:

Subtitles are disabled for this video

If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem!
No transcript found for video ID T_ngatTo6Dk: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=T_ngatTo6Dk! This is most likely caused by:

Subtitles are disabled for this video

If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix

In [15]:
import os
import dotenv
import google.generativeai as genai

# Load environment variables from .env file
dotenv.load_dotenv()
api_key = os.getenv("GEMINI_API_KEY")

if not api_key:
    raise ValueError("GEMINI_API_KEY not found in environment variables")

genai.configure(api_key=api_key)

generation_config = {
    "temperature": 1,
    "top_p": 0.95,
    "top_k": 64,
    "max_output_tokens": 8192,
    "response_mime_type": "application/json",
}

model = genai.GenerativeModel(
    model_name="gemini-1.5-pro",
    generation_config=generation_config,
    system_instruction="Summarize only the relevant information about the specified stock names."
)

# Fail early with a clear message instead of sending an empty request.
if not NDTV_Transcript:
    raise ValueError("No transcripts available to summarize")

# Build ONE self-contained prompt: the instruction followed by every transcript
# as plain text. Each transcript is sent exactly once, and no Python list repr
# is embedded in the prompt.
transcript_block = "\n\n".join(
    f"Transcript {idx}:\n{text}" for idx, text in enumerate(NDTV_Transcript, start=1)
)
prompt = (
    f"Summarize the following transcript texts, focusing only on mentions of "
    f"{', '.join(STOCK_NAMES)}:\n{transcript_block}"
)

# Use Generative AI to summarize (single-turn request; no chat history needed)
response = model.generate_content(prompt)

print(response.text)

  from .autonotebook import tqdm as notebook_tqdm


{"Infosys": "The street is expecting a strong revenue growth for Infosys, with dollar revenue growth projected at 3.6% and constant currency revenue growth at 3%. This positive outlook is based on Infosys's recent outperformance of its peers and the anticipation of a guidance upgrade. Analysts believe that the company will raise its current revenue guidance from 3-4%. Key factors driving this optimism include strong revenue growth ahead of peers and a positive outlook for the BFSI sector, which constitutes a significant portion of Infosys's revenue."}



In [116]:
response.text

'{"stock_name": "Sundaram Finance", "summary": "The stock price of Sundaram Finance increased by 5.12%."},\n{"stock_name": "Gold Finance", "summary": "The stock price of Gold Finance increased by 5%."},\n{"stock_name": "NBCC", "summary": "The stock price of NBCC increased by 6%. The increase was likely due to positive discussions surrounding PSU stocks."},\n{"stock_name": "Westlife Food", "summary": "The stock price of Westlife Food increased by 6%. This followed discussions about their QSRs with Mangalam."},\n{"stock_name": "Ruka", "summary": "The stock price of Ruka increased by 10%. The increase is attributed to positive market sentiment and potentially favorable policy announcements in the sugar industry."},\n{"stock_name": "Power Grid Corporation of India", "summary": "Power Grid Corporation of India\'s stock price experienced downward pressure, contributing to the red color on the Nifty index."},\n{"stock_name": "Bajaj Auto", "summary": "Bajaj Auto\'s stock performed well and was

In [119]:
from fastapi import FastAPI, HTTPException
import requests
import uvicorn
import nest_asyncio
import json
app = FastAPI()


def _parse_summary_payload(raw_text):
    """Parse the model output into a flat list of JSON values.

    The text is first parsed as-is. If that fails, it is wrapped in ``[...]``
    so that a comma-separated run of JSON objects also parses. A result that is
    not already a list is wrapped in a one-element list, so the return value is
    always a list.

    Raises:
        ValueError: If neither form is valid JSON.
    """
    for candidate in (raw_text, f"[{raw_text}]"):
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        return parsed if isinstance(parsed, list) else [parsed]
    raise ValueError("model output is not valid JSON")


@app.get("/get-summary")
def get_summary():
    """Serve the summary held in the notebook-level ``response`` as a JSON list.

    Raises:
        HTTPException: 502 when the summary text is unavailable or cannot be
            parsed as JSON.
    """
    try:
        # NOTE(review): reading ``response.text`` may itself raise ValueError
        # when the model returned no usable text - confirm against the SDK.
        return _parse_summary_payload(response.text)
    except ValueError as exc:
        raise HTTPException(
            status_code=502,
            detail=f"Summary is not available as valid JSON: {exc}",
        ) from exc

In [120]:

# nest_asyncio patches asyncio so that uvicorn.run() can be called from a
# notebook cell (where an event loop is presumably already running).
nest_asyncio.apply()

if __name__ == "__main__":
    # Serve the API on port 8001, listening on all network interfaces.
    uvicorn.run(app, port=8001, host="0.0.0.0")

INFO:     Started server process [16320]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8001 (Press CTRL+C to quit)


INFO:     172.16.5.116:60655 - "GET /get-summary HTTP/1.1" 200 OK
INFO:     172.16.5.116:60687 - "GET /get-summary HTTP/1.1" 200 OK
INFO:     127.0.0.1:55523 - "GET /get-summary HTTP/1.1" 200 OK
INFO:     127.0.0.1:55523 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO:     172.16.5.116:57721 - "GET /get-summary HTTP/1.1" 200 OK
INFO:     172.16.5.116:57721 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO:     172.16.5.116:57734 - "GET /docs HTTP/1.1" 200 OK
INFO:     172.16.5.116:57734 - "GET /openapi.json HTTP/1.1" 200 OK
INFO:     172.16.5.116:58290 - "GET /get-summary HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [16320]
