In [62]:
from dotenv import load_dotenv
load_dotenv()
import os
import google.generativeai as genai
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
from youtube_transcript_api import YouTubeTranscriptApi

model_config = {
  "temperature": 0.1,
  "top_p": 1,
  "top_k": 1,
}

summarizer_prompt="""You are a Youtube video summarizer. 
You will be taking the transcript text and summarizing the content. Please provide the summary of the text given here : """

##Getting the transcript data from yt videos
def extract_transript_details(youtube_video_url):
    try:
        video_id=youtube_video_url.split("=")[1]
        # print(video_id)
        transcript_text=YouTubeTranscriptApi.get_transcript(video_id)
        transcript=""
        for i in transcript_text:
            transcript+=" "+i["text"]

    except Exception as e:
        raise e
    return transcript

#Getting the summary based on prompt from Google Gemini Pro
def generate_gemini_content(transcript_text, prompt):
    model=genai.GenerativeModel("gemini-pro", generation_config= model_config)
    response=model.generate_content(prompt+transcript_text)
    return response.text


In [63]:
claim_prompt='''You are a medical expert who needs to research the validity of claims for scientific evidence.
You will be provided a text. Identify at most 5 health/medicine/science/nutrition related claims that you would like to verify.
Ignore opinions. The claims needs to be given in single line points separated by *. Include medical terminology. 
Structure the sentences as Subject-verb-object (SVO) format.'''

def generate_gemini_claims(summary, prompt):
    model=genai.GenerativeModel('gemini-pro', generation_config= model_config)
    response=model.generate_content(prompt+summary)
    return response.text

In [85]:
youtube_link= "https://www.youtube.com/watch?v=iFtqudy39sA"
transcript_text=extract_transript_details(youtube_link)
summary=generate_gemini_content(transcript_text, summarizer_prompt)
claims=generate_gemini_claims(summary, claim_prompt)

In [86]:
lines= claims.strip().split("\n")
claims_list= [line.lstrip('* ').strip() for line in lines if line.startswith('* ')]
claims_list

['Sunlight exposure improves mood, energy, and sleep.',
 'Caffeine consumption after waking prevents afternoon crashes.',
 'Distraction removal enhances focus and productivity.',
 'Protein-rich breakfasts sustain energy levels.',
 'Post-meal walks regulate digestion and glucose levels.']

In [87]:
keyword_prompt= """You are a medical researcher who wants to check the validity of the following claim by searching for articles from pubmed. 
Extract 3 keywords summarizing the claim."""

def generate_gemini_keywords(claims, keyword_prompt):
    model=genai.GenerativeModel('gemini-pro', generation_config= model_config)
    response=model.generate_content(keyword_prompt+claims)
    return response.text


In [88]:
claims_list

['Sunlight exposure improves mood, energy, and sleep.',
 'Caffeine consumption after waking prevents afternoon crashes.',
 'Distraction removal enhances focus and productivity.',
 'Protein-rich breakfasts sustain energy levels.',
 'Post-meal walks regulate digestion and glucose levels.']

In [90]:

for claim in claims_list:
    response= generate_gemini_keywords(claims= claim, keyword_prompt=keyword_prompt)
    print(response)
    print("\n")

- Sunlight exposure
- Mood
- Sleep


- Caffeine
- Afternoon crashes
- Wakefulness


1. Distraction
2. Focus
3. Productivity


1. Protein-rich breakfasts
2. Energy levels
3. Sustain


- Postprandial
- Digestion
- Glucose metabolism


