In [3]:
from youtube_transcript_api import YouTubeTranscriptApi
import re

In [4]:

def extract_video_id(youtube_url):
    """
    Extract the YouTube video ID from a URL.

    Recognizes the ``watch?v=``, ``youtu.be/``, ``/embed/``, ``/v/``, ``/e/``
    and ``/videos/`` URL shapes, plus their percent-encoded (``%2F``) forms.

    Args:
        youtube_url (str): The YouTube video URL. Leading and trailing
            whitespace (for example from a copy-paste) is ignored.

    Returns:
        str: The text following the matched marker, up to the next ``#``,
        ``&``, ``?``, ``/`` or newline.

    Raises:
        ValueError: If ``youtube_url`` is not a string or no ID can be found.
    """
    if not isinstance(youtube_url, str):
        raise ValueError("Could not extract video ID from URL. Please provide a valid YouTube URL.")

    # Strip before matching: the ID character class does not exclude spaces,
    # so trailing whitespace would otherwise become part of the returned ID.
    # The dots in the host names are escaped so they only match a literal '.'.
    video_id_match = re.search(
        r'(?:v=|\/videos\/|youtu\.be\/|\/v\/|\/e\/|\/watch\?v=|&v=|\/embed\/|%2Fvideos%2F|embed%2F|youtu\.be%2F|%2Fv%2F|%2Fe%2F|youtube\.com\/embed\/|youtube\.com\/v\/|youtube\.com\/watch\?v=)([^#\&\?\n\/]+)',
        youtube_url.strip(),
    )

    if video_id_match:
        return video_id_match.group(1)
    else:
        raise ValueError("Could not extract video ID from URL. Please provide a valid YouTube URL.")

In [5]:
def get_transcript(youtube_url, output_file=None, language='en'):
    """
    Download the transcript of a YouTube video and return it as one string.

    Args:
        youtube_url (str): The YouTube video URL; the ID is obtained with
            extract_video_id.
        output_file (str, optional): File to save the transcript to (UTF-8).
            If falsy, the transcript is printed to the console instead.
        language (str, optional): Language code for the transcript. Defaults to 'en'.

    Returns:
        str or None: The transcript segments joined with single spaces and
        stripped of leading/trailing whitespace, or None if any step raised
        (the error is printed rather than propagated).
    """
    try:
        # Extract the video ID from the URL
        video_id = extract_video_id(youtube_url)

        # Get the transcript
        # NOTE(review): newer releases of youtube-transcript-api appear to replace
        # this static call with an instance-level fetch API -- confirm against
        # the installed version.
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=[language])

        # Join once instead of growing a string inside a loop; strip() removes
        # outer whitespace only, so newlines inside segments are preserved.
        transcript_text = " ".join(entry['text'] for entry in transcript_list).strip()

        # Save to file if specified
        if output_file:
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(transcript_text)
            print(f"Transcript saved to {output_file}")
        else:
            print(transcript_text)

        return transcript_text

    except Exception as e:
        # Deliberate best-effort: report the problem and signal failure with None.
        print(f"Error: {e}")
        return None


In [6]:
# Video whose transcript feeds the rest of the notebook.
url="https://www.youtube.com/watch?v=ZPUtA3W-7_I"

# Short-link form of the same video ID, kept as an alternative input:
#url="https://youtu.be/ZPUtA3W-7_I?si=M3RCw7uKRLmD3qhZ"
# The transcript is written to this file; the chunking cell later reads 'lex.txt'.
output="lex.txt"
language="en"
# As the last expression in the cell, the returned transcript is also shown as the cell output.
get_transcript(url, output, language)

Transcript saved to lex.txt


'- My strength lies not in my name, but in the backing of 1.4 billion Indians, and thousands of years of timeless culture\nand heritage. So, wherever I go, I carry with me the\nessence of thousands of years of Vedic tradition, the\ntimeless teachings of Swami Vivekananda, and the blessings, dreams, and aspirations\nof 1.4 billion Indians. When I shake hands with a world leader,\nit\'s not Modi, but 1.4 billion Indians\ndoing so. So, this isn\'t my strength at all. It is\nrather the strength of India. Whenever we speak of peace, the world\nlistens to us. Because India is the land of Gautama\nBuddha and Mahatma Gandhi. And Indians aren\'t hardwired to espouse\nstrife and conflict. We espouse harmony instead. We seek neither to wage war against\nnature, nor to foster strife among\nnations. We stand for peace. And wherever we can act as peacemakers, we\nhave gladly embraced that responsibility. My early life was\nspent in extreme poverty. But we never really felt the burden of\npoverty. Yo

In [None]:
import os
import requests
from urllib.parse import urlencode
from PIL import Image
from io import BytesIO

def get_video_title(video_id, api_key, timeout=10):
    """
    Look up a video's title through the YouTube Data API v3 ``videos`` endpoint.

    Args:
        video_id (str): ID of the video to look up.
        api_key (str): API key, sent as the ``key`` query parameter.
        timeout (float, optional): Seconds to wait for the HTTP request before
            giving up. Defaults to 10.

    Returns:
        str: The title from the first returned item's snippet, or the literal
        string "Video title not found" when the response has no items.

    Raises:
        ValueError: If ``video_id`` or ``api_key`` is empty, or a ValueError
            occurs while handling the response.
        requests.RequestException: On network failures or error HTTP statuses.
            The API key is masked in the message.
        KeyError: If the first item lacks the ``snippet``/``title`` keys.
    """
    if not video_id or not api_key:
        raise ValueError("Invalid video_id or api_key")

    # Construct the API request URL
    base_url = "https://www.googleapis.com/youtube/v3/videos"
    params = {
        "part": "snippet",
        "id": video_id,
        "key": api_key
    }
    url = f"{base_url}?{urlencode(params)}"

    try:
        # Send the API request; without a timeout a stalled connection would
        # block the notebook cell indefinitely.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an error for bad responses
        data = response.json()

        # Validate response and extract the video title
        if "items" in data and len(data["items"]) > 0:
            return data["items"][0]["snippet"]["title"]
        else:
            return "Video title not found"

    except requests.exceptions.RequestException as e:
        # The error text produced for a failed request contains the request
        # URL, and therefore the ``key`` query parameter. Mask both the
        # percent-encoded and the raw form before re-raising, and use
        # ``from None`` so the unmasked original is not displayed in the
        # traceback either.
        encoded_key = urlencode({"key": api_key})
        safe_message = str(e).replace(encoded_key, "key=***").replace(str(api_key), "***")
        raise requests.RequestException(f"Network or API error: {safe_message}") from None
    except ValueError as e:
        raise ValueError(f"Error parsing response: {e}") from e
    except KeyError as e:
        raise KeyError("Expected data not found in the response") from e


In [None]:


def get_video_thumbnail(video_id, api_key, save_path, timeout=10):
    """
    Fetch a video's medium-size thumbnail URL from the YouTube Data API v3
    and save the image with download_image.

    Failures are reported with print() and signalled through the return value
    rather than raised.

    Args:
        video_id (str): ID of the video.
        api_key (str): API key, sent as the ``key`` query parameter.
        save_path (str): Destination path passed on to download_image.
        timeout (float, optional): Seconds to wait for the metadata request.
            Defaults to 10.

    Returns:
        bool: False for invalid parameters, request/JSON errors, an empty
        result or a response without a medium thumbnail; otherwise whatever
        download_image returns.
    """
    if not video_id or not api_key or not save_path:
        print("Invalid parameters provided.")
        return False

    # Construct YouTube API request URL
    base_url = "https://www.googleapis.com/youtube/v3/videos"
    params = {
        "part": "snippet",
        "id": video_id,
        "key": api_key
    }
    url = f"{base_url}?{urlencode(params)}"

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)
        data = response.json()
    except requests.exceptions.RequestException as e:
        # The error text contains the request URL and with it the API key;
        # mask it so the key does not end up in saved notebook output.
        encoded_key = urlencode({"key": api_key})
        safe_message = str(e).replace(encoded_key, "key=***").replace(str(api_key), "***")
        print(f"Error fetching video details: {safe_message}")
        return False
    except ValueError:
        print("Error parsing JSON response.")
        return False

    # Check if the video data exists and extract thumbnail URL
    if "items" in data and len(data["items"]) > 0:
        try:
            thumbnail_url = data["items"][0]["snippet"]['thumbnails']['medium']['url']
        except (KeyError, TypeError):
            # Keep the "print and return False" contract when the response
            # does not have the expected shape instead of raising.
            print("Video not found or no thumbnail available.")
            return False
        return download_image(thumbnail_url, save_path)
    else:
        print("Video not found or no thumbnail available.")
        return False

def download_image(url, save_path, timeout=10):
    """
    Download an image, check that it decodes, and save it to ``save_path``.

    Failures are reported with print() and signalled through the return value
    rather than raised.

    Args:
        url (str): Address of the image to download.
        save_path (str): Destination file path handed to ``Image.save``.
        timeout (float, optional): Seconds to wait for the HTTP request.
            Defaults to 10.

    Returns:
        bool: True if the image was downloaded, verified and saved; False on a
        request error or when verifying/saving the image fails.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an exception for non-200 status codes

        # An image has to be reopened after verify(), so validate on one
        # instance and save from a second one.
        Image.open(BytesIO(response.content)).verify()
        img = Image.open(BytesIO(response.content))
        img.save(save_path)
        print(f"Image successfully downloaded and saved to {save_path}")
        return True
    except requests.exceptions.RequestException as e:
        print(f"Error downloading image: {e}")
        return False
    except (IOError, SyntaxError, ValueError) as e:
        # ValueError covers save() being unable to determine the output format
        # from the file name, which previously escaped as an exception.
        print(f"Error verifying or saving the image: {e}")
        return False

In [17]:
# Exercise the metadata helpers on the same video ID used for the transcript.
youtube_url="https://www.youtube.com/watch?v=ZPUtA3W-7_I"
video_id = extract_video_id(youtube_url)
# The key comes from the environment. os.getenv returns None when the variable is
# unset; in that case get_video_thumbnail returns False and get_video_title raises ValueError.
api_key = os.getenv("YOUTUBE_SEARCH_API")
save_path = "./thumbnail.jpg"
# The boolean result of the thumbnail download is not checked here.
get_video_thumbnail(video_id, api_key, save_path)
video_title = get_video_title(video_id, api_key)
# Last expression of the cell: displays the title as the cell output.
video_title

Image successfully downloaded and saved to ./thumbnail.jpg


'Narendra Modi: Prime Minister of India - Power, Democracy, War & Peace | Lex Fridman Podcast #460'

In [None]:
def chunk_text(file_path, chunk_size):
    """
    Read a UTF-8 text file and split it into fixed-size chunks.

    Chunks are cut purely by length, so a boundary can fall in the middle of
    a word or sentence.

    Args:
        file_path (str): The path to the text file to be read.
        chunk_size (int): Chunk size in units of 1024 characters; every chunk
            except possibly the last holds ``chunk_size * 1024`` characters.
            The file is opened in text mode, so the unit is characters, not bytes.

    Returns:
        list: A list of strings, each representing a chunk of the file's
        content, in file order. Empty if the file is empty.

    Raises:
        ValueError: If ``chunk_size`` is not positive. A size of zero would
            silently produce no chunks and a negative size would make
            ``read`` return the whole file as a single chunk.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number")

    chars_per_chunk = chunk_size * 1024
    with open(file_path, 'r', encoding='utf-8') as file:
        # iter() with a sentinel keeps calling read() until it returns '' (end of file).
        return list(iter(lambda: file.read(chars_per_chunk), ''))

In [None]:
# Transcript file written by the get_transcript cell.
file_path = 'lex.txt'
chunk_size = 32  # chunk_text reads chunk_size * 1024 characters per chunk
chunks = chunk_text(file_path, chunk_size)
# Number of chunks; `chunks` is consumed by the summarization cells.
print(len(chunks))

In [None]:
import ollama

client = ollama.Client()

# System prompt attached to the derived model when it is created below.
system_prompt = (
    "As a professional summarizer, create a detailed yet concise summary of the provided text segment.\n\n"
    "Focus on capturing the main ideas and essential information, "
    "eliminating extraneous details, and maintaining clarity and coherence. "
    "Rely strictly on the provided text, without including external information. "
    "Format the summary with subtopics in paragraph form for easy understanding."
)
# Base model that 'summary-assistant' is derived from.
model = 'llama3.3:latest'

# Create (or update) the 'summary-assistant' model. `from_` now uses the
# `model` variable instead of repeating the literal, so the base model is
# defined in exactly one place.
# NOTE(review): num_ctx of 40*1024 looks sized to hold one chunk_size=32
# (32*1024-character) transcript chunk plus the prompts -- confirm.
response = client.create(
  model = 'summary-assistant',
  from_ = model,
  system = system_prompt,
  stream = False,
  parameters={"num_ctx": 40*1024, "temperature": 0.1}
)

print(response.status)

In [None]:
from concurrent.futures import ThreadPoolExecutor

# Define the function to process each chunk
def summarize_chunk(args):
    """
    Summarize one work item given as a ``(context, user_prompt)`` pair.

    The pair arrives as a single argument so the function can be handed to
    ``map``-style APIs, which pass one item per call. The result of
    make_summary is returned unchanged.
    """
    segment, instruction = args
    summary = make_summary(segment, instruction)
    return summary

# 'chunks' is the list of transcript segments produced by the chunking cell.
# NOTE(review): make_summary (called by summarize_chunk) is not defined in the
# visible part of this notebook -- it must exist in the kernel before this cell runs.
user_prompt = "As a professional summarizer, create a detailed yet concise summary of the provided Context above."

# Bind the result up front: if summarization fails, the lines after the
# try block would otherwise raise NameError on a fresh kernel, or silently
# report a stale list left over from an earlier run.
content_list = []
try:
    with ThreadPoolExecutor(max_workers=20) as executor:
        # Map the summarize_chunk function over the chunks with the user_prompt.
        # Results come back in input order; leaving the with-block waits for all tasks.
        contents = executor.map(summarize_chunk, [(context, user_prompt) for context in chunks])

    # Worker exceptions surface here, while the results are iterated.
    content_list = [content for content in contents if content['message']['content'] is not None]
except Exception as e:
    print(f"An error occurred: {e}")

print(len(content_list))
content_list

In [None]:
from multiprocessing import Pool

# Define the function to process each chunk
def summarize_chunk(args):
    """
    Process-pool worker: unpack ``(context, user_prompt)`` and call make_summary.

    This repeats the definition from the thread-pool cell; whichever cell
    runs last decides which one the kernel uses.
    """
    chunk_context, chunk_prompt = args
    return make_summary(chunk_context, chunk_prompt)

# Process-based alternative to the thread-pool cell: same inputs, same outputs.
# NOTE(review): worker processes have to be able to resolve summarize_chunk and
# make_summary; functions defined only inside a notebook may not be importable
# by workers on platforms that spawn rather than fork -- confirm before relying on this cell.
if __name__ == '__main__':
    # Assuming 'chunks' is a list of context segments
    user_prompt = "As a professional summarizer, create a detailed yet concise summary of the provided Context above."

    # Create a pool of worker processes (Pool() with no argument picks the worker count itself)
    with Pool() as pool:
        # Map the summarize_chunk function over the chunks with the user_prompt
        contents = pool.map(summarize_chunk, [(context, user_prompt) for context in chunks])

    # Filter out any content where 'message' or 'content' is None.
    # Unlike the thread-pool cell, this uses .get() so a missing 'message' key is tolerated.
    content_list = [content for content in contents if content.get('message', {}).get('content') is not None]

    print(len(content_list))
    print(content_list)

# Next Steps

In [1]:
import os

def read_and_concatenate_chunks(directory):
    """
    Concatenate the ``chunk_results_<n>.txt`` files of a directory in numeric order.

    Each file's content is preceded by a ``==== SubContext <i> ======`` header
    line, where ``<i>`` counts the files from 1 in sorted order, and followed
    by a newline.

    Args:
        directory (str): Directory to scan (not recursive).

    Returns:
        str: The concatenated text; an empty string if no matching file exists.
    """
    def chunk_number(filename):
        # Numeric part after 'chunk_results_': third '_'-separated field, up to its first '.'.
        # Returns None for names such as 'chunk_results_notes.txt' so they are
        # skipped instead of aborting the sort with ValueError.
        try:
            return int(filename.split('_')[2].split('.')[0])
        except ValueError:
            return None

    # Map each candidate file name to its chunk number.
    numbered = {}
    for filename in os.listdir(directory):
        if filename.startswith('chunk_results_') and filename.endswith('.txt'):
            number = chunk_number(filename)
            if number is not None:
                numbered[filename] = number

    chunk_files = sorted(numbered, key=numbered.get)

    sections = []
    for index, filename in enumerate(chunk_files, start=1):
        file_path = os.path.join(directory, filename)
        # Explicit UTF-8, matching the other file I/O in this notebook, so the
        # result does not depend on the platform's default encoding.
        # NOTE(review): assumes the chunk files were written as UTF-8 -- the
        # code that writes them is not visible here.
        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()

        sections.append(f"==== SubContext {index} ======\n" + content + "\n")

    # Join once instead of repeatedly extending a string.
    return "".join(sections)



In [2]:
# Collect the per-chunk result files from the current working directory.
directory_path = '.'
# `concatenated_content` is the shared input of the report-body, introduction and conclusion cells.
concatenated_content = read_and_concatenate_chunks(directory_path)
print(concatenated_content)

The conversation with Narendra Modi, Prime Minister of India, delves into his life story, leadership philosophy, and spiritual practices. Born in Vadnagar, Gujarat, to a modest family, Modi grew up in extreme poverty but never felt its burden due to his family's resilience and hard work. His father was known for his discipline and early morning routines, while his mother instilled values of service and cleanliness.

Modi’s upbringing was deeply influenced by the rich cultural heritage of Vadnagar, a town with historical significance dating back 2800 years. He recalls his childhood as one filled with curiosity about history and tradition, which later shaped his worldview. Despite living in scarcity, Modi found joy and contentment in simple pleasures and developed habits of neatness and discipline.

Modi’s spiritual journey began early, influenced by the teachings of Swami Vivekananda and other Indian philosophers. At 17, he embarked on a two-year pilgrimage to the Himalayas, where he li

In [3]:
import ollama

# Instructions appended after the concatenated chunk summaries (see the f-string below).
# NOTE(review): the "==== INSTRUCTIONS ====" header has no trailing newline or space, so it
# runs straight into "Integrate the above ..." in the final prompt -- confirm this is intended.
prompt = (
    "==== INSTRUCTIONS ===="
    "Integrate the above provided text from various SubContext into a unified, coherent REPORT BODY suitable for publication. \n"
    "This REPORT BODY will be part of a larger report, include only the main body divided into suitable main sections and subsections without any introduction or conclusion section. \n"
    "Ensure that the combined summary: \n"
    "	1.	Maintains Logical Flow: Arrange the content so that ideas transition smoothly, preserving the original meaning and context. \n"
    "	2.	There should NOT be any duplicate topics or subtopics. \n"
    "	3.	Ensures Clarity: Eliminate redundant information, focusing on delivering a clear narrative. \n"
    "	4.	Adheres to Publication Standards: Format the summary appropriately, using paragraph form and ensuring it meets the conventions of professional writing. \n"
    "   5.  Using bullet points only where appropriate.  \n"
    "   6.  As already mentioned you should NOT have an Introduction or a Conclusion section\n"
    "\n"
    "Avoid introducing external information not present in the original segments. The final report body should be comprehensive, providing readers with a clear understanding of the combined content."
)

# System prompt for the report-writing requests. The introduction cell reuses this
# variable, and later cells rebind it, so results depend on which cell ran last.
# NOTE(review): the first two fragments join as "critically acclaimed,objective" (no space).
system_prompt = (
    "You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed,"
     "objective and structured reports on given text. \n"
     "The reports you create are well-structured, informative, in-depth, and include facts and numbers if available.\n"
     "The reports should provide a well-structured framework, including the main sections, subsections, and key points to be covered.\n"
     "Use markdown syntax and follow the APA format. \n"
     "Always respond in english language (en)."
)
# Generate the report body with the 'summary-assistant' model; the prompt is the
# concatenated summaries followed by the instructions above.
main_body = ollama.generate(
    model='summary-assistant',
    prompt=f"{concatenated_content}\n{prompt}",
    system=system_prompt
)
# The generated text is stored under the 'response' key of the result.
main_body_text = main_body.get('response')
print(main_body_text)

## Leadership Philosophy and Spiritual Practices

### Early Life and Upbringing

Narendra Modi was born into a modest family in Vadnagar, Gujarat, where he grew up in extreme poverty but found joy and contentment in simple pleasures. His father's discipline and early morning routines, along with his mother’s values of service and cleanliness, deeply influenced him. The rich cultural heritage of Vadnagar, dating back 2800 years, instilled a sense of curiosity about history and tradition that later shaped Modi’s worldview.

### Spiritual Journey

Modi’s spiritual journey began at an early age, heavily influenced by the teachings of Swami Vivekananda and other Indian philosophers. At 17, he embarked on a two-year pilgrimage to the Himalayas where he lived minimally, often sleeping outdoors without shelter. This period was marked by intense self-exploration and physical endurance, which strengthened his inner resolve.

### Influence of Spiritual Leaders

Modi formed a deep bond with Swami 

In [None]:
# Re-display the report body generated in the previous cell.
print(main_body_text)


In [4]:
# Instructions for the introduction; rebinds the notebook-global `prompt`.
# NOTE(review): as in the report-body prompt, the header line has no trailing newline.
prompt = (
    "==== INSTRUCTIONS ===="
    "Review the above provided SubContext and write a short introductions section:\n "
    "	1.	The introduction should be no more than four (4) sentences. \n"
    "	2.	The introduction should capture the attention of the reader so that they will stay engaged with the material. \n"
    "	3.	Do not introduce anything that is not present in the material provided. \n"
)

# Generate the introduction with the 'summary-assistant' model.
# `system_prompt` is not defined in this cell: it uses whatever value the kernel
# currently holds, which depends on which of the other cells was run last.
introduction = ollama.generate(
    model='summary-assistant',
    prompt=f"{concatenated_content}\n{prompt}",
    system=system_prompt
)

introduction_text = introduction.get('response')
print(introduction_text)

Narendra Modi, Prime Minister of India, offers a profound glimpse into his life story, spiritual practices, and leadership philosophy, highlighting his journey from humble beginnings to leading the world's largest democracy. His narrative weaves together deep cultural roots, rigorous self-discipline, and a commitment to peace and service, providing insights into both personal growth and national governance. Through discussions on spirituality, science, education, and international relations, Modi’s approach underscores the importance of ethical decision-making and fostering unity amidst diversity. This exploration also delves into his reflections on literature, particularly Herman Hesse's "Siddhartha," offering a rich tapestry of wisdom and personal development.


In [None]:
# Inspect the concatenated chunk summaries that are sent as context to the model.
print(concatenated_content)

In [10]:
# Instructions for the conclusion; rebinds the notebook-global `prompt`.
prompt = (
    "==== INSTRUCTIONS ===="
    "Review the above provided SubContext and write a short conclusion section:\n "
    "	1.	The conclusion should be no more than three (3) sentences. \n"
    "	2.	The conclusion should capture the reads attention. They should leave with the feeling that they have learned something from the material. \n"
    "	3.	Do not introduce anything that is not present in the material provided. \n"
    "   4.  Always respond in English language (en)."
)

# Rebinds `system_prompt` with the language instruction moved to the front.
# NOTE(review): the first two fragments join without a separator
# ("...(en).You are an AI ...") -- confirm this is intended.
system_prompt = (
    "Respond in english language (en)."
    "You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed,"
     "objective and structured reports on given text. \n"
     "The reports you create are well-structured, informative, in-depth, and include facts and numbers if available.\n"
     "The reports should provide a well-structured framework, including the main sections, subsections, and key points to be covered.\n"
     "Use markdown syntax and follow the APA format. \n"
)


# Unlike the report-body and introduction cells, this request targets the base model
# 'llama3.3:latest' directly and sets the temperature per request.
# NOTE(review): the settings baked into 'summary-assistant' therefore presumably do not apply here -- confirm this is intended.
conclusion = ollama.generate(
    model='llama3.3:latest',
    prompt=f"{concatenated_content}\n{prompt}",
    system=system_prompt,
    options={'temperature':0.5}
)

conclusion_text = conclusion.get('response')
print(conclusion_text)

In conclusion, the exploration of human interaction, travel experiences, and literary reflections, particularly through Herman Hesse's "Siddhartha," offers profound insights into the nature of reality, wisdom, and personal growth. Through these discussions, readers are invited to embrace a deeper understanding of themselves and the world around them, fostering a sense of unity and interconnectedness. Ultimately, this journey of discovery encourages readers to approach life with an open heart and mind, valuing the wisdom that can be gleaned from both Eastern philosophical traditions and personal experiences.


In [11]:
# Assemble the three generated parts under the section markers that the
# instructions below refer to.
draft_report = (
    "==== INTRODUCTION ==== \n"
    f"{introduction_text} \n\n"
    "==== REPORT BODY ==== \n"
    f"{main_body_text} \n\n"
    "==== CONCLUSION ==== \n"
    f"{conclusion_text}"
)



# Editing instructions appended after the draft; rebinds the notebook-global `prompt`.
prompt = (
    "==== INSTRUCTIONS ===="
    "Review the above provided Sections INTRODUCTION, REPORT BODY, CONCLUSION and integrate then into the final professional report:\n "
    "	1.	Adhere to all Markdown coding rules \n"
    "	2.	Create an Introduction section by adding the INTRODUCTION context \n"
    "	3.	Review REPORT BODY and remove any introduction or conclusion sections. \n"
    "   4.  Review REPORT BODY for consistency do NOT remove any detail already provided. \n"
    "	5.	Create an Conclusion section by adding the CONCLUSION context \n"
    "   6.  Double check that there is only one Introduction section and one Conclusion section in the report."
)

# Rebinds `system_prompt` once more, this time with an editor persona.
system_prompt = (
    "You are a copywriting editor specializing in refining and polishing pre-existing documents to prepare them for publication. \n"
    "You are an expert in using Markdown. \n"
)

# Generate the final report with the 'summary-assistant' model
final_report = ollama.generate(
    model='summary-assistant',
    prompt=f"{draft_report}\n{prompt}",
    system=system_prompt
)

# Print the generated response
print(final_report.get('response'))

# Narendra Modi: A Journey from Humble Beginnings to Leadership

## Introduction

Narendra Modi, Prime Minister of India, offers a profound glimpse into his life story, spiritual practices, and leadership philosophy, highlighting his journey from humble beginnings to leading the world's largest democracy. His narrative weaves together deep cultural roots, rigorous self-discipline, and a commitment to peace and service, providing insights into both personal growth and national governance. Through discussions on spirituality, science, education, and international relations, Modi’s approach underscores the importance of ethical decision-making and fostering unity amidst diversity. This exploration also delves into his reflections on literature, particularly Herman Hesse's "Siddhartha," offering a rich tapestry of wisdom and personal development.

## Leadership Philosophy and Spiritual Practices

### Early Life and Upbringing

Narendra Modi was born into a modest family in Vadnagar, Gujara

In [12]:
# Persist the generated report text as UTF-8 so it can be reloaded without re-running the model.
with open('final_report.txt', 'w', encoding='utf-8') as file:
    file.write(final_report.get('response'))

In [16]:
# Reload the saved report; `final_report_text` is the input of the PDF conversion cell.
with open('final_report.txt', 'r', encoding='utf-8') as file:
    final_report_text = file.read()

In [17]:
import markdown2
from weasyprint import HTML

def markdown_to_pdf_weasyprint(markdown_content, output_pdf_path):
    """
    Render Markdown text to a PDF file.

    The text is converted to HTML with markdown2 and that HTML is written
    to ``output_pdf_path`` through WeasyPrint's ``HTML`` class.

    Args:
        markdown_content (str): Markdown source text.
        output_pdf_path (str): Destination path of the PDF.

    Returns:
        None
    """
    document = HTML(string=markdown2.markdown(markdown_content))
    document.write_pdf(output_pdf_path)

# Example usage
# NOTE(review): this sample string is assigned but never used -- the call below
# converts `final_report_text` (loaded from final_report.txt) instead.
markdown_content = """
# Sample Markdown

This is a sample Markdown content to demonstrate conversion to PDF.

## Features

- Easy to use
- Lightweight
- Customizable
"""

output_pdf_path = 'output_weasyprint.pdf'
# Convert the final report, not the sample above, to PDF.
markdown_to_pdf_weasyprint(final_report_text, output_pdf_path)
print(f'PDF generated successfully at {output_pdf_path}')

PDF generated successfully at output_weasyprint.pdf


In [19]:
from checkpoint import set_checkpoint_directory,checkpoint
# Smoke test for the imported `checkpoint` helpers, using the current directory.
# NOTE(review): the `checkpoint` module is not part of this notebook. Judging by the
# recorded output ("Skipping 'test_call' as checkpoint 'test' exists."), the decorator
# skips the call once a checkpoint named 'test' exists -- confirm against that module.
set_checkpoint_directory('.')
@checkpoint('test')
def test_call():
    # Visible side effect used to tell whether the body actually ran.
    print("This is a test!")

test_call()

Skipping 'test_call' as checkpoint 'test' exists.
