In [1]:
import json
file_name = "week-11"

# Load the raw transcript export for the selected week.
# The encoding is passed explicitly: without it open() falls back to the
# platform's locale encoding, which can mis-decode non-ASCII characters, and
# the other cells in this notebook already read and write with UTF-8.
with open("/home/mikey-sharma/projects/nyu/zoom-classes/raw-transcripts/" + file_name + ".json", "r", encoding="utf-8") as file:
    data = json.load(file)  # Parsed JSON document



In [2]:
# Take the transcript entries out of the parsed JSON, keep only the 'text'
# field of each entry, and echo the collected values.
newdata = data['result']['transcriptList']
all_texts = list(map(lambda entry: entry['text'], newdata))
print(all_texts)



In [3]:

# Paths derived from file_name: the plain-text transcript written below, and
# the Markdown summary location that is only defined here.
input_path = f"/home/mikey-sharma/projects/nyu/zoom-classes/raw-transcripts/{file_name}.txt"
output_path = f"/home/mikey-sharma/projects/nyu/zoom-classes/processed-transcripts/{file_name}-summary.md"

# Write the collected texts to the transcript file, one entry per line
with open(input_path, mode="w", encoding="utf-8") as txt_file:
    txt_file.write("\n".join(all_texts))

print(f"Data saved successfully in {input_path}")

Data saved successfully in /home/mikey-sharma/projects/nyu/zoom-classes/raw-transcripts/week-11.txt


In [4]:
import os
from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv

# Populate the process environment (python-dotenv)
load_dotenv()

# The Gemini key has to be present before the client is built
google_api_key = os.environ.get("GEMINI_API_KEY")
if not google_api_key:
    raise ValueError("Gemini API key not found in environment variables")


# LangChain chat client for Gemini, bound to the notebook-level name `llm`
llm = ChatGoogleGenerativeAI(
    api_key=google_api_key,
    model="gemini-1.5-pro",
    # Sampling settings
    temperature=0.75,
    top_p=0.9,
    top_k=50,
    # Output size, request timeout and retry settings
    max_output_tokens=8192,
    timeout=120,
    max_retries=3,
    # Extra logging while debugging
    verbose=True,
)

  from .autonotebook import tqdm as notebook_tqdm


In [5]:

def _build_summary_prompt(transcript):
    """Return the full instruction prompt with ``transcript`` appended at the end."""
    return (
        "You are an AI assistant specializing in creating **highly detailed, structured, and lengthy** class summaries. "
        "Your task is to analyze the following transcript and generate a **comprehensive** summary that ensures complete understanding.\n\n"
        "### Instructions:\n"
        "1. **Introduction:**\n"
        "   - Provide the class subject, instructor name, and date.\n"
        "   - Clearly state the **learning objectives** and the instructor's teaching approach.\n"
        "   - Include a brief overview of the **topics covered** and their relevance.\n\n"
        "2. **Key Topics:**\n"
        "   - Break down each topic into **subtopics** and explain them in detail.\n"
        "   - Include **real-world examples, case studies, and formulas** wherever applicable.\n"
        "   - Highlight any **visual aids, diagrams, or code snippets** used during the class.\n"
        "   - **Explain all steps** discussed in the class, including any **workflow processes, algorithms, or methodologies**.\n"
        "   - Provide **detailed derivations or explanations** for all formulas introduced, including their significance and applications.\n"
        "   - Include **step-by-step breakdowns in the key points** of examples or problems solved during the class, ensuring clarity and completeness.\n\n"
        "3. **Exercises & Discussions:**\n"
        "   - Summarize all **problem-solving activities, coding exercises, and group discussions**.\n"
        "   - Provide **detailed explanations** of the solutions and answers to student questions.\n"
        "   - Mention any **common mistakes** or challenges faced by students.\n"
        "   - Include **step-by-step walkthroughs** of exercises, highlighting the reasoning behind each step.\n\n"
        "4. **Important Announcements:**\n"
        "   - Highlight **assignments, deadlines, and grading criteria**.\n"
        "   - List any **additional resources** (e.g., books, papers, tools) recommended by the instructor.\n"
        "   - Include any **updates or changes** to the course schedule.\n\n"
        "5. **Final Takeaways:**\n"
        "   - Summarize the **key learnings** from the class.\n"
        "   - Suggest **follow-up actions** for students, such as further reading or practice exercises.\n"
        "   - Provide a **motivational note** to encourage students to apply what they've learned.\n\n"
        # The last two bullets used to appear twice in a row; the duplicate
        # pair was removed so the model is not given repeated instructions.
        "### Additional Requirements:\n"
        "- Use **Markdown formatting** for headings, bullet points, and code blocks.\n"
        "- Ensure the summary is **detailed, well-structured, and much longer** than typical summaries.\n"
        "- Avoid omitting any important details from the transcript.\n"
        "- **Explicitly include all steps, formulas, and examples** discussed in the class, ensuring they are explained in a clear and accessible manner.\n\n"
        f"### Class Transcript:\n{transcript}"
    )


def generate_summary(input_file, output_file, model=None):
    """Summarize a class transcript with a chat model and save the result.

    Reads the transcript from ``input_file``, appends it to a fixed
    instruction prompt, sends the prompt to the chat model, and writes the
    reply to ``output_file``.

    Args:
        input_file: Path of the UTF-8 transcript text file to read.
        output_file: Path the summary is written to as UTF-8 (opened in
            write mode, so existing content is replaced).
        model: Optional object exposing ``invoke(prompt)``. When omitted,
            the notebook-level ``llm`` client is used, exactly as before.

    Returns:
        None. The summary is written to ``output_file`` and a confirmation
        message is printed.
    """
    # Read the transcript
    with open(input_file, "r", encoding="utf-8") as file:
        transcript = file.read()

    prompt = _build_summary_prompt(transcript)

    # Resolve the global client only when no model was injected, so the
    # function can run (and be tested) without a configured `llm`.
    chat_model = llm if model is None else model

    # Generate the summary
    response = chat_model.invoke(prompt)

    # Message-style responses carry their text in `.content`; anything else
    # is converted with str()
    summary = response.content if hasattr(response, "content") else str(response)

    # Save the summary to the output file
    with open(output_file, "w", encoding="utf-8") as file:
        file.write(summary)

    print(f"Summary saved successfully in {output_file}")

# Summarize the saved transcript and write the Markdown summary file
generate_summary(input_file=input_path, output_file=output_path)


Summary saved successfully in /home/mikey-sharma/projects/nyu/zoom-classes/processed-transcripts/week-11-summary.md


In [5]:
import google.generativeai as genai
import os
from dotenv import load_dotenv

# Load environment variables, then read the Gemini key; stop early if absent
load_dotenv()
api_key = os.environ.get("GEMINI_API_KEY")
if not api_key:
    raise ValueError("Gemini API key not found in environment variables")

# Hand the key to the google.generativeai client
genai.configure(api_key=api_key)

# Print the name of every model the API reports
models = genai.list_models()
print("Available Models:")
for model in models:
    print(model.name)


Available Models:
models/chat-bison-001
models/text-bison-001
models/embedding-gecko-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/gemini-2.5-pro-exp-03-25
models/gemini-2.5-pro-preview-03-25
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-exp-image-generation
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-2.0-fla