# **1. Installation**

In [1]:
!pip install google-generativeai
!pip install transformers
!pip install tika
!pip install PyPDF2
!pip install reportlab
!pip install googletrans==4.0.0-rc1


Collecting google-generativeai
  Downloading google_generativeai-0.7.1-py3-none-any.whl (163 kB)
     -------------------------------------- 163.9/163.9 kB 3.3 MB/s eta 0:00:00
Collecting google-api-core
  Downloading google_api_core-2.19.1-py3-none-any.whl (139 kB)
     -------------------------------------- 139.4/139.4 kB 8.1 MB/s eta 0:00:00
Collecting google-api-python-client
  Downloading google_api_python_client-2.136.0-py2.py3-none-any.whl (11.9 MB)
     ---------------------------------------- 11.9/11.9 MB 6.5 MB/s eta 0:00:00
Collecting google-ai-generativelanguage==0.6.6
  Downloading google_ai_generativelanguage-0.6.6-py3-none-any.whl (718 kB)
     -------------------------------------- 718.3/718.3 kB 7.5 MB/s eta 0:00:00
Collecting pydantic
  Downloading pydantic-2.8.0-py3-none-any.whl (423 kB)
     -------------------------------------- 423.1/423.1 kB 8.8 MB/s eta 0:00:00
Collecting google-auth>=2.15.0
  Downloading google_auth-2.31.0-py2.py3-none-any.whl (194 kB)
     ---

Collecting tika
  Downloading tika-2.6.0.tar.gz (27 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: tika
  Building wheel for tika (setup.py): started
  Building wheel for tika (setup.py): finished with status 'done'
  Created wheel for tika: filename=tika-2.6.0-py3-none-any.whl size=32624 sha256=bb4fdb9f90648cbe1982eb1b2b660d634e09710cf05d3b1e6ba00d8e1d223361
  Stored in directory: c:\users\bhavy\appdata\local\pip\cache\wheels\13\56\18\e752060632d32c39c9c4545e756dad281f8504dafcfac02b95
Successfully built tika
Installing collected packages: tika
Successfully installed tika-2.6.0
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
     -------------------------------------- 232.6/232.6 kB 3.5 MB/s eta 0:00:00
Installing collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1
Collecting reportlab
  Downloading reportlab-4.2.2-py3-none-any.whl (1.9 MB)
     ---------

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
spyder 5.2.2 requires pyqt5<5.13, which is not installed.
spyder 5.2.2 requires pyqtwebengine<5.13, which is not installed.
anaconda-project 0.11.1 requires ruamel-yaml, which is not installed.
conda-repo-cli 1.0.20 requires clyent==1.2.1, but you have clyent 1.2.2 which is incompatible.
conda-repo-cli 1.0.20 requires nbformat==5.4.0, but you have nbformat 5.5.0 which is incompatible.


# code

In [None]:
import PyPDF2
import google.generativeai as genai
import json
from io import BytesIO
import requests
import multiprocessing
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
import google.generativeai as genai
from google.colab import userdata
import warnings
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
import markdown
import time
import urllib.parse
from googletrans import Translator, LANGUAGES
import os

warnings.filterwarnings("ignore")

# Configure the Google Generative AI API
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

def extract_text_from_pdf(pdf_path):
    with open(pdf_path, "rb") as file:
        pdf_reader = PyPDF2.PdfReader(file)
        text = ""
        for page_num in range(len(pdf_reader.pages)):
            page = pdf_reader.pages[page_num]
            text += page.extract_text()
    return text

def split_text_into_chunks(text, max_chunk_size=2000):
    sentences = text.split('. ')
    chunks = []
    current_chunk = ""

    for sentence in sentences:
        if len(current_chunk) + len(sentence) + 1 > max_chunk_size:
            chunks.append(current_chunk)
            current_chunk = sentence
        else:
            if current_chunk:
                current_chunk += ". " + sentence
            else:
                current_chunk = sentence

    if current_chunk:
        chunks.append(current_chunk)

    return chunks

def generate_content(prompt, max_tokens=1024, retries=3, wait=5):
    model = genai.GenerativeModel('gemini-pro')
    for attempt in range(retries):
        try:
            response = model.generate_content(prompt)
            if response.parts:
                return response.parts[0].text.strip()
            else:
                raise ValueError("No valid parts in response.")
        except Exception as e:
            print(f"Error: {e}")
            if attempt < retries - 1:
                print(f"Retrying in {wait} seconds...")
                time.sleep(wait)
            else:
                return f"Failed to generate content after {retries} attempts."

def create_prompts(text, task_type):
    prompts = {
        "mcq": f"Read the following text carefully and generate multiple-choice questions. Each question should include:\n"
               f"1. A clear and concise question based on the text.\n"
               f"2. Four answer options (A, B, C, D), with one correct answer clearly indicated.\n"
               f"3. The questions should cover key concepts, definitions, critical points, and significant details discussed in the text.\n"
               f"4. Ensure the options are plausible and relevant to the content.\n\n"
               f"Text:\n{text}\n\nMCQ:",
        "fill_in_the_blank": f"Read the following text thoroughly and generate fill-in-the-blank questions. Each question should include:\n"
                            f"1. A sentence from the text with one key term or concept replaced by a blank.\n"
                            f"2. The correct term or concept that completes the sentence accurately.\n"
                            f"3. Focus on important information, such as key terms, dates, names, and concepts that are critical to understanding the text.\n\n"
                            f"Text:\n{text}\n\nFill in the blank:",
        "short_answer": f"Read the following text attentively and generate short answer questions. Each question should include:\n"
                        f"1. A clear and specific question that requires a brief response.\n"
                        f"2. The response should address key points, explanations, or definitions provided in the text.\n"
                        f"3. Ensure the questions encourage critical thinking and comprehension of the material, focusing on important details and concepts.\n\n"
                        f"Text:\n{text}\n\nShort answer question:",
        "course": f"Read the following text and generate a comprehensive, structured course content. The content should include:\n"
                  f"1. Detailed learning objectives and outcomes for each chapter, helping students understand what they are expected to learn.\n"
                  f"2. A chapter-wise breakdown with detailed descriptions and subtopics, summarizing the main points for easy study.\n"
                  f"3. Key concepts, definitions, and explanations for each chapter, highlighting the essential information.\n"
                  f"4. Examples, illustrations, and case studies relevant to each chapter, to help students grasp practical applications.\n"
                  f"5. Practical exercises and activities for students to reinforce learning, ensuring they can apply what they have learned.\n"
                  f"6. Summaries and key takeaways for each chapter, so students can review the main points quickly.\n"
                  f"7. Supplementary resources and reading materials for further study, providing avenues for deeper exploration.\n"
                  f"8. Guidance notes and tips for teachers on how to effectively deliver the content and engage students.\n"
                  f"9. Incorporate interactive elements and multimedia content where possible, to enhance learning and retention.\n"
                  f"10. Ensure the curriculum is structured in a logical and progressive manner to facilitate a smooth learning experience.\n\n"
                  f"Text:\n{text}\n\nComprehensive Course Content:",
        "chat": f"Use the following text as a reference to answer questions based on its content.\n\n"
                f"Text:\n{text}\n\nQuestion:",
    }
    return prompts.get(task_type, "")



def get_user_input(prompt):
    return input(prompt)

def translate_text_google_translate(text, target_language):
    translator = Translator()
    translation = translator.translate(text, dest=target_language)
    return translation.text

def save_to_json(filename, data):
    with open(filename, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, ensure_ascii=False, indent=4)

def load_from_json(filename):
    if os.path.exists(filename):
        with open(filename, 'r', encoding='utf-8') as json_file:
            return json.load(json_file)
    return {}

def talk_to_pdf(text_chunks):
    print("You can now talk to the PDF. Ask any questions based on its content.")
    while True:
        user_question = get_user_input("Your question (type 'exit' to quit): ")
        if user_question.lower() == 'exit':
            break
        responses = []
        for chunk in text_chunks:
            prompt = create_prompts(chunk, "chat") + user_question
            response = generate_content(prompt)
            responses.append(response)
        print("Response:")
        print(" ".join(responses))
        print()

def chatbot():
    pdf_path = get_user_input("Enter the path to your PDF file: ")
    pdf_text = extract_text_from_pdf(pdf_path)

    text_chunks = split_text_into_chunks(pdf_text)

    while True:
        main_choice = get_user_input("Choose an option: (1) Generate Course, (2) Generate Questions, (3) Talk to the PDF: ").lower()
        if main_choice not in ['1', '2', '3']:
            print("Invalid choice. Please choose '1', '2', or '3'.")
            continue

        if main_choice == '1':
            prompt = create_prompts(pdf_text, "course")
            course_outline = generate_content(prompt)
            print("Course Outline and Lesson Plan (in English):")
            print(course_outline)
            save_to_json('course_outline.json', {"course_outline": course_outline})

            translate_choice = get_user_input("Do you want to translate the course outline? (yes/no): ").lower()
            if translate_choice == 'yes':
                languages = {v: k for k, v in LANGUAGES.items()}  # Reverse lookup
                print("Available languages:")
                for code, language in languages.items():
                    print(f"{language} ({code})")
                language_choice = get_user_input("Enter the language code you want to translate to: ")
                if language_choice in languages:
                    translated_outline = translate_text_google_translate(course_outline, language_choice)
                    print(f"Translated Course Outline ({languages[language_choice]}):")
                    print(translated_outline)
                    save_to_json(f'course_outline_{language_choice}.json', {"course_outline": translated_outline})
                else:
                    print("Invalid language code.")

        elif main_choice == '2':
            previous_question_type = None
            all_questions = load_from_json('questions.json')
            while True:
                question_type = get_user_input("Choose the type of questions to generate (mcq, fill_in_the_blank, short_answer): ").lower()
                if question_type not in ['mcq', 'fill_in_the_blank', 'short_answer']:
                    print("Invalid choice. Please choose either 'mcq', 'fill_in_the_blank', or 'short_answer'.")
                    continue
                if question_type == previous_question_type:
                    print(f"You've already generated {question_type} questions. Please choose a different type.")
                    continue

                num_questions = int(get_user_input("Enter the number of questions to generate (5, 10, 15): "))
                if num_questions not in [5, 10, 15]:
                    print("Invalid number of questions. Please choose either 5, 10, or 15.")
                    continue

                questions = []
                for chunk in text_chunks:
                    prompt = create_prompts(chunk, question_type)
                    question = generate_content(prompt)
                    questions.append(question)

                all_questions[question_type] = questions[:num_questions]
                save_to_json('questions.json', all_questions)

                print(f"{question_type.upper()} Questions (in English):")
                for question in questions[:num_questions]:
                    print(question)
                    print()

                translate_choice = get_user_input("Do you want to translate the questions? (yes/no): ").lower()
                if translate_choice == 'yes':
                    languages = {v: k for k, v in LANGUAGES.items()}  # Reverse lookup
                    print("Available languages:")
                    for code, language in languages.items():
                        print(f"{language} ({code})")
                    language_choice = get_user_input("Enter the language code you want to translate to: ")
                    if language_choice in languages:
                        translated_questions = []
                        for question in questions[:num_questions]:
                            translated_question = translate_text_google_translate(question, language_choice)

                            translated_questions.append(translated_question)
                            print(f"Translated {question_type.upper()} Question ({languages[language_choice]}):")
                            print(translated_question)
                        all_questions[f"{question_type}_{language_choice}"] = translated_questions
                        save_to_json('questions.json', all_questions)
                    else:
                        print("Invalid language code.")

                another_round = get_user_input("Do you want to generate a different type of questions? (yes/no): ").lower()
                if another_round != 'yes':
                    break
                previous_question_type = question_type

        elif main_choice == '3':
            talk_to_pdf(text_chunks)

        another_main_round = get_user_input("Do you want to perform another operation? (yes/no): ").lower()
        if another_main_round != 'yes':
            break

# Start the chatbot
if __name__ == "__main__":
    chatbot()


Enter the path to your PDF file: /content/new_resume.pdf
Choose an option: (1) Generate Course, (2) Generate Questions, (3) Talk to the PDF: 3
You can now talk to the PDF. Ask any questions based on its content.
Your question (type 'exit' to quit): what is this all about
Response:
Unfortunately, I do not have access to any reference text, so I am unable to answer this question from the provided context. This document is a resume of Abhishek Sinha, an AI engineer with experience in machine learning, deep learning, and cloud computing. He has worked on projects such as an AI-based multilingual course generator and has experience with Google Cloud, AWS, and Azure. His skills include prompt engineering, generative AI, working with LLM, API integration, AI systems, and more. He has a Bachelor of Engineering from Acharya Institute of Technology and an Executive Post Graduate Certification in Data Science & Artificial Intelligence from iHub, Divya Sampark IIT Roorkee.

Your question (type 'ex