<a href="https://colab.research.google.com/github/Coltcult/fantastic-computing-machine/blob/main/PDF_Processing_and_Memory_Aid_AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow import keras
import io  # Changed from io.BytesIO
from google.colab import files
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
import heapq
import time  # Import the time module
import numpy as np # Import numpy

# Download the NLTK resources used by the tokenizers and the stopword filter.
# nltk.download() reports most failures by returning False rather than raising,
# so the return value is checked for every package. No timeout is applied.
# NOTE(review): recent NLTK releases load 'punkt_tab' (instead of 'punkt') for
# sent_tokenize/word_tokenize, so both are requested -- confirm against the
# installed NLTK version.
_failed_packages = []
for _package in ('punkt', 'punkt_tab', 'stopwords'):
    print(f"[nltk_data] Downloading package {_package}...")
    start_time = time.time()
    try:
        _downloaded = nltk.download(_package, quiet=False)  # quiet=False shows download output
    except Exception as e:
        # Keep going: one failing package should not prevent the others from downloading.
        print(f"Error downloading NLTK data: {e}")
        _downloaded = False
    if _downloaded:
        print(f"[nltk_data]   Downloaded {_package}.zip in {time.time() - start_time:.2f} seconds.")
    else:
        _failed_packages.append(_package)

if _failed_packages:
    print(f"Error downloading NLTK data: {', '.join(_failed_packages)}")
    print("Please ensure you have a stable internet connection.  The script may still run, but some features will be unavailable.")

# --- Helper Functions ---

def extract_text_from_pdf(file_content):
    """
    Extracts text from a PDF file.

    Args:
        file_content (bytes): The content of the PDF file.

    Returns:
        str: The text of every page concatenated in page order, or a message
            starting with "Error extracting text:" if extraction fails.
    """
    try:
        # Imported inside the try so a missing PyPDF2 install is reported
        # through the same error-message path as any other failure.
        import PyPDF2
        pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_content))
        # A page may yield None instead of text; treat that as an empty string.
        # join() builds the result once instead of re-copying it per page.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
    except Exception as e:
        return f"Error extracting text: {e}"

def clean_text(text):
    """
    Normalizes text for display: keeps only ASCII letters, digits and
    whitespace, lowercases the result, and collapses every whitespace run
    into a single space with no leading or trailing space.

    Args:
        text (str): The text to clean.

    Returns:
        str: The cleaned text, or a message starting with
            "Error cleaning text:" if cleaning fails.
    """
    try:
        alnum_only = re.sub(r'[^a-zA-Z0-9\s]', '', text)
        single_spaced = re.sub(r'\s+', ' ', alnum_only.lower())
        return single_spaced.strip()
    except Exception as err:
        return f"Error cleaning text: {err}"

def summarize_text(text, num_sentences=5):
    """
    Summarizes the text using a basic frequency-based approach.

    Each sentence is scored by summing the document-wide frequencies of its
    content words; the highest-scoring sentences form the summary, joined by
    spaces in descending score order.

    Content words are compared case-insensitively, and stopwords and
    punctuation-only tokens are ignored so they cannot dominate the scores.

    Args:
        text (str): The text to summarize.
        num_sentences (int, optional): The number of sentences in the summary. Defaults to 5.

    Returns:
        str: The summarized text, or a message starting with
            "Error summarizing text:" if summarization fails.
    """
    try:
        from collections import Counter  # local import keeps this function self-contained

        sentences = sent_tokenize(text)
        stop_words = set(stopwords.words('english'))

        def content_words(fragment):
            # The stopword list is lowercase, so tokens are lowercased before
            # the lookup; this also merges "Data" and "data" into one term.
            # Tokens without any letter or digit are punctuation and are dropped.
            lowered = (token.lower() for token in word_tokenize(fragment))
            return [
                word for word in lowered
                if word not in stop_words and any(ch.isalnum() for ch in word)
            ]

        word_frequencies = Counter(content_words(text))

        sentence_scores = {}
        for sentence in sentences:
            score = sum(word_frequencies[word] for word in content_words(sentence))
            if score:
                # Repeated identical sentences accumulate into a single entry.
                sentence_scores[sentence] = sentence_scores.get(sentence, 0) + score

        best_sentences = heapq.nlargest(num_sentences, sentence_scores, key=sentence_scores.get)
        return " ".join(best_sentences)
    except Exception as e:
        return f"Error summarizing text: {e}"

def generate_memory_prompts(text, num_prompts=3):
    """
    Builds memory prompts by splitting the text into sentences and keeping
    the leading ones; no ranking of sentences is performed.

    Args:
        text (str): The text to generate prompts from.
        num_prompts (int, optional): The number of prompts to generate. Defaults to 3.

    Returns:
        list: The first num_prompts sentences (all of them when the text has
            no more than that). If generation fails, a str starting with
            "Error generating memory prompts:" is returned instead.
    """
    try:
        all_sentences = sent_tokenize(text)
        # Short texts are returned whole; longer ones are cut to the leading sentences.
        return all_sentences if len(all_sentences) <= num_prompts else all_sentences[:num_prompts]
    except Exception as err:
        return f"Error generating memory prompts: {err}"

def text_to_speech(text):
    """
    Stand-in for text-to-speech: writes the text to standard output with a
    "Speaking: " prefix instead of producing audio.

    Args:
        text (str): The text to speak.
    """
    spoken_line = "Speaking: {}".format(text)  # printed in place of real audio output
    print(spoken_line)

def create_tflite_model():
    """
    Builds a tiny demonstration Keras model, fits it for one epoch on random
    data, converts it to TensorFlow Lite and writes it to disk.

    The model has no role in the PDF processing; it only illustrates the
    Keras-to-TFLite conversion workflow.

    Returns:
        str: The path of the written TFLite file ("converted_model.tflite"),
            or None if any step fails (the error is printed).
    """
    try:
        # Two dense layers: 10 input features -> 10 hidden units -> 1 sigmoid output.
        demo_layers = [
            keras.layers.Dense(10, input_shape=(10,), activation='relu'),
            keras.layers.Dense(1, activation='sigmoid'),
        ]
        demo_model = keras.Sequential(demo_layers)
        demo_model.compile(
            optimizer='adam',
            loss='binary_crossentropy',
            metrics=['accuracy'],
        )

        # Random features and random 0/1 labels; the fit only exercises the API.
        features = np.random.rand(100, 10)
        labels = np.random.randint(2, size=(100, 1))
        demo_model.fit(features, labels, epochs=1)

        # Convert the fitted model and persist the serialized result.
        serialized = tf.lite.TFLiteConverter.from_keras_model(demo_model).convert()
        output_path = "converted_model.tflite"
        with open(output_path, 'wb') as out_file:
            out_file.write(serialized)
        return output_path
    except Exception as err:
        print(f"Error creating TFLite model: {err}")
        return None

def run_tflite_model(model_path):
    """
    Loads a TensorFlow Lite model, feeds it one random input row and prints
    the resulting output tensor. Demonstration only; unrelated to the PDF
    processing.

    Any failure is caught and printed rather than raised.

    Args:
        model_path (str): The path to the TFLite model.
    """
    try:
        tflite_interpreter = tf.lite.Interpreter(model_path=model_path)
        tflite_interpreter.allocate_tensors()

        # Tensor indices of the first input and first output.
        first_input = tflite_interpreter.get_input_details()[0]
        first_output = tflite_interpreter.get_output_details()[0]

        # One random row of 10 float32 values as dummy input.
        sample = np.random.rand(1, 10).astype(np.float32)
        tflite_interpreter.set_tensor(first_input['index'], sample)
        tflite_interpreter.invoke()

        prediction = tflite_interpreter.get_tensor(first_output['index'])
        print(f"TensorFlow Lite model output: {prediction}")
    except Exception as err:
        print(f"Error running TFLite model: {err}")

# --- Main Function ---

def main():
    """
    Main function to orchestrate the PDF processing and memory aid.

    Asks for a PDF upload through Colab, extracts and cleans its text, then
    runs a menu loop (read aloud, summarize, memory prompts, TFLite demo,
    exit) until the user chooses to exit.

    Helper failures are recognised by their specific "Error ...:" message
    prefix (or, for memory prompts, by a str result instead of a list) rather
    than by looking for the word "Error" anywhere in the result, so documents
    that merely mention "Error" are still processed.
    """
    print("Welcome to the AI-Powered Learning Assistant!")

    # Upload PDF file with error handling
    print("Please upload your PDF file...")
    try:
        uploaded = files.upload()
    except Exception as e:
        print(f"Error during file upload: {e}")
        print("Please check your network connection and try again.")
        return
    if not uploaded:
        print("No file uploaded. Exiting.")
        return

    # Only the first uploaded file is processed; its content is the mapping value.
    try:
        file_content = next(iter(uploaded.values()))
    except Exception as e:
        print(f"Error accessing uploaded file: {e}")
        return

    # Extract text from PDF
    pdf_text = extract_text_from_pdf(file_content)
    if pdf_text.startswith("Error extracting text:"):
        print(pdf_text)  # Print the error message
        return
    if not pdf_text.strip():
        # Nothing to read, summarize or turn into prompts.
        print("No extractable text was found in the PDF. Exiting.")
        return

    # Clean the extracted text (used for the read-aloud option)
    cleaned_text = clean_text(pdf_text)
    if cleaned_text.startswith("Error cleaning text:"):
        print(cleaned_text)
        return

    # Offer options to the user
    while True:
        print("\nChoose an option:")
        print("1. Read the text aloud")
        print("2. Summarize the text")
        print("3. Generate memory prompts")
        print("4. Run TensorFlow Lite model (demo)")  # Keep this for demonstration
        print("5. Exit")

        choice = input("Enter your choice (1-5): ")

        if choice == '1':
            text_to_speech(cleaned_text)
        elif choice == '2':
            # Summarize the raw text: clean_text() strips all punctuation, which
            # would leave the sentence tokenizer with a single giant sentence.
            summary = summarize_text(pdf_text)
            if summary.startswith("Error summarizing text:"):
                print(summary)
            else:
                print(f"\nSummary:\n{summary}")
        elif choice == '3':
            # Raw text for the same reason: prompts are individual sentences.
            prompts = generate_memory_prompts(pdf_text)
            if isinstance(prompts, str):
                # A str result is the helper's error message; success is a list.
                print(prompts)
            else:
                print("\nMemory Prompts:")
                for number, prompt in enumerate(prompts, start=1):
                    print(f"{number}. {prompt}")
        elif choice == '4':
            # Create and run the TFLite model (demonstration)
            tflite_model_path = create_tflite_model()
            if tflite_model_path:  # only run if model was created
                run_tflite_model(tflite_model_path)
        elif choice == '5':
            print("Exiting...")
            break
        else:
            print("Invalid choice. Please try again.")

# Start the assistant only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    main()

[nltk_data] Downloading package punkt...


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


[nltk_data]   Downloaded punkt.zip in 2.01 seconds.
[nltk_data] Downloading package stopwords...
[nltk_data]   Downloaded stopwords.zip in 0.09 seconds.
Welcome to the AI-Powered Learning Assistant!
Please upload your PDF file...


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
