In [2]:
# Importing Required Libraries
from transformers import pipeline
from PIL import Image
import pytesseract
import speech_recognition as sr

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Setting Tesseract OCR Path
OCR_CONFIG_PATH = "C:/Program Files/Tesseract-OCR/tesseract.exe"
pytesseract.pytesseract.tesseract_cmd = OCR_CONFIG_PATH

In [4]:
# Initializing NLP Pipelines
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
sentiment_analyzer = pipeline("sentiment-analysis")
ner_analyzer = pipeline("ner", grouped_entities=True)

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision 4c53496 (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a Bert

In [5]:
# Function: Speech-to-Text (STT)
def STT(audio_file):
    """Convert speech from an audio file to text using Google Speech Recognition."""
    r = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio = r.record(source)
    try:
        text = r.recognize_google(audio)
    except sr.UnknownValueError:
        return "Google Speech Recognition could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service; {e}"

    return text

# Function: Image-to-Text (ITT)
def ITT(image_file):
    """Extract text from an image using Tesseract OCR."""
    img_file = Image.open(image_file)
    text = pytesseract.image_to_string(img_file)
    return text

# Function: Analyze Text
def analyze_text(text):
    """Analyze text to generate summary, sentiment, and named entities."""
    # Summarization
    summary = summarizer(text, min_length=30, max_length=130, do_sample=False)[0]['summary_text']
    # Sentiment Analysis
    sentiment = sentiment_analyzer(text)
    # Named Entity Recognition
    entities = ner_analyzer(text)

    return {
        "summary": summary,
        "sentiment": sentiment,
        "entities": entities
    }


In [6]:
# Function: Process File (Main Pipeline)
def summary_pipeline(file_path):
    """Process an image or audio file, extract text, and analyze it."""
    if file_path.endswith(".jpg") or file_path.endswith(".png"):
        text = str(ITT(file_path))
    elif file_path.endswith(".wav"):
        text = str(STT(file_path))
    else:
        raise ValueError(f"Invalid file format: {file_path}")

    # Analyze the extracted text
    analysis = analyze_text(text)
    return analysis


In [9]:
# Test text input for analysis
text_input = "My name is Amanda Olejniczak, I went to therapy for the first time on June 9, 2000, at age four but I never returned because my mother was blamed for my behavior. I still wonder how different life could have been had I continued to get the necessary help for my OCD and anxiety at that age. Later in life, at age sixteen, I was diagnosed with depression and anorexia; now at age twenty-six I also struggle with post-traumatic stress disorder due to my past relationship with my mother and her second husband. I have been in therapy for my mental illnesses consistently since I was diagnosed with anorexia at age sixteen, but it wasn’t until I met my current therapist after being discharged from treatment for the second time, that I believe the real work has begun. When I met my new therapist, I immediately felt seen and heard. This was something I had never experienced before; I didn’t think I deserved it. I had been told by previous therapists that I would not achieve anything and my mother was told to not expect anything out of me so she wouldn’t be disappointed. This therapist was different. She not only saw me for who I was, she saw the potential I had and continues to push me towards being the best version of myself. At twenty-six, my list of diagnoses, with the exception of anorexia, has not changed, but I’m finally learning how to better navigate my life while living with my mental illnesses. In this essay, I explicitly tell my story of how losing the toxic relationship with my mother left room for me to find myself. An entire year has gone by since I last heard from or seen my mother."
result_text = analyze_text(text_input)
print("Text Analysis:", result_text)

Text Analysis: {'summary': 'Amanda Olejniczak went to therapy for the first time on June 9, 2000, at age four. At age sixteen, she was diagnosed with depression and anorexia. She also struggle with post-traumatic stress disorder due to her past relationship with her mother and her second husband.', 'sentiment': [{'label': 'POSITIVE', 'score': 0.9856559038162231}], 'entities': [{'entity_group': 'PER', 'score': 0.98719156, 'word': 'Amanda Olejniczak', 'start': 11, 'end': 28}]}


In [12]:
# Audio File Analysis
result_audio = summary_pipeline('data/SUMM/voice_1.wav')
print("Audio Analysis:", result_audio)

Your max_length is set to 130, but your input_length is only 77. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=38)


Audio Analysis: {'summary': "In My Dreams during the day small noise is like Fireworks scared me I avoided crowd because they made me feel trapped sometimes I feel like I was done to explain better step by step. one day during a mission I lost my best friend in an explosion even though the war ended the memories didn't leave me at night.", 'sentiment': [{'label': 'NEGATIVE', 'score': 0.9889577031135559}], 'entities': [{'entity_group': 'MISC', 'score': 0.8633633, 'word': 'Dreams', 'start': 202, 'end': 208}]}


In [11]:
# Image File Analysis
result_image = summary_pipeline('data/SUMM/story_1.png')
print("Image Analysis:", result_image)

Image Analysis: {'summary': 'Sara was attacked by a stranger while walking home from a friend’s house. After the attack, she felt broken, like he had taken a piece of her soul. She avoided her family and friends, terrified that they’d see her differently. After months of therapy, she learned she could still find strength and joy in the world.', 'sentiment': [{'label': 'POSITIVE', 'score': 0.9683920741081238}], 'entities': [{'entity_group': 'PER', 'score': 0.92782956, 'word': 'Sara', 'start': 11, 'end': 15}]}
