In [None]:
# Importing Required Libraries
from transformers import pipeline
from PIL import Image
import pytesseract
import speech_recognition as sr

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# Setting Tesseract OCR Path
OCR_CONFIG_PATH = "C:/Program Files/Tesseract-OCR/tesseract.exe"
pytesseract.pytesseract.tesseract_cmd = OCR_CONFIG_PATH

In [2]:
# Initializing NLP Pipelines
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
sentiment_analyzer = pipeline("sentiment-analysis")
ner_analyzer = pipeline("ner", grouped_entities=True)

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision 4c53496 (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a Bert

In [3]:
# Function: Speech-to-Text (STT)
def STT(audio_file):
    """Convert speech from an audio file to text using Google Speech Recognition."""
    r = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio = r.record(source)
    try:
        text = r.recognize_google(audio)
    except sr.UnknownValueError:
        return "Google Speech Recognition could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service; {e}"

    return text

# Function: Image-to-Text (ITT)
def ITT(image_file):
    """Extract text from an image using Tesseract OCR."""
    img_file = Image.open(image_file)
    text = pytesseract.image_to_string(img_file)
    return text

# Function: Analyze Text
def analyze_text(text):
    """Analyze text to generate summary, sentiment, and named entities."""
    # Summarization
    summary = summarizer(text, min_length=30, max_length=130, do_sample=False)[0]['summary_text']
    # Sentiment Analysis
    sentiment = sentiment_analyzer(text)
    # Named Entity Recognition
    entities = ner_analyzer(text)

    return {
        "summary": summary,
        "sentiment": sentiment,
        "entities": entities
    }


In [12]:
# Function: Process File (Main Pipeline)
def summary_pipeline(file_path):
    """Process an image or audio file, extract text, and analyze it."""
    if file_path.endswith(".jpg") or file_path.endswith(".png"):
        text = str(ITT(file_path))
    elif file_path.endswith(".wav"):
        text = str(STT(file_path))
    else:
        raise ValueError(f"Invalid file format: {file_path}")

    # Analyze the extracted text
    analysis = analyze_text(text)
    return analysis


In [None]:
# Test text input for analysis
text_input = "Alex sat in his room, the quiet almost too loud. The war was over, but it felt like it never left. Every night, memories flooded back—gunfire, explosions, the faces of people lost. One night, a loud sound from outside made his heart race. He froze, his mind taken back to the battlefield. The feeling of danger, the fear, it was all too real. He tried to shake it off, but it was hard. At work, he couldn’t focus. Even the simplest noises made him jump. His friends didn’t understand, and that made him feel even more alone. The hardest part was when he saw a child playing in the park. It reminded him of the innocent lives lost in the war. His hands shook, but he took a deep breath, reminding himself that he was home now. Healing was slow, but he knew he had to keep going."
result_text = analyze_text(text_input)
print("Text Analysis:", result_text)

Text Analysis: {'summary': 'The war was over, but it felt like it never left. Every night, memories flooded back. Even the simplest noises made him jump. The hardest part was when he saw a child playing in the park.', 'sentiment': [{'label': 'NEGATIVE', 'score': 0.6555995941162109}], 'entities': [{'entity_group': 'PER', 'score': 0.96746594, 'word': 'Alex', 'start': 0, 'end': 4}]}


In [None]:
# Audio File Analysis
result_audio = summary_pipeline('data/SUMM/voice_1.wav')
print("Audio Analysis:", result_audio)

In [10]:
# Image File Analysis
result_image = summary_pipeline('data/SUMM/story_1.png')
print("Image Analysis:", result_image)

Image Analysis: {'summary': 'During a mission, Alex lost his best friend in an explosion. The memories were so vivid it felt like he was still there, stuck in that terrible moment. Alex realized he couldn’t keep living this way. He started talking to a counselor and joined a support group.', 'sentiment': [{'label': 'POSITIVE', 'score': 0.9938408136367798}], 'entities': [{'entity_group': 'PER', 'score': 0.9918094, 'word': 'Alex', 'start': 11, 'end': 15}, {'entity_group': 'PER', 'score': 0.95194453, 'word': 'Talso', 'start': 1213, 'end': 1218}]}
