## Install required libraries

In [None]:
!pip install transformers torch -q

## Import libraries

In [1]:
from transformers import pipeline
import warnings
import re
warnings.filterwarnings('ignore')

## Model initialization

In [2]:
# Summarization pipeline backed by the facebook/bart-large-cnn checkpoint.
# Created once at module level; the summarization helpers below call it.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


In [3]:
# Extractive question-answering pipeline backed by the
# deepset/roberta-base-squad2 checkpoint. Created once at module level; the
# question-answering helper below calls it.
qa_pipeline = pipeline(
    task="question-answering",
    model="deepset/roberta-base-squad2",
)

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/496M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/79.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

Device set to use cuda:0


## Main functions

In [4]:
def split_text_into_chunks(text, max_chunk_size=1000, overlap=100):
    """Split ``text`` into sentence-aligned chunks of bounded length.

    Sentence boundaries are taken at ``.``, ``!`` or ``?`` followed by
    whitespace, so every sentence keeps its own terminator and tokens such as
    ``3.14`` are not broken apart. Consecutive sentences are joined with a
    single space until adding the next one would push the chunk past
    ``max_chunk_size`` characters.

    Args:
        text: The text to split.
        max_chunk_size: Maximum chunk length in characters. A single sentence
            longer than this is still emitted whole, as its own chunk.
        overlap: Accepted for backward compatibility; no overlap is applied
            between chunks.

    Returns:
        A list of non-empty chunk strings in document order. Empty or
        whitespace-only input yields an empty list.
    """
    if not text:
        return []

    # Split *after* a terminator that is followed by whitespace; the lookbehind
    # keeps the terminator attached to its sentence.
    sentences = [s for s in re.split(r'(?<=[.!?])\s+', text.strip()) if s]

    chunks = []
    pieces = []   # sentences collected for the chunk being built
    size = 0      # length of " ".join(pieces)

    for sentence in sentences:
        # One extra character for the joining space when the chunk is non-empty.
        extra = len(sentence) + (1 if pieces else 0)
        if pieces and size + extra > max_chunk_size:
            chunks.append(" ".join(pieces))
            pieces = [sentence]
            size = len(sentence)
        else:
            pieces.append(sentence)
            size += extra

    if pieces:
        chunks.append(" ".join(pieces))

    return chunks

In [5]:
def merge_summaries(summaries):
    """Combine per-chunk summaries into a single summary string.

    The summaries are joined with single spaces. If the joined text is at most
    1000 characters it is returned unchanged; otherwise it is passed through
    the module-level ``summarizer`` once more to condense it.

    Args:
        summaries: Iterable of summary strings, in document order.

    Returns:
        The joined text, or a second-pass summary of it when it is longer
        than 1000 characters.
    """
    combined_text = " ".join(summaries)
    if len(combined_text) <= 1000:
        return combined_text

    # With many chunks the joined summaries can exceed the model's maximum
    # input length; truncation=True makes the pipeline clip the input instead
    # of raising.
    condensed = summarizer(
        combined_text,
        max_length=200,
        min_length=50,
        do_sample=False,
        truncation=True,
    )
    return condensed[0]['summary_text']

In [6]:
def summarize_text(text, max_length=150, min_length=30):
    """Summarize ``text`` with the module-level ``summarizer`` pipeline.

    Text of up to 1000 characters is summarized in a single call. Longer text
    is split with ``split_text_into_chunks``, each chunk of at least 50
    characters is summarized separately, and the partial summaries are
    combined with ``merge_summaries``.

    Args:
        text: The text to summarize. ``None`` is treated like an empty string.
        max_length: ``max_length`` passed to the summarizer on the
            single-call path. The chunked path uses fixed lengths instead.
        min_length: ``min_length`` passed to the summarizer on the
            single-call path. The chunked path uses fixed lengths instead.

    Returns:
        The summary string, or a fixed explanatory message when the text is
        shorter than 50 characters or no chunk could be summarized.
    """
    # Treat None like empty input instead of failing on .strip().
    text = (text or "").strip()

    if len(text) < 50:
        return "Text is too short for summarization."

    if len(text) <= 1000:
        # A character count does not bound the token count, so let the
        # pipeline truncate rather than fail on token-dense input.
        summary = summarizer(
            text,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text']

    summaries = []
    for chunk in split_text_into_chunks(text, max_chunk_size=900):
        if len(chunk.strip()) < 50:
            # Too little content to produce a meaningful summary.
            continue
        try:
            # A chunk may be one oversized sentence; truncation keeps it from
            # being dropped because of the model's input limit.
            chunk_summary = summarizer(
                chunk,
                max_length=100,
                min_length=20,
                do_sample=False,
                truncation=True,
            )
            summaries.append(chunk_summary[0]['summary_text'])
        except Exception as e:
            # Best effort: report the failure and continue with other chunks.
            print(f"Error summarizing chunk: {e}")

    if not summaries:
        return "Could not summarize the text."

    return merge_summaries(summaries)

In [7]:
def answer_question(question, context):
    """Answer ``question`` from ``context`` with the module-level ``qa_pipeline``.

    Context of up to 1000 characters is queried in a single call. Longer
    context is split with ``split_text_into_chunks``; every chunk is queried
    and the answer with the highest score is kept.

    Args:
        question: The question to answer.
        context: The text to search for the answer. ``None`` is treated like
            an empty string.

    Returns:
        A dict with keys ``'answer'`` (str) and ``'confidence'`` (the score
        rounded to 3 decimals). When the question or context is empty, or no
        chunk produced a non-empty answer, ``'answer'`` is a fixed
        "not found" message and ``'confidence'`` is 0.
    """
    not_found = {
        'answer': "Could not find an answer in the given text.",
        'confidence': 0
    }

    context = (context or "").strip()
    # Nothing to search or nothing to ask: return the "not found" result
    # instead of handing empty input to the pipeline.
    if not context or not (question or "").strip():
        return not_found

    if len(context) <= 1000:
        result = qa_pipeline(question=question, context=context)
        return {
            'answer': result['answer'],
            'confidence': round(result['score'], 3)
        }

    best_answer = None
    best_confidence = 0

    for chunk in split_text_into_chunks(context, max_chunk_size=800):
        try:
            result = qa_pipeline(question=question, context=chunk)
        except Exception as e:
            # Best effort: report the failure and continue with other chunks.
            print(f"Error processing chunk: {e}")
            continue
        if result['score'] > best_confidence:
            best_confidence = result['score']
            best_answer = result['answer']

    if not best_answer:
        return not_found

    return {
        'answer': best_answer,
        'confidence': round(best_confidence, 3)
    }
