# 📄 Business Document Summarization Pipeline
This notebook demonstrates a complete pipeline to summarize business documents such as reports, meeting notes, and contracts using transformer-based models. It supports batch processing and saves results in structured format.

In [None]:
!pip install transformers torch --quiet

In [None]:
import os
import glob
import re
import json
from datetime import datetime
from transformers import pipeline
from typing import List, Dict

In [None]:
def extract_table_name(filename):
    """Return the shortest leading name that is followed by ``_<digits>``.

    The name may contain letters, digits and underscores. When ``filename``
    does not start with such a ``name_<digits>`` pattern it is returned
    unchanged.
    """
    found = re.match(r'^([a-zA-Z0-9_]+?)_\d+', filename)
    return found.group(1) if found else filename

# Example usage
filename = "table_abc_17988"
table_name = extract_table_name(filename)
print(table_name)  # -> table_abc

In [None]:
# Build the summarization pipeline once; summarize_text reuses this module-level object.
print("[INFO] Loading summarization model...")
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")

In [None]:
def clean_text(text: str) -> str:
    """Collapse each run of whitespace into one space and trim both ends."""
    return re.sub(r'\s+', ' ', text).strip()

In [None]:
def summarize_text(text: str, min_length=40, max_length=150) -> str:
    """Generate a summary of ``text`` with the module-level ``summarizer`` pipeline.

    Args:
        text: Document text to summarize.
        min_length: Lower bound on the summary length, forwarded to the pipeline.
        max_length: Upper bound on the summary length, forwarded to the pipeline.

    Returns:
        The ``summary_text`` of the first pipeline result, or an empty string
        when ``text`` is empty or contains only whitespace.
    """
    # Nothing to summarize: skip the model call rather than generate from empty input.
    if not text or not text.strip():
        return ""
    # truncation=True clips inputs that exceed the model's maximum input length;
    # without it, long documents fail inside the model instead of being summarized.
    result = summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        truncation=True,
    )
    return result[0]['summary_text']

In [None]:
def summarize_file(filepath: str) -> Dict[str, str]:
    """Read a UTF-8 text file, clean it, and summarize it.

    Returns a dict with the file's base name under ``"filename"`` and the
    generated summary under ``"summary"``.
    """
    with open(filepath, 'r', encoding='utf-8') as handle:
        document = clean_text(handle.read())
    return {
        "filename": os.path.basename(filepath),
        "summary": summarize_text(document),
    }

In [None]:
# Folder that holds the .txt documents to summarize; created if it does not exist yet.
input_dir = "./business_docs"
os.makedirs(name=input_dir, exist_ok=True)
print(f"[INFO] Place .txt files in: {input_dir}")

In [None]:
# Write a small demo document into the input folder so the pipeline has something to process.
sample_path = os.path.join(input_dir, "q2_strategy_notes.txt")
sample_content = '''
Q2 Strategy Meeting:
The sales team presented a plan to expand into Asia-Pacific with support from the product team. Budget increases are expected for international marketing campaigns.
Compliance raised concerns over data localization laws in Singapore and India. Legal advised engaging local counsel for contracts. All action items are due next quarter.
'''
with open(sample_path, 'w', encoding='utf-8') as f:
    f.write(sample_content)
print(f"[INFO] Created sample file: {sample_path}")

In [None]:
# Batch summarization logic
def summarize_directory(directory: str) -> List[Dict[str, str]]:
    """Summarize every ``*.txt`` file directly inside ``directory``.

    Files are processed in sorted path order so that repeated runs return the
    results in the same order (``glob.glob`` itself returns matches in
    arbitrary order).

    Args:
        directory: Folder to scan; sub-folders are not searched.

    Returns:
        One ``summarize_file`` result per matching file, or an empty list when
        nothing matches.
    """
    summaries = []
    for file_path in sorted(glob.glob(os.path.join(directory, '*.txt'))):
        print(f"Summarizing: {file_path}")
        summary = summarize_file(file_path)
        print(summary['summary'])
        summaries.append(summary)
    return summaries

In [None]:
# Summarize the whole input folder and persist the results to a timestamped JSON file.
results = summarize_directory(input_dir)
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
output_file = f"summarized_documents_{timestamp}.json"
with open(output_file, 'w', encoding='utf-8') as f:
    f.write(json.dumps(results, indent=4))
print(f"[INFO] Saved summaries to {output_file}")

In [None]:
# Fine-tuning DistilBERT with PEFT (LoRA)


In [None]:
from datasets import load_dataset
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
import torch

# --- Load Dataset ---
# Using CNN/DailyMail for demonstration. In an enterprise setting, you'd load your own data.
# For true extractive summarization, your dataset needs sentence-level labels.
# This example will adapt CNN/DailyMail for a simpler classification-like task.
try:
    # The split strings request only the first 10,000 training rows and the first 1,000 validation rows.
    dataset = load_dataset("cnn_dailymail", "3.0.0", split="train[:10000]") # Load a smaller subset for faster demo
    eval_dataset = load_dataset("cnn_dailymail", "3.0.0", split="validation[:1000]")
except Exception as e:
    # NOTE(review): any exception (not only network failures) lands here and
    # silently switches the rest of the cell to the two-row dummy dataset.
    print(f"Could not load cnn_dailymail dataset: {e}. Please ensure you have internet access or specify a local path.")
    print("Loading a dummy dataset for demonstration purposes.")
    # Create a dummy dataset if loading fails
    from datasets import Dataset
    # Same column names as the real dataset ("article", "highlights") so the
    # preprocessing below works unchanged.
    dummy_data = {
        "article": [
            "This is the first article about business. It talks about financial growth and market trends. The economy is expanding quickly.",
            "Another document discusses legal contracts. It highlights clauses related to data privacy and regulatory compliance. This is very important."
        ],
        "highlights": [
            "Financial growth is a key topic. Economy is expanding.",
            "Legal contracts focus on data privacy."
        ]
    }
    dataset = Dataset.from_dict(dummy_data)
    # Training and evaluation share the same rows in this fallback, so
    # evaluation numbers say nothing about generalization.
    eval_dataset = Dataset.from_dict(dummy_data) # Use dummy for eval too

# --- Load Tokenizer ---
# Module-level tokenizer; preprocess_function below reads it as a global.
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

# --- Preprocessing Function ---
# Real extractive summarization needs a label per sentence ("is this sentence
# part of the summary?"). This demo has no such annotations, so it tokenizes
# whole articles and attaches one placeholder label per article instead.
def preprocess_function(examples):
    """Tokenize a batch of articles and attach a demo binary label to each.

    ``examples`` is a batch with ``"article"`` and ``"highlights"`` lists.
    Articles are tokenized with truncation and padding to 512 tokens. The
    label is 1 when the matching highlight contains "financial"
    (case-insensitive) and 0 otherwise. This keyword rule only exists so that
    a sequence-classification model has one label per sequence to train on;
    it is *not* how extractive summarization is really labeled.
    """
    encoded = tokenizer(examples["article"], max_length=512, truncation=True, padding="max_length")
    encoded["labels"] = [int("financial" in highlight.lower()) for highlight in examples["highlights"]]
    return encoded

# Tokenize each split and drop the raw text columns once they have been encoded.
tokenized_dataset = dataset.map(preprocess_function, batched=True).remove_columns(["article", "highlights"])
tokenized_eval_dataset = eval_dataset.map(preprocess_function, batched=True).remove_columns(["article", "highlights"])

# Return the model inputs as PyTorch tensors.
tokenized_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])
tokenized_eval_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

print("--- Dataset Prepared ---")
print(f"Training dataset size: {len(tokenized_dataset)}")
print(f"Validation dataset size: {len(tokenized_eval_dataset)}")
print(tokenized_dataset[0]) # First tokenized entry, for a quick visual check

In [None]:
# 2. Fine-tuning DistilBERT with PEFT (LoRA)
# We'll use Parameter-Efficient Fine-Tuning (PEFT), specifically LoRA, to adapt DistilBERT to our (simulated) extractive summarization task. This significantly reduces the computational resources required for fine-tuning.

In [None]:
from transformers import TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, TaskType

# --- Load DistilBERT Model for Sequence Classification ---
# For true extractive summarization, you might use DistilBertForTokenClassification
# to predict a label for each token (e.g., part of summary sentence).
# DistilBertForSequenceClassification is used here for simplicity with dummy labels.
model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2) # 2 labels for our dummy binary classification

# --- Define LoRA Configuration ---
# LoRA adapters are attached to the modules named in target_modules; here these
# are the attention query, key and value projections (q_lin, k_lin, v_lin).
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS, # Specify the task type
    r=64,                      # The rank of the update matrices
    lora_alpha=16,             # The scaling factor for LoRA
    lora_dropout=0.1,          # Dropout probability for LoRA layers
    bias="none",               # Bias can be 'none', 'all', or 'lora_only'
    target_modules=["q_lin", "k_lin", "v_lin"], # Query, key and value projections
)

# Apply LoRA to the base model
# `model` is rebound to the PEFT-wrapped model; the plain base model is no longer referenced by name.
model = get_peft_model(model, peft_config)
print("\n--- PEFT Model Trainable Parameters ---")
model.print_trainable_parameters() # Shows how many parameters are actually being trained (very few!)

# --- Training Arguments ---
# `eval_strategy` is the current name of the evaluation-schedule option. The
# older `evaluation_strategy` keyword was deprecated in its favour and is no
# longer accepted by recent transformers releases (it raises a TypeError there).
training_args = TrainingArguments(
    output_dir="./distilbert-summarization-peft", # Directory to save checkpoints
    num_train_epochs=3,                          # Number of training epochs
    per_device_train_batch_size=8,               # Batch size per GPU/CPU for training
    per_device_eval_batch_size=8,                # Batch size per GPU/CPU for evaluation
    warmup_steps=500,                            # Number of warmup steps for learning rate scheduler
    weight_decay=0.01,                           # Strength of weight decay
    logging_dir="./logs",                        # Directory for storing logs
    logging_steps=100,                           # Log every X updates steps
    eval_strategy="epoch",                       # Evaluate at the end of each epoch
    save_strategy="epoch",                       # Save at the end of each epoch (same schedule as evaluation)
    load_best_model_at_end=True,                 # Load the best model after training
    metric_for_best_model="eval_loss",           # Metric to monitor for best model
    push_to_hub=False,                           # Don't push to Hugging Face Hub for enterprise internal models
)

# --- Initialize Trainer ---
# Wires together the PEFT-wrapped model, the training arguments and both tokenized splits.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    eval_dataset=tokenized_eval_dataset,
    processing_class=tokenizer,
    # The tokenizer is passed as processing_class; the earlier `tokenizer=tokenizer` spelling is left disabled.
)

# --- Train the Model ---
print("\n--- Starting Model Training ---")
trainer.train()
print("\n--- Model Training Complete ---")

# --- Save the fine-tuned LoRA adapter ---
# This saves only the small LoRA weights, not the entire DistilBERT model.
# You can then load the base DistilBERT and add these adapters to it.
# The adapter and the tokenizer go to two sibling folders under the training output directory.
model.save_pretrained("./distilbert-summarization-peft/lora_adapter")
tokenizer.save_pretrained("./distilbert-summarization-peft/tokenizer")

In [None]:
# 3. Deployment with FastAPI
# Now, let's set up a basic REST API using FastAPI to serve our fine-tuned DistilBERT model. This allows other enterprise applications to easily integrate with the summarization service.

In [None]:
# app.py
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
from peft import PeftModel, PeftConfig
import torch

app = FastAPI(title="Enterprise Extractive Summarization API")

# --- Load Model and Tokenizer ---
# Everything in this section runs once, at import time; the endpoint below
# reuses the resulting module-level `tokenizer` and `model`.
# Load the base model
base_model_name = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(base_model_name)
base_model = DistilBertForSequenceClassification.from_pretrained(base_model_name, num_labels=2) # Ensure num_labels matches training

# Load the PEFT adapter
# Make sure this path points to where you saved your LoRA adapter
lora_adapter_path = "./distilbert-summarization-peft/lora_adapter"
try:
    model = PeftModel.from_pretrained(base_model, lora_adapter_path)
    model = model.merge_and_unload() # Merge LoRA weights into the base model for faster inference
    print("LoRA adapter loaded and merged successfully.")
except Exception as e:
    # NOTE(review): any failure here is only printed, and the API then serves
    # the base model without the fine-tuned adapter, so its predictions are
    # presumably not meaningful. Confirm this fallback is wanted.
    print(f"Could not load LoRA adapter from {lora_adapter_path}: {e}")
    print("Running with base DistilBERT model (without PEFT fine-tuning).")
    model = base_model # Fallback to base model if adapter fails to load

model.eval() # Set model to evaluation mode

# --- Define Request Body Schema ---
# JSON body accepted by the /summarize/ endpoint: {"text": "<article text>"}.
class Article(BaseModel):
    text: str  # raw article text that the endpoint tokenizes and classifies

# --- Summarization Endpoint ---
@app.post("/summarize/")
def create_summary(article: Article):
    """
    Analyzes an input article to identify key sentences for an extractive summary.
    This model performs a classification task on the input,
    so the output 'summary' will be based on the model's classification logic,
    which in our demo was a dummy binary prediction.
    For a real extractive model, you would pass sentences and get scores.
    """
    # Encode the request text: truncated and padded to 512 tokens, returned as PyTorch tensors.
    encoded = tokenizer(article.text, return_tensors="pt", max_length=512, truncation=True, padding="max_length")

    # Forward pass without gradient tracking; the index of the largest logit is the predicted class.
    with torch.no_grad():
        logits = model(**encoded).logits
    predicted_class = torch.argmax(logits, dim=-1).item()

    # --- SIMULATED EXTRACTIVE SUMMARY LOGIC ---
    # A real extractive model would split the text into sentences, score each
    # sentence, and return the top-N sentences as the summary. This demo model
    # was trained on dummy labels, so the response only reports the predicted
    # class together with a fixed message. Replace this with real extractive
    # logic once the model supports it.
    if predicted_class == 1:
        # Class 1 is the dummy "contains financial keywords" label.
        message = "This article seems to be highly relevant or contains key information."
    else:
        message = "This article is less relevant or contains less critical information."

    return {
        "summary_type": "Extractive Classification Result",
        "message": message,
        "prediction_label": predicted_class,
    }

# To run this API, save the above code as `app.py` and then execute:
# uvicorn app:app --reload --host 0.0.0.0 --port 8000



In [None]:
# 4. Running the API
# Save the app.py file in the same directory where you ran the fine-tuning script (so it can find the lora_adapter directory).
# Open your terminal or command prompt in that directory.
# Run the FastAPI application:

In [None]:
# Run this from a terminal, not from a notebook cell (the server blocks until it is stopped):
# uvicorn app:app --reload --host 0.0.0.0 --port 8000

In [None]:
# 5. Testing the API
# Once the API is running, you can test it using a tool like curl or by visiting the interactive API documentation provided by FastAPI.

# Access FastAPI Docs: Open your web browser and go to http://127.0.0.1:8000/docs (or your chosen host/port). You'll see the Swagger UI where you can interact with your API.

# Using curl (Example):
# Open a new terminal and send a POST request.

# curl -X POST "http://127.0.0.1:8000/summarize/" \
#      -H "Content-Type: application/json" \
#      -d '{"text": "This is a very important business report. It discusses our financial performance in the last quarter, highlighting strong revenue growth and significant cost savings. The board meeting minutes also confirmed these positive results and outlined future strategic investments. Our next steps involve expanding into new markets and optimizing our supply chain further."}'

In [None]:
import os
import torch
import numpy as np # Import numpy to check version and ensure it's loaded correctly
from datasets import load_dataset, Dataset, DatasetDict # Import DatasetDict
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
from transformers import TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, TaskType

# --- Critical: Ensure compatible NumPy version in Colab ---
# Run this before anything else that depends on NumPy.
# A NumPy that is already imported stays loaded for the life of the process, so
# a newly installed version only takes effect after the runtime is restarted.
# Re-importing it in the same process would just return the old module.
import importlib
import subprocess
import sys


def _install_numpy_1x():
    """Install a NumPy 1.x release into the interpreter that is running this code."""
    # `sys.executable -m pip` targets the current environment and, unlike the
    # IPython-only `!pip` syntax, is plain Python, so this cell can also be
    # saved and run as a .py script.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "numpy<2", "--force-reinstall"]
    )


try:
    import numpy as np
    if np.__version__.startswith("2."):
        print("Downgrading NumPy to a stable 1.x version...")
        _install_numpy_1x()
        print(f"NumPy 1.x installed, but version {np.__version__} is still loaded. "
              "Restart the runtime and re-run this cell.")
    else:
        print(f"NumPy version: {np.__version__} (already compatible)")
except ImportError:
    print("NumPy not found. Installing compatible version...")
    _install_numpy_1x()
    # Make the import system notice the package that was just installed.
    importlib.invalidate_caches()
    import numpy as np
    print(f"NumPy version after installation: {np.__version__}")


# --- Critical: Update transformers library ---
# Run this after NumPy fix and before other imports if you still get TypeError for TrainingArguments
# !pip install --upgrade transformers

# --- 1. Data Preparation (Conceptual with Synthetic Fallback) ---
print("--- Starting Data Preparation ---")

# Try to load CNN/DailyMail dataset; fall back to synthetic data if failed.
try:
    # Load the full splits first, then keep a small portion for demonstration.
    # For actual training, drop the .select(range(N)) calls to use every row.
    full_dataset = load_dataset("cnn_dailymail", "3.0.0")
    dataset = full_dataset["train"].select(range(10000)) # Load 10,000 training examples
    eval_dataset = full_dataset["validation"].select(range(1000)) # Load 1,000 validation examples
    print("Successfully loaded CNN/DailyMail dataset.")
except Exception as e:
    # NOTE(review): any exception (not only download failures) lands here and
    # silently switches the rest of the script to the eight-row synthetic dataset.
    print(f"Could not load cnn_dailymail dataset: {e}.")
    print("Loading a dummy synthetic dataset for demonstration purposes.")
    # Create a synthetic dummy dataset if loading from Hugging Face fails.
    # Same column names as the real dataset ("article", "highlights") so the
    # preprocessing below works unchanged.
    dummy_data = {
        "article": [
            "This is the first article about business. It talks about financial growth and market trends. The economy is expanding quickly. Our revenues increased by 15% this quarter.",
            "Another document discusses legal contracts. It highlights clauses related to data privacy and regulatory compliance. This is very important for our new GDPR strategy.",
            "Meeting minutes from yesterday cover product development updates. The team decided to prioritize feature X and de-prioritize feature Y. Next steps include a design review.",
            "A comprehensive research report on renewable energy. It presents findings on solar panel efficiency and wind turbine performance. Government policies are supporting this sector.",
            "Internal communication regarding the new HR policy. It details changes to vacation days and sick leave. Employees should review the updated handbook.",
            "Financial statements for Q3 show strong profits. Despite challenges, our investments are yielding high returns.",
            "A legal brief on intellectual property rights. It covers patent applications and trademark protection.",
            "Summary of competitor analysis: Competitor A launched a new product; Competitor B is expanding market share. We need to adapt quickly."
        ],
        "highlights": [
            "Financial growth is a key topic. Economy is expanding. Revenues increased by 15%.",
            "Legal contracts focus on data privacy and regulatory compliance, important for GDPR.",
            "Meeting minutes cover product development updates, prioritizing feature X.",
            "Research report on solar panel and wind turbine performance in renewable energy.",
            "New HR policy details changes to vacation and sick leave.",
            "Q3 financial statements show strong profits from investments.",
            "Legal brief covers patent applications and trademark protection.",
            "Competitor A launched a new product; Competitor B is expanding market share."
        ]
    }
    dataset = Dataset.from_dict(dummy_data)
    # Training and evaluation share the same rows in this fallback, so
    # evaluation numbers say nothing about generalization.
    eval_dataset = Dataset.from_dict(dummy_data) # Use dummy for eval too

# Load Tokenizer
# Module-level tokenizer; preprocess_function below reads it as a global.
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

# Preprocessing Function
# Real extractive summarization would label each sentence (1 = summary
# sentence, 0 = not). This dataset has no such labels, so a dummy per-article
# binary label is derived from a keyword in the highlights instead.
def preprocess_function(examples):
    """Tokenize a batch of articles and attach a demo binary label to each.

    ``examples`` is a batch with ``"article"`` and ``"highlights"`` lists.
    Articles are tokenized with truncation and padding to 512 tokens. The
    label is 1 when the matching highlight contains "financial"
    (case-insensitive) and 0 otherwise; it is a placeholder, not a real
    extractive-summarization label.
    """
    encoded = tokenizer(examples["article"], max_length=512, truncation=True, padding="max_length")
    encoded["labels"] = [int("financial" in highlight.lower()) for highlight in examples["highlights"]]
    return encoded

# Tokenize each split and drop the raw text columns once they have been encoded.
tokenized_dataset = dataset.map(preprocess_function, batched=True).remove_columns(["article", "highlights"])
tokenized_eval_dataset = eval_dataset.map(preprocess_function, batched=True).remove_columns(["article", "highlights"])

# Return the model inputs as PyTorch tensors.
tokenized_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])
tokenized_eval_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

print("--- Dataset Prepared ---")
print(f"Training dataset size: {len(tokenized_dataset)}")
print(f"Validation dataset size: {len(tokenized_eval_dataset)}")
if len(tokenized_dataset) == 0:
    print("No entries in the tokenized training dataset.")
else:
    # with_format(None) gives plain Python values, which print more readably than tensors.
    print(f"Example tokenized entry: {tokenized_dataset.with_format(None)[0]}")


# --- 2. Fine-tuning DistilBERT with PEFT (LoRA) ---
print("\n--- Starting Model Loading and PEFT Setup ---")

# Load DistilBERT Model for Sequence Classification
# num_labels=2 for our dummy binary classification (relevant/not relevant)
model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)

# Define LoRA Configuration
# TaskType.SEQ_CLS is appropriate for sequence classification.
# target_modules names the layers LoRA attaches adapters to; here these are the
# attention query, key and value projections (q_lin, k_lin, v_lin).
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=64,                      # Rank of the update matrices
    lora_alpha=16,             # Scaling factor for LoRA
    lora_dropout=0.1,          # Dropout probability for LoRA layers
    bias="none",               # Bias setting for LoRA: "none"
    target_modules=["q_lin", "k_lin", "v_lin"], # Standard target modules for DistilBERT attention
)

# Apply LoRA to the base model
# `model` is rebound to the PEFT-wrapped model; the plain base model is no longer referenced by name.
model = get_peft_model(model, peft_config)
print("\n--- PEFT Model Trainable Parameters ---")
model.print_trainable_parameters() # Displays the small number of trainable parameters

# Training Arguments
# Per-epoch evaluation, checkpointing and best-model loading are enabled with
# `eval_strategy`, the current name of the option. The older
# `evaluation_strategy` keyword was deprecated in its favour and raises a
# TypeError on recent transformers releases. Without an evaluation strategy,
# the eval_dataset passed to Trainer and metric_for_best_model are never used.
training_args = TrainingArguments(
    output_dir="./distilbert-summarization-peft", # Directory to save model checkpoints
    num_train_epochs=3,                          # Number of training epochs
    per_device_train_batch_size=8,               # Batch size per device for training
    per_device_eval_batch_size=8,                # Batch size per device for evaluation
    warmup_steps=500,                            # Number of warmup steps for learning rate scheduler
    weight_decay=0.01,                           # Strength of weight decay regularization
    logging_dir="./logs",                        # Directory for storing logs
    logging_steps=100,                           # Log every 100 update steps
    eval_strategy="epoch",                       # Evaluate at the end of each epoch
    save_strategy="epoch",                       # Save a checkpoint at the end of each epoch (same schedule as evaluation)
    load_best_model_at_end=True,                 # Load the best model based on eval_loss after training
    metric_for_best_model="eval_loss",           # Metric to monitor for selecting the best model
    label_names=["labels"],                      # Explicitly tell Trainer the name of the label column
    push_to_hub=False,                           # Do not push the model to Hugging Face Hub (for internal enterprise use)
)

# Initialize Trainer
# Wires together the PEFT-wrapped model, the training arguments and both tokenized splits.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    eval_dataset=tokenized_eval_dataset,
    processing_class=tokenizer, # Tokenizer passed under the processing_class name (resolves FutureWarning)
)

# Train the Model
print("\n--- Starting Model Training ---")
trainer.train()
print("\n--- Model Training Complete ---")

# Save the fine-tuned LoRA adapter and tokenizer
# This saves only the small LoRA weights, which can be loaded onto a base DistilBERT model.
# Adapter and tokenizer are written to the same folder.
output_dir = "./distilbert-summarization-peft/lora_adapter"
os.makedirs(output_dir, exist_ok=True)
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
print(f"\n--- LoRA adapter and tokenizer saved to {output_dir} ---")

In [None]:
# Save the code from the training cell above as train_summarizer.py, then run it:
# python train_summarizer.py
# Save the FastAPI code from the earlier "app.py" cell as app.py in the same directory as train_summarizer.py.

# To run the FastAPI application:

# Make sure the train_summarizer.py script has successfully run and created the lora_adapter directory.
# Save the app.py code in the same directory.
# Open a new terminal or command prompt, navigate to that directory.
# Execute the FastAPI application:
# uvicorn app:app --reload --host 0.0.0.0 --port 8000



In [None]:
# 3. Testing the API
# Once the FastAPI server is running (you'll see a message like "Uvicorn running on http://0.0.0.0:8000"), you can test it.

# Using FastAPI's Interactive Docs:

# Open your web browser and go to http://127.0.0.1:8000/docs.
# You'll see the Swagger UI. Click on the /summarize/ endpoint, then "Try it out", and enter some text in the text field. Click "Execute".
# Using curl (from a new terminal):
# You can send POST requests to the API.

# Example for "financial" related text:

# Bash (run from a terminal while the server is up):

# curl -X POST "http://127.0.0.1:8000/summarize/" \
#      -H "Content-Type: application/json" \
#      -d '{"text": "Our company had excellent financial performance this quarter. Revenue grew substantially, and we achieved significant cost reductions through efficiency improvements."}'

# Example for non-"financial" related text:

# Bash (run from a terminal while the server is up):

# curl -X POST "http://127.0.0.1:8000/summarize/" \
#      -H "Content-Type: application/json" \
#      -d '{"text": "The new marketing campaign will launch next month. It targets a younger demographic and will focus on social media channels to increase brand awareness."}'