In [7]:
import tkinter as tk
from tkinter import scrolledtext
from transformers import BartTokenizer, BartForConditionalGeneration
from langdetect import detect, DetectorFactory
from langdetect.lang_detect_exception import LangDetectException

# Fix langdetect's seed so that detect() gives the same answer for the same
# text on every run.
DetectorFactory.seed = 0

# Load the fine-tuned summarization checkpoint once at start-up.
# `tokenizer` and `model` are module-level globals read by summarize_text().
# NOTE(review): the name has no hub organisation prefix, so it is presumably
# a local directory next to this notebook — confirm.
model_name = "fine-tuned-bart-mlsum-fr-sampled"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

# Function to summarize text
def summarize_text(text, max_length=150, min_length=30):
    """Return a summary of ``text`` produced by the module-level BART model.

    The text is prefixed with ``"summarize: "``, encoded with truncation at
    1024 tokens, and decoded from the first sequence returned by a 4-beam
    search.

    Args:
        text: The text to summarize.
        max_length: Upper bound passed to ``model.generate``.
        min_length: Lower bound passed to ``model.generate``.

    Returns:
        The decoded summary with special tokens removed.
    """
    generation_options = {
        "max_length": max_length,
        "min_length": min_length,
        "length_penalty": 2.0,
        "num_beams": 4,
        "early_stopping": True,
    }

    prompt = "summarize: " + text
    prompt_ids = tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=1024,
        truncation=True,
    )

    generated = model.generate(prompt_ids, **generation_options)

    # Only the first returned sequence is used.
    return tokenizer.decode(generated[0], skip_special_tokens=True)

# Function to handle the summarize button click
def summarize():
    """Handle a click on the Summarize button.

    Reads the input box, detects the language of its content, shows the
    detected language in the label, and writes either the summary or an
    explanatory message into the output box. Empty input and a failed
    language detection are reported in the output box and leave the
    language label untouched.
    """

    def _show_output(message):
        # Replace whatever the output box currently holds with `message`.
        output_text_area.delete("1.0", tk.END)
        output_text_area.insert(tk.END, message)

    source_text = input_text_area.get("1.0", tk.END).strip()
    if not source_text:
        _show_output("Please enter text to summarize.")
        return

    try:
        language = detect(source_text)
    except LangDetectException:
        _show_output("Language detection failed. Please enter a valid text.")
        return

    language_label_var.set(f"Detected Language: {language}")

    if language not in ("fr", "de", "en"):
        result = f"Unsupported language detected: {language}"
    else:
        result = summarize_text(source_text)

    _show_output(result)

# Build the single application window. Widgets are packed in the order the
# statements appear below, so statement order is also the on-screen order.
root = tk.Tk()
root.title("Text Summarizer")

# Input area: a caption plus a word-wrapped, scrollable text box.
# summarize() reads the text to process from `input_text_area`.
input_text_label = tk.Label(root, text="Input Text:")
input_text_label.pack()
input_text_area = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=60, height=10)
input_text_area.pack(padx=10, pady=10)

# Button that triggers summarize() when clicked.
summarize_button = tk.Button(root, text="Summarize", command=summarize)
summarize_button.pack(pady=10)

# Label bound to a StringVar; summarize() updates the variable to show the
# detected language. It starts out as "N/A".
language_label_var = tk.StringVar(value="Detected Language: N/A")
language_label = tk.Label(root, textvariable=language_label_var)
language_label.pack()

# Output area: a caption plus a second scrollable text box that summarize()
# overwrites with the summary or a status message.
output_text_label = tk.Label(root, text="Summary:")
output_text_label.pack()
output_text_area = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=60, height=10)
output_text_area.pack(padx=10, pady=10)

# Hand control to Tk's event loop; statements after this line run only once
# the loop exits.
root.mainloop()


In [None]:
import tkinter as tk
from tkinter import scrolledtext
from transformers import BartTokenizer, BartForConditionalGeneration, MarianMTModel, MarianTokenizer
from langdetect import detect, DetectorFactory
from langdetect.lang_detect_exception import LangDetectException

# Fix langdetect's seed so that detect() gives the same answer for the same
# text on every run.
DetectorFactory.seed = 0

# Load the fine-tuned summarization checkpoint once at start-up.
# `tokenizer` and `model` are module-level globals read by summarize_text().
# NOTE(review): the name has no hub organisation prefix, so it is presumably
# a local directory next to this notebook — confirm.
model_name = "fine-tuned-bart-mlsum-fr-sampled"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

# Marian translation model/tokenizer pairs, keyed by the source-language code
# that langdetect reports. translate_to_english() looks entries up here.
translation_models = {
    language_code: {
        "model": MarianMTModel.from_pretrained(checkpoint),
        "tokenizer": MarianTokenizer.from_pretrained(checkpoint),
    }
    for language_code, checkpoint in (
        ("fr", "Helsinki-NLP/opus-mt-fr-en"),
        ("de", "Helsinki-NLP/opus-mt-de-en"),
    )
}

# Function to summarize text
def summarize_text(text, max_length=150, min_length=30):
    """Return a summary of ``text`` produced by the module-level BART model.

    The text is prefixed with ``"summarize: "``, encoded with truncation at
    1024 tokens, and decoded from the first sequence returned by a 4-beam
    search.

    Args:
        text: The text to summarize.
        max_length: Upper bound passed to ``model.generate``.
        min_length: Lower bound passed to ``model.generate``.

    Returns:
        The decoded summary with special tokens removed.
    """
    generation_options = {
        "max_length": max_length,
        "min_length": min_length,
        "length_penalty": 2.0,
        "num_beams": 4,
        "early_stopping": True,
    }

    prompt = "summarize: " + text
    prompt_ids = tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=1024,
        truncation=True,
    )

    generated = model.generate(prompt_ids, **generation_options)

    # Only the first returned sequence is used.
    return tokenizer.decode(generated[0], skip_special_tokens=True)

# Function to translate text to English
def translate_to_english(text, source_lang):
    """Translate ``text`` to English with the Marian pair for ``source_lang``.

    Args:
        text: The text to translate.
        source_lang: Key into ``translation_models`` selecting the
            model/tokenizer pair to use.

    Returns:
        The decoded translation with special tokens removed.

    Raises:
        KeyError: If ``translation_models`` has no entry for ``source_lang``.
    """
    pair = translation_models[source_lang]
    mt_tokenizer, mt_model = pair["tokenizer"], pair["model"]

    source_ids = mt_tokenizer.encode(text, return_tensors="pt", truncation=True)
    english_ids = mt_model.generate(
        source_ids,
        max_length=512,
        num_beams=4,
        early_stopping=True,
    )

    # Only the first returned sequence is used.
    return mt_tokenizer.decode(english_ids[0], skip_special_tokens=True)

# Function to handle the summarize button click
def summarize():
    """Handle a click on the Summarize button.

    Reads the input box, detects the language of its content and shows it in
    the label. Text detected as "fr", "de" or "en" is summarized; for "fr" and
    "de" the summary is then passed through translate_to_english(). Any other
    language yields an "unsupported" message. Empty input and a failed
    language detection are reported in the output box and leave the language
    label untouched.
    """

    def _show_output(message):
        # Replace whatever the output box currently holds with `message`.
        output_text_area.delete("1.0", tk.END)
        output_text_area.insert(tk.END, message)

    source_text = input_text_area.get("1.0", tk.END).strip()
    if not source_text:
        _show_output("Please enter text to summarize.")
        return

    try:
        language = detect(source_text)
    except LangDetectException:
        _show_output("Language detection failed. Please enter a valid text.")
        return

    language_label_var.set(f"Detected Language: {language}")

    if language not in ("fr", "de", "en"):
        _show_output(f"Unsupported language detected: {language}")
        return

    result = summarize_text(source_text)
    if language != "en":
        result = translate_to_english(result, language)
    _show_output(result)

# Build the single application window. Widgets are packed in the order the
# statements appear below, so statement order is also the on-screen order.
root = tk.Tk()
root.title("Text Summarizer")

# Input area: a caption plus a word-wrapped, scrollable text box.
# summarize() reads the text to process from `input_text_area`.
input_text_label = tk.Label(root, text="Input Text:")
input_text_label.pack()
input_text_area = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=60, height=10)
input_text_area.pack(padx=10, pady=10)

# Button that triggers summarize() when clicked.
summarize_button = tk.Button(root, text="Summarize", command=summarize)
summarize_button.pack(pady=10)

# Label bound to a StringVar; summarize() updates the variable to show the
# detected language. It starts out as "N/A".
language_label_var = tk.StringVar(value="Detected Language: N/A")
language_label = tk.Label(root, textvariable=language_label_var)
language_label.pack()

# Output area: a caption plus a second scrollable text box that summarize()
# overwrites with the summary or a status message.
output_text_label = tk.Label(root, text="Summary:")
output_text_label.pack()
output_text_area = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=60, height=10)
output_text_area.pack(padx=10, pady=10)

# Hand control to Tk's event loop; statements after this line run only once
# the loop exits.
root.mainloop()


In [10]:
import tkinter as tk
from tkinter import scrolledtext, filedialog, messagebox
from PIL import Image
import pytesseract
from transformers import T5ForConditionalGeneration, T5Tokenizer, BartTokenizer, BartForConditionalGeneration, MarianMTModel, MarianTokenizer, MBartForConditionalGeneration, MBart50Tokenizer
from langdetect import detect, DetectorFactory
from langdetect.lang_detect_exception import LangDetectException

# Fix langdetect's seed so that detect() gives the same answer for the same
# text on every run.
DetectorFactory.seed = 0

# English: stock T5 checkpoint. summarize() passes these to summarize_text()
# when the detected language is "en".
model_name_en = 't5-small'  # You can also use 't5-base', 't5-large', etc.
tokenizer_en = T5Tokenizer.from_pretrained(model_name_en)
model_en = T5ForConditionalGeneration.from_pretrained(model_name_en)

# French: fine-tuned BART checkpoint, used by summarize() for "fr" input.
# NOTE(review): the name has no hub organisation prefix, so it is presumably
# a local directory next to this notebook — confirm.
model_name_fr = "fine-tuned-bart-mlsum-fr-sampled"
tokenizer_fr = BartTokenizer.from_pretrained(model_name_fr)
model_fr = BartForConditionalGeneration.from_pretrained(model_name_fr)

# German: mBART-50 checkpoint, used by summarize() for "de" input.
# NOTE(review): the checkpoint name ("many-to-many-mmt") suggests a
# translation model rather than a summarization model — confirm this is the
# intended choice.
model_name_de = 'facebook/mbart-large-50-many-to-many-mmt'
tokenizer_de = MBart50Tokenizer.from_pretrained(model_name_de)
model_de = MBartForConditionalGeneration.from_pretrained(model_name_de)

# Marian translation model/tokenizer pairs keyed by source-language code.
# Only "fr" is registered; translate_to_english() raises KeyError for any
# other key.
translation_models = {
    "fr": {
        "model": MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-fr-en"),
        "tokenizer": MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-fr-en"),
    }
}

# Function to summarize text
def summarize_text(text, tokenizer, model, max_length=70, min_length=30, forced_bos_token_id=None, task_prefix="summarize: "):
    """Generate text from ``text`` with the given tokenizer/model pair.

    ``task_prefix`` is prepended to ``text``, the result is encoded with
    truncation at 1024 tokens, and the first sequence returned by a 4-beam
    search is decoded.

    Args:
        text: The text to process.
        tokenizer: Object providing ``encode`` and ``decode``.
        model: Object providing ``generate``.
        max_length: Upper bound passed to ``model.generate``.
        min_length: Lower bound passed to ``model.generate``.
        forced_bos_token_id: Forwarded unchanged to ``model.generate``
            (``None`` by default).
        task_prefix: String placed in front of ``text`` before encoding.

    Returns:
        The decoded output with special tokens removed.
    """
    generation_options = dict(
        max_length=max_length,
        min_length=min_length,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
        forced_bos_token_id=forced_bos_token_id,
    )

    prompt_ids = tokenizer.encode(
        task_prefix + text,
        return_tensors="pt",
        max_length=1024,
        truncation=True,
    )

    generated = model.generate(prompt_ids, **generation_options)

    # Only the first returned sequence is used.
    return tokenizer.decode(generated[0], skip_special_tokens=True)

# Function to translate text to English
def translate_to_english(text, source_lang):
    """Translate ``text`` to English with the Marian pair for ``source_lang``.

    Args:
        text: The text to translate.
        source_lang: Key into ``translation_models`` selecting the
            model/tokenizer pair to use.

    Returns:
        The decoded translation with special tokens removed.

    Raises:
        KeyError: If ``translation_models`` has no entry for ``source_lang``.
    """
    pair = translation_models[source_lang]
    mt_tokenizer, mt_model = pair["tokenizer"], pair["model"]

    source_ids = mt_tokenizer.encode(text, return_tensors="pt", truncation=True)
    english_ids = mt_model.generate(
        source_ids,
        max_length=512,
        num_beams=4,
        early_stopping=True,
    )

    # Only the first returned sequence is used.
    return mt_tokenizer.decode(english_ids[0], skip_special_tokens=True)

# Function to handle the summarize button click
def summarize():
    """Handle a click on the Summarize button.

    Reads the input box, detects the language of its content, shows it in the
    language label, and routes the text by language:

    * "fr": summarized with the fine-tuned BART pair, then translated to
      English with translate_to_english().
    * "de": passed through the mBART-50 pair with German declared as the
      source language and English forced as the output language.
    * "en": summarized with the T5 pair.
    * anything else: an "unsupported language" message.

    The result replaces the content of the output box. Empty input and a
    failed language detection are reported in the output box and leave the
    language label untouched.
    """
    input_text = input_text_area.get("1.0", tk.END).strip()
    if not input_text:
        output_text_area.delete("1.0", tk.END)
        output_text_area.insert(tk.END, "Please enter text to summarize.")
        return

    try:
        detected_language = detect(input_text)
    except LangDetectException:
        output_text_area.delete("1.0", tk.END)
        output_text_area.insert(tk.END, "Language detection failed. Please enter a valid text.")
        return

    language_label_var.set(f"Detected Language: {detected_language}")

    if detected_language == "fr":
        summary = summarize_text(input_text, tokenizer_fr, model_fr)
        summary = translate_to_english(summary, "fr")
    elif detected_language == "de":
        # mBART-50 takes the source language from the tokenizer, which
        # defaults to en_XX; without this the German input is encoded with
        # the wrong language tag.
        tokenizer_de.src_lang = "de_DE"
        # The "summarize: " prefix is a T5 convention; this checkpoint would
        # treat it as part of the source text, so pass an empty prefix.
        # NOTE(review): the checkpoint loaded for German looks like a
        # translation model, so this output is presumably a length-capped
        # translation rather than a true summary — confirm.
        summary = summarize_text(
            input_text,
            tokenizer_de,
            model_de,
            forced_bos_token_id=tokenizer_de.lang_code_to_id["en_XX"],
            task_prefix="",
        )
    elif detected_language == "en":
        summary = summarize_text(input_text, tokenizer_en, model_en, task_prefix="summarize: ")
    else:
        summary = f"Unsupported language detected: {detected_language}"

    output_text_area.delete("1.0", tk.END)
    output_text_area.insert(tk.END, summary)

# Function to handle the extract text from image button click
def extract_text_from_image():
    """Let the user pick an image, OCR it, and load the text into the input box.

    Opens a file-selection dialog restricted to common image extensions. If
    the user cancels, nothing happens. Otherwise the image is passed to
    ``pytesseract.image_to_string`` and the recognised text replaces the
    current content of the input text area. Any exception raised along the
    way is shown in an error dialog instead of propagating.
    """
    # Tk expects each pattern as a separate item. A single semicolon-joined
    # string is only understood by the native Windows dialog and matches no
    # files elsewhere.
    image_patterns = ("*.png", "*.jpg", "*.jpeg", "*.bmp", "*.gif")
    file_path = filedialog.askopenfilename(filetypes=[("Image files", image_patterns)])
    if not file_path:
        return

    try:
        # The context manager closes the file handle opened by Image.open
        # once OCR is done.
        with Image.open(file_path) as image:
            text = pytesseract.image_to_string(image)
        input_text_area.delete("1.0", tk.END)
        input_text_area.insert(tk.END, text)
    except Exception as e:
        # UI boundary: report the failure to the user rather than letting it
        # escape into the Tk callback machinery.
        messagebox.showerror("Error", f"Failed to extract text from image. Error: {e}")

# Build the single application window. Widgets are packed in the order the
# statements appear below, so statement order is also the on-screen order.
root = tk.Tk()
root.title("Text Summarizer")

# Input area: a caption plus a word-wrapped, scrollable text box.
# summarize() reads from `input_text_area`; extract_text_from_image() writes
# the OCR result into it.
input_text_label = tk.Label(root, text="Input Text:")
input_text_label.pack()
input_text_area = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=60, height=10)
input_text_area.pack(padx=10, pady=10)

# Button that triggers summarize() when clicked.
summarize_button = tk.Button(root, text="Summarize", command=summarize)
summarize_button.pack(pady=10)

# Button that triggers extract_text_from_image() when clicked.
extract_button = tk.Button(root, text="Extract Text from Image", command=extract_text_from_image)
extract_button.pack(pady=10)

# Label bound to a StringVar; summarize() updates the variable to show the
# detected language. It starts out as "N/A".
language_label_var = tk.StringVar(value="Detected Language: N/A")
language_label = tk.Label(root, textvariable=language_label_var)
language_label.pack()

# Output area: a caption plus a second scrollable text box that summarize()
# overwrites with the summary or a status message.
output_text_label = tk.Label(root, text="Summary:")
output_text_label.pack()
output_text_area = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=60, height=10)
output_text_area.pack(padx=10, pady=10)

# Hand control to Tk's event loop; statements after this line run only once
# the loop exits.
root.mainloop()


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
