# Notebook cell In [1]. Install the plotting dependency first (IPython shell escape):
# !pip install matplotlib

import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from collections import Counter
from heapq import nlargest

import pandas as pd
import tkinter as tk
from tkinter import scrolledtext, ttk, messagebox, filedialog, colorchooser
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg


# Load the 'en_core_web_sm' spaCy pipeline once, at start-up; summarize_text()
# reuses this shared `nlp` object for every request.
# NOTE(review): the model package presumably has to be installed separately
# before this line can succeed — confirm in the setup instructions.
nlp = spacy.load('en_core_web_sm')

def _normalized_word_frequencies(doc):
    """Count the content words of *doc* and scale the counts into (0, 1].

    Stop words, punctuation and whitespace tokens are ignored and every word
    is lower-cased.  An empty ``Counter`` is returned when the document has
    no content words at all.
    """
    word_freq = Counter(
        token.text.lower()
        for token in doc
        if not (token.is_stop or token.is_punct or token.is_space)
    )
    if word_freq:
        max_freq = max(word_freq.values())
        for word in word_freq:
            word_freq[word] /= max_freq
    return word_freq


def _score_sentences(sentences, word_freq):
    """Return ``{sentence index: score}`` for sentences containing scored words.

    A sentence's score is the sum of the normalized frequencies of its
    tokens.  spaCy tokens are used (rather than ``str.split``) so that a word
    followed by punctuation, e.g. "skills.", still matches its entry in
    *word_freq*.  Keys are indices so identical sentences are not merged.
    """
    scores = {}
    for index, sent in enumerate(sentences):
        score = sum(word_freq.get(token.text.lower(), 0) for token in sent)
        if score:
            scores[index] = score
    return scores


def summarize_text():
    """Summarize the text in the input box and show the result in the UI.

    Reads the text from ``text_input``, scores every sentence by the
    normalized frequency of its content words, and writes the highest-scoring
    sentences (best first) to ``summary_output``.  The number of sentences is
    taken from the ``sentence_count`` spinbox; an invalid or out-of-range
    value falls back to two sentences (or fewer if the text is shorter).

    Side effects: updates ``stats_label``, stores the summary in the
    module-level ``current_summary``, and redraws the visualization tab via
    ``update_visualizations``.  Shows a warning and returns early when the
    text is shorter than 100 characters or contains no content words.
    """
    global current_summary

    # Strip first: a Tk Text widget always appends one trailing newline.
    text = text_input.get("1.0", tk.END).strip()

    if len(text) < 100:
        messagebox.showwarning("Warning", "Please enter a longer text for better summary results.")
        return

    doc = nlp(text)

    word_freq = _normalized_word_frequencies(doc)
    if not word_freq:
        # Only stop words / punctuation: nothing to score, and max() on an
        # empty collection would raise.
        messagebox.showwarning("Warning", "The text contains no meaningful words to summarize.")
        return

    sentences = [sent for sent in doc.sents if sent.text.strip()]
    sent_scores = _score_sentences(sentences, word_freq)

    # Requested summary length, validated against the available sentences.
    fallback = min(2, len(sentences))
    try:
        # Go through float() so a spinbox value such as "3.0" is accepted too.
        num_sentences = int(float(sentence_count.get()))
    except (ValueError, OverflowError):
        num_sentences = fallback
    else:
        if not 0 < num_sentences <= len(sentences):
            num_sentences = fallback

    # Indices of the best-scoring sentences, highest score first.
    top_indices = nlargest(num_sentences, sent_scores, key=sent_scores.get)
    summary = " ".join(sentences[index].text.strip() for index in top_indices)

    # Display the summary
    summary_output.delete("1.0", tk.END)
    summary_output.insert(tk.END, summary)

    # Remember the summary so other features can reuse it.
    current_summary = summary

    # Show statistics, reporting the number of sentences actually selected
    # (it can be lower than requested when few sentences received a score).
    if sentences:
        shown = len(top_indices)
        compression = (shown / len(sentences)) * 100
        stats_label.config(text=f"Original text: {len(text)} characters, {len(sentences)} sentences\n"
                          f"Summary: {len(summary)} characters, {shown} sentences\n"
                          f"Compression ratio: {compression:.1f}%")

    # Update the visualization tab with the new word frequencies
    update_visualizations(word_freq)

def update_visualizations(word_freq):
    """Redraw the word-frequency chart and switch to the Visualizations tab.

    Args:
        word_freq: Mapping of word -> normalized frequency; the ten largest
            entries are plotted as a horizontal bar chart.

    Side effects: replaces the contents of ``word_freq_frame``, hides
    ``sent_score_frame`` and selects ``viz_tab`` in ``notebook``.
    """
    # Hide both chart frames while the content is rebuilt
    word_freq_frame.pack_forget()
    sent_score_frame.pack_forget()

    # Destroy the canvas left over from the previous call; pack_forget() only
    # hides the frame, so without this every new summary would stack another
    # chart inside it.
    for child in word_freq_frame.winfo_children():
        child.destroy()

    # Top words visualization
    fig1 = plt.Figure(figsize=(6, 4), dpi=100)
    ax1 = fig1.add_subplot(111)

    # Top 10 words, most frequent first
    top_words = Counter(word_freq).most_common(10)
    labels = [word for word, _ in top_words]
    values = [freq for _, freq in top_words]

    # Horizontal bar chart; barh() draws the first entry at the bottom, so
    # flip the axis to put the most frequent word at the top.
    ax1.barh(labels, values, color='skyblue')
    ax1.invert_yaxis()
    ax1.set_title('Top Words by Frequency')
    ax1.set_xlabel('Normalized Frequency')

    # Embed the figure in the Tk frame and show the frame again
    word_freq_canvas = FigureCanvasTkAgg(fig1, word_freq_frame)
    word_freq_canvas.draw()
    word_freq_canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)
    word_freq_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)

    # Switch to visualization tab
    notebook.select(viz_tab)

def load_sample():
    """Replace whatever is in the input box with the built-in sample text."""
    first_char = "1.0"
    text_input.delete(first_char, tk.END)
    text_input.insert(tk.END, sample_text)

def open_file():
    """Ask the user for a file and load its contents into the input box.

    The file is read as UTF-8.  Cancelling the dialog does nothing; any error
    while reading or loading is reported in an error dialog.
    """
    chosen_path = filedialog.askopenfilename(
        filetypes=[("Text files", "*.txt"), ("All files", "*.*")]
    )
    # Dialog cancelled: leave the current input untouched.
    if not chosen_path:
        return

    try:
        with open(chosen_path, "r", encoding="utf-8") as handle:
            contents = handle.read()
            text_input.delete("1.0", tk.END)
            text_input.insert(tk.END, contents)
    except Exception as err:
        messagebox.showerror("Error", f"Could not open file: {err}")

def change_theme():
    """Apply the colour theme currently selected in ``style_var``.

    "Light" selects the light palette; any other value selects the dark one.
    The window background, the ttk base style and both text areas are
    recoloured.  Every colour is set explicitly in both themes, so switching
    from Dark back to Light also restores the dark text and insert cursor
    instead of leaving them white.
    """
    if style_var.get() == "Light":
        window_bg, text_bg, text_fg = "#f0f0f0", "white", "black"
    else:
        window_bg, text_bg, text_fg = "#2c2c2c", "#3c3c3c", "white"

    root.configure(bg=window_bg)
    style.configure(".", background=window_bg, foreground=text_fg)
    for widget in (text_input, summary_output):
        # insertbackground is the cursor colour; keep it readable on text_bg.
        widget.configure(bg=text_bg, fg=text_fg, insertbackground=text_fg)

def change_highlight_color():
    """Let the user pick a new highlight colour and apply it to the summary.

    Nothing changes when the colour dialog returns no colour.
    """
    # askcolor() yields a pair; only its second element is used as the colour.
    _first, picked = colorchooser.askcolor(title="Choose highlight color for key sentences")
    if not picked:
        return
    summary_output.tag_configure("highlight", background=picked)
    highlight_summary()

def highlight_summary():
    """Apply the "highlight" tag to the whole summary text.

    The tag is added in place.  Reading the text with ``get("1.0", tk.END)``
    and re-inserting it would also re-insert the newline Tk keeps at the end
    of every Text widget, growing the summary by one blank line per call.
    """
    summary_output.tag_add("highlight", "1.0", tk.END)

def copy_to_clipboard():
    """Copy the current summary to the system clipboard.

    Shows a warning instead of copying when the summary box is empty, and an
    information dialog after a successful copy.
    """
    # "end-1c" skips the newline Tk always keeps at the end of a Text widget,
    # so the clipboard receives exactly the summary.
    summary = summary_output.get("1.0", "end-1c")
    if not summary.strip():
        messagebox.showwarning("Warning", "There is no summary to copy yet.")
        return

    root.clipboard_clear()
    root.clipboard_append(summary)
    messagebox.showinfo("Copied", "Summary copied to clipboard")

def show_help():
    """Open a separate 600x400 window showing the read-only usage guide."""
    help_text = """Text Summarizer Help
    
How to use:
1. Enter or paste text in the input area or load from a file
2. Select the number of sentences to include in the summary
3. Click "Generate Summary" to create a summary
4. View the summary in the output area
5. Save the summary or copy to clipboard as needed
6. Check the Visualization tab for word frequency and sentence score charts

Features:
- Load text from files
- Save summaries as text files
- Copy summaries to clipboard
- Light and dark themes
- Highlight key sentences
- Data visualizations
- Adjustable summary length

Tips:
- Longer texts provide better results
- Try different sentence counts for different summary lengths
- Use visualizations to understand which words and sentences are most important
"""

    # Child window owned by the main application window
    help_window = tk.Toplevel(root)
    help_window.title("Help - Text Summarizer")
    help_window.geometry("600x400")

    # Scrollable viewer: fill it first, then disable it so the guide cannot
    # be edited by the user.
    viewer = scrolledtext.ScrolledText(help_window, wrap=tk.WORD)
    viewer.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)
    viewer.insert(tk.END, help_text)
    viewer.config(state=tk.DISABLED)

# Built-in sample article that load_sample() inserts into the input box.
# The string is runtime data, so its wording is deliberately left untouched.
sample_text = """In an attempt to build an AI-ready workforce, Microsoft announced Intelligent Cloud Hub which has been lanched to empower the next generation of students with AI-ready skills. Envisioned as a three-year collaborative program, Intelligent Cloud Hub will support around 100 institutions with AI infrastructure, course content and curriculum, developer support, development tools and give students access to cloud and AI services. As part of the program, the Redmond giant which wants to expand its reach and is planning to build a strong developer ecosystem in India with the program will set up the core AI infrastructure and IoT Hub for the selected campuses. The company will provide AI development tools and Azure AI services such as Microsoft Cognitive Services, Bot Services and Azure Machine Learning.According to Manish Prakash, Country General Manager-PS, Health and Education, Microsoft India, said, With AI being the defining technology of our time, it is transforming lives and industry and the jobs of tomorrow will require a different skillset. This will require more collaborations and training and working with AI. That's why it has become more critical than ever for educational institutions to integrate new cloud and AI technologies. The program is an attempt to ramp up the institutional set-up and build capabilities among the educators to educate the workforce of tomorrow. The program aims to build up the cognitive skills and in-depth understanding of developing intelligent cloud connected solutions for applications across industry. Earlier in April this year, the company announced Microsoft Professional Program In AI as a learning track open to the public. The program was developed to provide job ready skills to programmers who wanted to hone their skills in AI and data science with a series of online courses which featured hands-on labs and expert instructors as well. 
This program also included developer-focused AI school that provided a bunch of assets to help build AI skills."""

# Create the main application window (initial size 900x700)
root = tk.Tk()
root.title("Advanced Text Summarizer")
root.geometry("900x700")

# Shared ttk style object; change_theme() reconfigures it to switch colours
style = ttk.Style()
style.theme_use('clam')  # 'clam', 'alt', 'default', 'classic'

# Most recent summary text; summarize_text() overwrites it via `global`.
# NOTE(review): nothing in the visible code reads this value — presumably
# kept for an export feature; confirm.
current_summary = ""

# Create the menu bar and attach it to the main window
menu_bar = tk.Menu(root)
root.config(menu=menu_bar)

# File menu: open a text file, or exit the application
file_menu = tk.Menu(menu_bar, tearoff=0)
menu_bar.add_cascade(label="File", menu=file_menu)
file_menu.add_command(label="Open Text File", command=open_file)
file_menu.add_separator()
file_menu.add_command(label="Exit", command=root.quit)

# Edit menu: clipboard and highlight actions on the summary
edit_menu = tk.Menu(menu_bar, tearoff=0)
menu_bar.add_cascade(label="Edit", menu=edit_menu)
edit_menu.add_command(label="Copy Summary to Clipboard", command=copy_to_clipboard)
edit_menu.add_command(label="Highlight Key Sentences", command=highlight_summary)
edit_menu.add_command(label="Choose Highlight Color", command=change_highlight_color)

# View menu: theme selection. style_var holds the selected theme name
# ("Light" or "Dark") and is read by change_theme().
view_menu = tk.Menu(menu_bar, tearoff=0)
menu_bar.add_cascade(label="View", menu=view_menu)
style_var = tk.StringVar(value="Light")
view_menu.add_radiobutton(label="Light Theme", variable=style_var, value="Light", command=change_theme)
view_menu.add_radiobutton(label="Dark Theme", variable=style_var, value="Dark", command=change_theme)

# Help menu: usage guide window and a short "About" dialog
help_menu = tk.Menu(menu_bar, tearoff=0)
menu_bar.add_cascade(label="Help", menu=help_menu)
help_menu.add_command(label="Help Contents", command=show_help)
help_menu.add_command(label="About", command=lambda: messagebox.showinfo("About", "Text Summarizer v1.0\nCreated using NLP and spaCy"))

# Create the tabbed container that fills the main window
notebook = ttk.Notebook(root)
notebook.pack(fill='both', expand=True, padx=10, pady=10)

# First tab: the summarizer itself
main_tab = ttk.Frame(notebook)
notebook.add(main_tab, text="Summarizer")

# Toolbar across the top of the main tab; buttons are packed left to right
toolbar = ttk.Frame(main_tab)
toolbar.pack(fill=tk.X, padx=5, pady=5)

open_button = ttk.Button(toolbar, text="Open File", command=open_file)
open_button.pack(side=tk.LEFT, padx=2)



copy_button = ttk.Button(toolbar, text="Copy to Clipboard", command=copy_to_clipboard)
copy_button.pack(side=tk.LEFT, padx=2)

# Input area: scrollable text box that summarize_text() reads from
input_frame = ttk.LabelFrame(main_tab, text="Input Text")
input_frame.pack(fill='both', expand=True, padx=5, pady=5)

text_input = scrolledtext.ScrolledText(input_frame, wrap=tk.WORD, height=12, font=("Arial", 10))
text_input.pack(fill='both', expand=True, padx=5, pady=5)

# Controls area: summary length on the left, action buttons on the right
control_frame = ttk.Frame(main_tab)
control_frame.pack(fill='x', padx=5, pady=5)

# Requested number of summary sentences (spinbox range 1-20, initial value 2)
ttk.Label(control_frame, text="Number of sentences:").pack(side=tk.LEFT, padx=5)
sentence_count = ttk.Spinbox(control_frame, from_=1, to=20, width=5)
sentence_count.set(2)
sentence_count.pack(side=tk.LEFT, padx=5)

# Packed with side=RIGHT, so the first button packed ends up rightmost
summarize_button = ttk.Button(control_frame, text="Generate Summary", command=summarize_text)
summarize_button.pack(side=tk.RIGHT, padx=5)

sample_button = ttk.Button(control_frame, text="Load Sample Text", command=load_sample)
sample_button.pack(side=tk.RIGHT, padx=5)

# Output area: scrollable text box that receives the generated summary
output_frame = ttk.LabelFrame(main_tab, text="Summary")
output_frame.pack(fill='both', expand=True, padx=5, pady=5)

summary_output = scrolledtext.ScrolledText(output_frame, wrap=tk.WORD, height=10, font=("Arial", 10))
summary_output.pack(fill='both', expand=True, padx=5, pady=5)
# Text tag used by highlight_summary(); its colour can be changed later by
# change_highlight_color()
summary_output.tag_configure("highlight", background="yellow")

# Stats area: label that summarize_text() fills with size/compression figures
stats_frame = ttk.LabelFrame(main_tab, text="Statistics")
stats_frame.pack(fill='x', padx=5, pady=5)

stats_label = ttk.Label(stats_frame, text="Enter text and generate summary to see statistics")
stats_label.pack(padx=5, pady=5)

# Create the visualization tab
viz_tab = ttk.Frame(notebook)
notebook.add(viz_tab, text="Visualizations")

# Word frequency visualization frame. It is created unpacked here;
# update_visualizations() packs it once a chart has been drawn.
word_freq_frame = ttk.LabelFrame(viz_tab, text="Top Words by Frequency")

# Sentence score visualization frame.
# NOTE(review): the visible code only ever calls pack_forget() on this frame
# and never draws into it — the sentence-score chart looks unimplemented.
sent_score_frame = ttk.LabelFrame(viz_tab, text="Top Sentences by Score")

# Create settings tab
settings_tab = ttk.Frame(notebook)
notebook.add(settings_tab, text="Settings")

# UI Settings: container for the theme, font and highlight controls
ui_frame = ttk.LabelFrame(settings_tab, text="User Interface")
ui_frame.pack(fill='x', padx=10, pady=10)

# Theme options. The combobox is bound to style_var, the same variable used
# by the View menu radiobuttons, and the theme is applied when "Apply" is
# clicked (change_theme reads style_var).
theme_frame = ttk.Frame(ui_frame)
theme_frame.pack(fill='x', padx=5, pady=5)
ttk.Label(theme_frame, text="Theme:").pack(side=tk.LEFT, padx=5)
theme_combo = ttk.Combobox(theme_frame, values=["Light", "Dark"], textvariable=style_var, state="readonly")
theme_combo.current(0)  # select the first entry, "Light"
theme_combo.pack(side=tk.LEFT, padx=5)
ttk.Button(theme_frame, text="Apply", command=change_theme).pack(side=tk.LEFT, padx=5)

# Font options: spinbox (8-20, initial value 10) plus an "Apply" button
font_frame = ttk.Frame(ui_frame)
font_frame.pack(fill='x', padx=5, pady=5)
ttk.Label(font_frame, text="Font Size:").pack(side=tk.LEFT, padx=5)
font_size = ttk.Spinbox(font_frame, from_=8, to=20, width=5)
font_size.set(10)
font_size.pack(side=tk.LEFT, padx=5)


def apply_font_size():
    """Apply the font size from the spinbox to the input and summary boxes.

    The spinbox is free-text, so the value is validated: non-numeric input
    produces a warning instead of an unhandled ValueError in the Tk callback,
    and out-of-range numbers are clamped to the spinbox limits (8-20).
    """
    try:
        # Go through float() so a value such as "12.0" is accepted too.
        size = int(float(font_size.get()))
    except (ValueError, OverflowError):
        messagebox.showwarning("Warning", "Font size must be a whole number between 8 and 20.")
        return

    size = max(8, min(20, size))
    font_size.set(size)  # show the value that is actually applied
    new_font = ("Arial", size)
    for widget in (text_input, summary_output):
        widget.config(font=new_font)


ttk.Button(font_frame, text="Apply", command=apply_font_size).pack(side=tk.LEFT, padx=5)

# Highlight color: button that opens the colour picker via
# change_highlight_color() (same action as Edit > Choose Highlight Color)
highlight_frame = ttk.Frame(ui_frame)
highlight_frame.pack(fill='x', padx=5, pady=5)
ttk.Button(highlight_frame, text="Change Highlight Color", command=change_highlight_color).pack(side=tk.LEFT, padx=5)

# Start the Tk event loop; the UI is driven from here by the callbacks
# registered above.
root.mainloop()



# (Notebook output residue, not part of the program:)
#   from .autonotebook import tqdm as notebook_tqdm
