<a href="https://colab.research.google.com/github/Satyajeet1718/Multi-Language-Text-Summerizer/blob/main/Multi_Language_text_summarizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from collections import defaultdict
import ipywidgets as widgets
from IPython.display import display

# Download the NLTK resources used by the tokenizers and the English stopword list.
# 'punkt' is kept for older NLTK releases; newer releases load the tokenizer
# tables from 'punkt_tab' instead, so both are fetched to keep
# "Restart & Run All" working regardless of the installed NLTK version.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [8]:
# Marathi and Hindi stopwords lists
# Marathi function words that are ignored when scoring word frequency.
# 'किंवा' is the standard spelling of "or"; the variant 'किव्हा' is kept so
# text using that spelling is still filtered.
marathi_stopwords = {
    'आणि', 'आहे', 'तसेच', 'परंतु', 'मध्ये', 'करून', 'साठी', 'आहेत',
    'हे', 'त्यामुळे', 'किंवा', 'किव्हा', 'असा', 'जरी', 'अशा', 'या', 'हा',
}

# Hindi function words that are ignored when scoring word frequency.
hindi_stopwords = {
    'और', 'है', 'तथा', 'लेकिन',
    'में', 'के', 'लिए', 'हैं',
    'यह', 'क्योंकि', 'या', 'ऐसा',
    'अगर', 'इस', 'जो', 'था',
}

In [9]:
def get_stopwords(language):
    """Return the set of stopwords to ignore for ``language``.

    Args:
        language: One of ``'Marathi'``, ``'Hindi'``, ``'Mix Language'`` (or its
            short form ``'Mix'``). Any other value, including ``'English'``,
            selects the NLTK English stopword list.

    Returns:
        A ``set`` of stopword strings. For ``'Marathi'`` and ``'Hindi'`` this is
        the module-level set itself, so callers should not mutate it.
    """
    if language == 'Marathi':
        return marathi_stopwords
    if language == 'Hindi':
        return hindi_stopwords

    english_stopwords = set(stopwords.words("english"))
    # Accept both spellings of the mixed mode so a caller passing the short
    # label 'Mix' no longer falls through to the English-only list.
    if language in ('Mix Language', 'Mix'):
        return marathi_stopwords | hindi_stopwords | english_stopwords
    return english_stopwords

In [10]:
def _split_sentences(text):
    """Split ``text`` into sentences, also breaking on the Devanagari danda."""
    import re  # local import so this cell does not depend on edits to the import cell

    sentences = []
    for chunk in sent_tokenize(text):
        # The default Punkt model only ends sentences on '.', '?' and '!', so
        # Marathi/Hindi text terminated with '।' or '॥' is split here as well.
        for piece in re.findall(r'[^।॥]+[।॥]*', chunk):
            piece = piece.strip()
            if piece:
                sentences.append(piece)
    return sentences


def summarize_text(input_text, reduction_factor=1.5, language='English'):
    """Build an extractive summary by keeping the highest-scoring sentences.

    Every non-stopword term is weighted by how often it occurs in the whole
    text. A sentence's score is the sum of the weights of the distinct terms it
    contains, and sentences scoring above ``average * reduction_factor`` are
    kept in their original order.

    Args:
        input_text: The text to summarize.
        reduction_factor: Multiplier applied to the average sentence score to
            obtain the inclusion threshold; larger values keep fewer sentences.
        language: Passed to ``get_stopwords`` to choose the stopword set.

    Returns:
        The selected sentences joined by single spaces. If the input is empty
        or whitespace-only, a prompt message is returned instead. If no
        sentence exceeds the threshold (for example a single-sentence input),
        the single highest-scoring sentence is returned rather than an empty
        string.
    """
    if not input_text or not input_text.strip():
        return "Please enter some text to summarize."

    stop_words = get_stopwords(language)
    sentences = _split_sentences(input_text)
    if not sentences:
        return ""

    # Tokenize once per sentence so the frequency table and the sentence scores
    # are built from exactly the same tokens.
    freq_table = defaultdict(int)
    terms_per_sentence = []
    for sentence in sentences:
        terms = []
        for token in word_tokenize(sentence):
            token = token.lower()
            # Skip stopwords and punctuation-only tokens such as '.' or ','.
            if token in stop_words or not any(ch.isalnum() for ch in token):
                continue
            terms.append(token)
            freq_table[token] += 1
        # A set: each distinct term contributes to a sentence's score once.
        terms_per_sentence.append(set(terms))

    # Scores are held by position, so repeated identical sentences are scored
    # independently, and matching is on whole tokens rather than substrings.
    scores = [
        sum(freq_table[term] for term in terms)
        for terms in terms_per_sentence
    ]

    # The average is truncated to an integer before scaling.
    average = int(sum(scores) / len(scores))
    threshold = average * reduction_factor

    selected = [
        sentence for sentence, score in zip(sentences, scores)
        if score > threshold
    ]
    if not selected:
        # Nothing beat the threshold; fall back to the best sentence so the
        # caller never receives an empty summary for non-empty input.
        selected = [sentences[scores.index(max(scores))]]

    return ' '.join(selected)

In [11]:
def on_button_click(b):
    """Summarize the input box's text and show the result in the output box.

    The clicked button's description selects the reduction factor: 1.0 for
    "Less Reduction", 1.5 for any other button.
    """
    source_text = text_area.value
    if b.description == "Less Reduction":
        factor = 1.0
    else:
        factor = 1.5
    output_area.value = summarize_text(
        source_text,
        reduction_factor=factor,
        language=dropdown.value,
    )

In [12]:
# Text boxes for the UI: an editable input box and a disabled summary box.
# Each widget gets its own Layout instance built from the same settings.
_area_box = dict(width='100%', height='150px', border='1px solid #ccc')

text_area = widgets.Textarea(
    layout=widgets.Layout(**_area_box),
    placeholder='Enter the text you want to summarize...',
    description='',
    value='',
)

output_area = widgets.Textarea(
    layout=widgets.Layout(**_area_box),
    placeholder='Summary will appear here...',
    description='',
    value='',
    disabled=True,
)

# Two buttons that pick the reduction factor; the click handler tells them
# apart by their description text.
button_less, button_more = (
    widgets.Button(description=label, layout=widgets.Layout(width='200px'))
    for label in ("Less Reduction", "More Reduction")
)

# Both buttons share the same click handler.
for _button in (button_less, button_more):
    _button.on_click(on_button_click)

# Dropdown for language selection, including Mix Language mode.
# Options are (label, value) pairs: the label is what the user sees and the
# value is what `dropdown.value` returns. 'Hindi' is offered alongside the
# other languages, and the mixed mode reports 'Mix Language', the key that
# get_stopwords() checks for.
dropdown = widgets.Dropdown(
    options=[
        ('English', 'English'),
        ('Marathi', 'Marathi'),
        ('Hindi', 'Hindi'),
        ('Mix', 'Mix Language'),
    ],
    value='English',
    description='Language:',
    layout=widgets.Layout(width='200px')
)

# Captions shown above the input box and above the summary box.
input_label = widgets.Label(value="Enter your text below:")
output_label = widgets.Label(value="Generated Summary:")

# The two reduction buttons sit side by side.
controls = widgets.HBox([button_less, button_more])

# Stack everything vertically, language dropdown first.
ui = widgets.VBox([
    dropdown,
    input_label,
    text_area,
    controls,
    output_label,
    output_area,
])

# Render the assembled interface in the notebook.
display(ui)

VBox(children=(Dropdown(description='Language:', layout=Layout(width='200px'), options=('English', 'Marathi', …