<a href="https://colab.research.google.com/github/SarsijNayan/PLAGIARIASM_REMOVER/blob/main/Untitled8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Complete Colab cell — fixed (works in Google Colab)
!pip install -q nltk ipywidgets

import nltk

# NLTK data packages fetched before the rest of the cell runs. The list holds
# both the plain and the "_tab"/"_eng" variants of the tokenizer and tagger
# resources — presumably so that different NLTK releases each find the name
# they look up (TODO confirm against the installed NLTK version).
nltk_data_to_download = [
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
    'wordnet',
    'stopwords'
]
for res in nltk_data_to_download:
    try:
        # nltk.download() signals a failed download by returning False rather
        # than raising, so the return value has to be checked explicitly or a
        # missing resource only surfaces later as a LookupError.
        if not nltk.download(res, quiet=True):
            print(f"Warning downloading {res}: download did not complete")
    except Exception as e:
        # Best-effort: keep going so one unavailable resource does not stop
        # the remaining downloads.
        print(f"Warning downloading {res}: {e}")

import random, re
from nltk.corpus import wordnet, stopwords
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from nltk.tokenize.treebank import TreebankWordDetokenizer
import ipywidgets as widgets
from IPython.display import display

# Shared detokenizer instance; used to join a token list back into a string.
_detok = TreebankWordDetokenizer()
# English stop-word list stored as a set so membership tests are cheap.
# Tokens are lower-cased before being looked up in it.
_stop_words = set(stopwords.words("english"))

def _is_simple_word(token):
    return bool(re.fullmatch(r"[A-Za-z]+", token))

def _get_synonyms_same_pos(word):
    """Return single-word WordNet synonyms of *word* that share its POS tag.

    Every lemma of every synset of *word* is considered. A lemma is kept only
    if it is a single alphabetic word (multi-word lemmas, which WordNet joins
    with underscores, are dropped) and differs from *word* itself. The
    surviving candidates are then filtered to those whose part-of-speech tag,
    obtained by tagging the candidate on its own, equals the tag obtained by
    tagging *word* on its own.

    Args:
        word: The word to look up.

    Returns:
        A sorted list of distinct synonym strings; empty when nothing
        qualifies.
    """
    target = word.lower()
    candidates = set()
    for synset in wordnet.synsets(word):
        for lemma in synset.lemmas():
            name = lemma.name().replace('_', ' ')
            # Skip the word itself: offering it as its own "synonym" makes
            # the replacement a no-op (or silently changes its casing).
            if re.fullmatch(r"[A-Za-z]+", name) and name.lower() != target:
                candidates.add(name)
    if not candidates:
        return []
    orig_pos = pos_tag([word])[0][1]
    # Iterate in sorted order: the iteration order of a set of strings varies
    # between interpreter sessions (hash randomisation), which would make a
    # seeded random.choice() over the result non-reproducible.
    return [s for s in sorted(candidates) if pos_tag([s])[0][1] == orig_pos]

def plagiarism_remover_token(token):
    """Swap a single token for a randomly chosen synonym, when one exists.

    The token is returned unchanged if it is a stop word (compared in lower
    case), is not a purely alphabetic word, or has no synonym candidates.
    Otherwise one candidate is drawn with ``random.choice`` and its casing is
    adapted to the token: title-cased if the token is title-cased, upper-cased
    if the token is all upper case, and used as-is in every other case.
    """
    lowered = token.lower()
    if lowered in _stop_words or not _is_simple_word(token):
        return token

    options = _get_synonyms_same_pos(lowered)
    if not options:
        return token

    replacement = random.choice(options)
    # The title-case test comes first, so a one-letter capital counts as title.
    if token.istitle():
        return replacement.title()
    if token.isupper():
        return replacement.upper()
    return replacement

def plagiarism_removal(text):
    """Rewrite *text* by replacing eligible words with synonyms.

    Each line is tokenized, every token is passed through
    ``plagiarism_remover_token``, and the tokens are joined back together
    with the shared detokenizer.

    The text is processed one line at a time because tokenizing discards
    newlines; handling the whole text in one go would collapse paragraphs
    and line breaks into a single line. Blank or whitespace-only lines are
    passed through untouched.

    Args:
        text: The input string. May be empty or None.

    Returns:
        The rewritten text with the original line breaks kept, or *text*
        itself when it is empty, None or whitespace only.
    """
    if not text or not text.strip():
        return text
    rewritten_lines = []
    for line in text.split("\n"):
        if not line.strip():
            # Keep empty / whitespace-only lines so paragraph gaps survive.
            rewritten_lines.append(line)
            continue
        tokens = word_tokenize(line)
        replaced = [plagiarism_remover_token(t) for t in tokens]
        rewritten_lines.append(_detok.detokenize(replaced))
    return "\n".join(rewritten_lines)

# UI widgets
# All widgets below are module-level objects: the click handler reads and
# writes them by name, and the layout at the end of the cell arranges them.

# Multi-line box holding the text the user wants rewritten.
text_input = widgets.Textarea(
    value="",
    placeholder="Type or paste text here...",
    description="Input:",
    layout=widgets.Layout(width='880px', height='200px')
)

# Multi-line box that receives the rewritten text (same size as the input).
output = widgets.Textarea(
    value="",
    placeholder="Plagiarism-removed text will appear here...",
    description="Output:",
    layout=widgets.Layout(width='880px', height='200px')
)

# Button that triggers a rewrite of the current input.
button = widgets.Button(
    description="Remove Plagiarism",
    button_style='primary'
)

# Free-text field for an optional integer seed; it is kept as text so that it
# can be left blank. Parsing happens in the click handler.
seed_box = widgets.Text(
    value="",
    placeholder="(optional integer)",
    description='Random seed:',
    layout=widgets.Layout(width='260px')
)

# HTML area used for coloured status / error messages.
status = widgets.HTML(value="")

# Static usage notes shown underneath the output box.
help_html_widget = widgets.HTML(
    "<small>Notes: (1) Press the button multiple times to get different synonym choices. "
    "(2) Leave Random seed blank for true randomness or enter an integer to reproduce an output. "
    "(3) This replaces single-word synonyms only; punctuation, numbers and multi-word synonyms are kept.</small>"
)

def on_click(b):
    """Handle a click on the "Remove Plagiarism" button.

    Seeds the global ``random`` generator from ``seed_box`` (a blank or
    invalid value re-seeds it non-deterministically), rewrites the text in
    ``text_input`` with ``plagiarism_removal`` and stores the result in
    ``output``. Progress, the outcome of seed parsing and any error are
    reported through the ``status`` widget.

    Args:
        b: The argument supplied by the button's click callback; unused.
    """
    # Local import keeps this handler self-contained within the cell.
    import html

    # handle seed (allow blank)
    seed_val = seed_box.value.strip()
    if seed_val:
        try:
            seed = int(seed_val)
        except ValueError:
            random.seed(None)
            seed_note = "<span style='color:orange'>Invalid seed — using random.</span>"
        else:
            random.seed(seed)
            seed_note = "<span style='color:green'>Using seed: {}</span>".format(seed_val)
    else:
        random.seed(None)
        seed_note = ""
    status.value = seed_note or "<span style='color:green'>Processing (random seed)…</span>"

    try:
        output.value = plagiarism_removal(text_input.value)
    except Exception as e:
        # Top-level UI boundary: show the failure instead of letting the
        # callback die silently. The message is escaped because it is
        # rendered as HTML and may contain markup characters.
        status.value = f"<span style='color:red'>Error: {html.escape(str(e))}</span>"
    else:
        done = "<span style='color:green'>Done — press again for another variation.</span>"
        # Keep the seed note visible next to the completion message; otherwise
        # an "Invalid seed" warning would be overwritten before it can be read.
        status.value = f"{seed_note} {done}" if seed_note else done

# Register the handler so that clicking the button runs on_click.
button.on_click(on_click)

# Vertical arrangement: input box, a row with the button / seed field /
# status message, the output box, and the usage notes at the bottom.
layout = widgets.VBox([
    text_input,
    widgets.HBox([button, seed_box, status]),
    output,
    help_html_widget
])

# Render the assembled UI as the cell's output.
display(layout)


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.6/1.6 MB[0m [31m37.5 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.6/1.6 MB[0m [31m35.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
[?25h

VBox(children=(Textarea(value='', description='Input:', layout=Layout(height='200px', width='880px'), placehol…