In [None]:
!pip install nltk
import nltk
nltk.download('punkt_tab')



[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [None]:
!pip install rouge-score


Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=85e8bb287276080fd283047f47e4d2d56e4befd0830caa532a86a28093bbec1c
  Stored in directory: /root/.cache/pip/wheels/1e/19/43/8a442dc83660ca25e163e1bd1f89919284ab0d0c1475475148
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [None]:
!pip install PyPDF2

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/232.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1


In [None]:
# Required Libraries
import nltk
import numpy as np
import networkx as nx
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk import pos_tag
from rouge_score import rouge_scorer
from PyPDF2 import PdfReader
import urllib.request
from bs4 import BeautifulSoup
import warnings
# Suppress every warning for the remainder of the session.
warnings.filterwarnings("ignore")


# Fetch the NLTK resources up front, one download call per resource,
# in the order listed.
for resource_name in ('punkt', 'stopwords', 'wordnet', 'averaged_perceptron_tagger'):
    nltk.download(resource_name)

# Text Preprocessing Function
def preprocess_text(text):
    """Normalize text into a space-joined string of lemmatized tokens.

    Steps, in order: collapse each run of whitespace into a single space,
    delete every character that is neither a word character nor whitespace,
    lowercase the result, tokenize it, drop English stop words and lemmatize
    the tokens that remain.

    Args:
        text: Raw input string.

    Returns:
        str: The surviving lemmas joined by single spaces.
    """
    single_spaced = re.sub(r'\s+', ' ', text)
    no_punct = re.sub(r'[^\w\s]', '', single_spaced)
    lowered = no_punct.lower()

    english_stops = set(stopwords.words("english"))
    wordnet = WordNetLemmatizer()

    kept = []
    for token in word_tokenize(lowered):
        if token in english_stops:
            continue
        kept.append(wordnet.lemmatize(token))
    return ' '.join(kept)

# Input Methods
def get_text_input():
    """Interactively ask the user for the text to summarize.

    Prints a four-option menu, reads the choice from standard input and then
    obtains the text from the selected source:

    1. typed or pasted directly,
    2. read from a UTF-8 ``.txt`` file,
    3. extracted page by page from a ``.pdf`` file,
    4. collected from the ``<p>`` elements of the page at a given URL.

    Returns:
        str: The collected text, or an empty string when the choice is not
        one of the listed options.

    Raises:
        Errors from the underlying file, PDF or network calls are not caught
        here and propagate to the caller.
    """
    print("Select one way of inputting your text: ")
    print("1. Type your Text(or Copy-Paste)")
    print("2. Load from .txt file")
    print("3. Load from .pdf file")
    print("4. From Wikipedia Page URL")
    # Tolerate stray whitespace around the menu digit.
    choice = input("\n").strip()

    if choice == "1":
        # Typed text is returned untouched; only the menu choice is trimmed.
        return input("Enter your text :\n")
    elif choice == "2":
        # Pasted paths often carry leading/trailing blanks; trim them.
        filepath = input("Enter full path of your .txt file : ").strip()
        with open(filepath, "r", encoding="utf-8") as file:
            return file.read()
    elif choice == "3":
        filepath = input("Enter full path of your .pdf file : ").strip()
        reader = PdfReader(filepath)
        # Guard against a page yielding None so the join cannot fail on it.
        return "\n".join((page.extract_text() or "") for page in reader.pages)
    elif choice == "4":
        url = input("Enter full Wikipedia page URL : ").strip()
        # The context manager closes the HTTP response even if read() fails.
        with urllib.request.urlopen(url) as response:
            html = response.read()
        soup = BeautifulSoup(html, "html.parser")
        return ' '.join([p.text for p in soup.find_all('p')])
    else:
        print("Invalid choice!")
        return ""

# Sentence Similarity Function using TF-IDF
def build_similarity_matrix(sentences, tfidf_matrix):
    """Compute the pairwise cosine similarity between sentence vectors.

    Args:
        sentences: Sequence of sentences; only its length is used, to fix the
            size of the result.
        tfidf_matrix: 2-D array-like with one row vector per sentence.

    Returns:
        numpy.ndarray: A ``len(sentences) x len(sentences)`` matrix whose
        ``[i][j]`` entry is the cosine similarity of rows ``i`` and ``j``.
        The diagonal is zero, and any pair involving an all-zero row is
        given similarity 0 instead of NaN.
    """
    n = len(sentences)
    if n == 0:
        return np.zeros((0, 0))

    vectors = np.asarray(tfidf_matrix, dtype=float)[:n]
    norms = np.linalg.norm(vectors, axis=1)
    dots = vectors @ vectors.T
    denom = np.outer(norms, norms)

    sim_matrix = np.zeros((n, n))
    # Divide only where both vectors are non-zero; the remaining cells keep
    # their initial 0, avoiding the 0/0 -> NaN that a plain division yields.
    np.divide(dots, denom, out=sim_matrix, where=denom > 0)
    # A sentence is never compared with itself.
    np.fill_diagonal(sim_matrix, 0.0)
    return sim_matrix

# Generate Summary Function
def generate_summary(text, num_sentences=3):
    """Build an extractive summary by ranking sentences with PageRank.

    The text is split into sentences, each sentence is cleaned with
    ``preprocess_text`` and vectorized with TF-IDF, a sentence-similarity
    graph is built from those vectors, and PageRank scores the sentences.
    The ``num_sentences`` best-scoring sentences are returned in the order
    in which they appear in ``text``.

    Args:
        text: The document to summarize.
        num_sentences: Maximum number of sentences to keep (default 3).

    Returns:
        str: The selected original sentences joined by single spaces. An
        empty string is returned for blank input or a non-positive
        ``num_sentences``; when the text has no more than ``num_sentences``
        sentences, all of them are returned.

    Raises:
        Exceptions raised by ``nx.pagerank`` propagate to the caller.
    """
    if not text or not text.strip() or num_sentences <= 0:
        return ""

    original_sentences = sent_tokenize(text)
    if len(original_sentences) <= num_sentences:
        # Every sentence fits in the summary, so there is nothing to rank.
        return ' '.join(original_sentences)

    cleaned_sentences = [preprocess_text(s) for s in original_sentences]

    try:
        tfidf_matrix = TfidfVectorizer().fit_transform(cleaned_sentences).toarray()
    except ValueError:
        # The vectorizer rejects input in which no sentence has a usable
        # token left after cleaning; fall back to the leading sentences.
        return ' '.join(original_sentences[:num_sentences])

    similarity_matrix = build_similarity_matrix(cleaned_sentences, tfidf_matrix)
    # A sentence whose vector is all zeros can yield NaN similarities;
    # treat those as "no similarity" so the graph weights stay finite.
    similarity_matrix = np.nan_to_num(similarity_matrix)

    nx_graph = nx.from_numpy_array(similarity_matrix)
    scores = nx.pagerank(nx_graph)

    # Rank sentence indices by score (ties keep document order), keep the
    # best ones, then restore document order so the summary reads naturally.
    ranked_indices = sorted(range(len(original_sentences)), key=scores.get, reverse=True)
    chosen = sorted(ranked_indices[:num_sentences])
    return ' '.join(original_sentences[i] for i in chosen)

# ROUGE Evaluation Function
def evaluate_summary(original, summary):
    """Print ROUGE-1, ROUGE-2 and ROUGE-L scores for a summary.

    The scorer is created with stemming enabled and is called with
    ``original`` as its first argument and ``summary`` as its second. For
    each metric the precision, recall and F1 values are printed with four
    decimal places between two banner lines.

    Args:
        original: The source text the summary was produced from.
        summary: The generated summary.

    Returns:
        None. The report is written to standard output.
    """
    rouge_types = ['rouge1', 'rouge2', 'rougeL']
    results = rouge_scorer.RougeScorer(rouge_types, use_stemmer=True).score(original, summary)

    print("\n******************** ROUGE Evaluation ********************")
    for name, triple in results.items():
        report_lines = (
            f"{name.upper()}:",
            f"  Precision: {triple.precision:.4f}",
            f"  Recall:    {triple.recall:.4f}",
            f"  F1-Score:  {triple.fmeasure:.4f}",
        )
        for line in report_lines:
            print(line)
    print("**********************************************************")

# Main Execution
def main(num_sentences=3):
    """Run the interactive summarizer end to end.

    Asks the user for text via ``get_text_input``, summarizes it with
    ``generate_summary``, prints the summary together with the word counts
    of the original and the summary, and finally prints the ROUGE report
    from ``evaluate_summary``.

    Args:
        num_sentences: Number of sentences requested from
            ``generate_summary`` (default 3).

    Returns:
        None. Returns early, printing nothing further, when no text (or only
        whitespace) was supplied.
    """
    text = get_text_input()
    if not text or not text.strip():
        # Whitespace-only input contains nothing to summarize.
        return
    summary = generate_summary(text, num_sentences=num_sentences)

    print("\n\n******************** Summary ********************\n")
    print(summary)
    print("\n")
    print("Total words in original article = ", len(text.split()))
    print("Total words in summarized article = ", len(summary.split()))

    evaluate_summary(text, summary)

# Run it: calling main() here starts the interactive prompt as soon as this
# cell executes.
main()


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


Select one way of inputting your text: 
1. Type your Text(or Copy-Paste)
2. Load from .txt file
3. Load from .pdf file
4. From Wikipedia Page URL

1
Enter your text :
Agricultural production is not only fundamental to improving nutrition, but is also the main source of income for many.  Increases in crop production are key to ending hunger, as well as economic and social development.  Global crop production has changed dramatically in recent decades. The amount of food we grow has increased rapidly as a result of two drivers:  the amount of land we use for agriculture has expanded, but the largest driver has been a rapid rise in crop yields.  The diversity of diets has also increased in many countries around the world. Cereals, roots, and other staple crops once made up the majority of agricultural produce.  This has expanded into legumes, fruits, vegetables, nuts, seeds, and other foods.


******************** Summary ********************

Agricultural production is not only fundament