# Import Required Libraries

In [None]:
import nltk
import networkx as nx
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from itertools import combinations
from collections import defaultdict
import string

from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet

# Fetch the NLTK data packages the functions below rely on.
# "punkt_tab" is the tokenizer data that recent NLTK releases (the same ones
# that use the "averaged_perceptron_tagger_eng" name) load for
# sent_tokenize/word_tokenize; the legacy "punkt" package is still fetched so
# older NLTK installs keep working.
nltk.download("punkt")
nltk.download("punkt_tab")
nltk.download("averaged_perceptron_tagger_eng")
nltk.download("stopwords")
nltk.download("wordnet")
nltk.download("omw-1.4")

# Shared lemmatizer instance used by extract_candidates.
lemmatizer = WordNetLemmatizer()

# Lookup sets used to filter tokens: English stopwords and the single
# punctuation characters listed in string.punctuation.
stop_words = set(stopwords.words('english'))
punctuations = set(string.punctuation)

# TextRank-Based Keyword Extraction Functions

In [None]:
# Convert POS Tags to WordNet Format
def get_wordnet_pos(tag):
    """Translate a POS tag string into a WordNet part-of-speech constant.

    The tag's leading letter decides the result: "J" -> adjective,
    "V" -> verb, "N" -> noun, "R" -> adverb. Any other tag falls back to
    noun.

    Args:
        tag: POS tag string (in this file, a tag produced by nltk.pos_tag).

    Returns:
        One of wordnet.ADJ, wordnet.VERB, wordnet.NOUN or wordnet.ADV.
    """
    prefix_to_pos = (
        ("J", wordnet.ADJ),
        ("V", wordnet.VERB),
        ("N", wordnet.NOUN),
        ("R", wordnet.ADV),
    )
    for prefix, pos in prefix_to_pos:
        if tag.startswith(prefix):
            return pos
    # Unrecognised tags are treated as nouns.
    return wordnet.NOUN

In [None]:
# Extracts candidate keywords from the input text
def extract_candidates(text):
    """Collect lemmatized noun and adjective tokens from *text*.

    The text is split into sentences, each sentence is tokenized and
    POS-tagged, and every token whose tag starts with "NN" or "JJ" is kept
    unless its lowercase form is a stopword or a single punctuation
    character. Kept tokens are lowercased and lemmatized using the WordNet
    part of speech derived from their tag.

    Args:
        text: Raw input string.

    Returns:
        List of lemma strings in order of appearance; repeated words are
        kept as repeated entries.
    """
    candidates = []
    for sentence in sent_tokenize(text):
        for token, pos_tag in nltk.pos_tag(word_tokenize(sentence)):
            # Only nouns and adjectives are keyword candidates.
            if not pos_tag.startswith(("NN", "JJ")):
                continue
            lowered = token.lower()
            if lowered in stop_words or lowered in punctuations:
                continue
            candidates.append(
                lemmatizer.lemmatize(lowered, get_wordnet_pos(pos_tag))
            )
    return candidates

In [None]:
# Builds a co-occurrence graph from a list of words
def build_graph(words, window_size=4):
    """Build an undirected co-occurrence graph over *words*.

    Each word is linked to the words that follow it at a distance of
    1 .. window_size - 1 positions. Pairs of identical words are skipped,
    so the graph has no self-loops. Words that never get linked to a
    different word do not appear as nodes.

    Args:
        words: Sequence of word strings, in text order.
        window_size: Number of consecutive positions (including the current
            word) that count as co-occurring.

    Returns:
        networkx.Graph whose nodes are words and whose edges join
        co-occurring words.
    """
    total = len(words)
    cooccurrence = nx.Graph()
    for position, head in enumerate(words):
        # Clamp the window so it never runs past the end of the list.
        window_end = min(position + window_size, total)
        for offset in range(position + 1, window_end):
            neighbour = words[offset]
            if head != neighbour:
                cooccurrence.add_edge(head, neighbour)
    return cooccurrence

In [None]:
# Extracts top keywords from the input text using the TextRank algorithm
def textrank_keywords(text, top_n=10, window_size=4):
    """Rank candidate keywords in *text* by PageRank score.

    Candidate words are extracted with extract_candidates, linked into a
    co-occurrence graph with build_graph, and scored with nx.pagerank.

    Args:
        text: Raw input string.
        top_n: Maximum number of keywords to return.
        window_size: Co-occurrence window passed to build_graph. The default
            of 4 matches build_graph's own default, so calls that omit it
            behave as before.

    Returns:
        List of (word, score) tuples sorted by descending score, truncated
        to at most top_n entries.
    """
    words = extract_candidates(text)
    graph = build_graph(words, window_size=window_size)
    scores = nx.pagerank(graph)
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    return ranked[:top_n]

In [None]:
# Sample passage that is passed to textrank_keywords further down.
text = """
Artificial Intelligence (AI) is rapidly transforming industries across the globe. From healthcare and education to finance and transportation, AI is improving efficiency, accuracy, and decision-making capabilities. In healthcare, AI-powered systems assist doctors in diagnosing diseases such as cancer more quickly and accurately. Machine learning algorithms analyze vast amounts of patient data to identify patterns that humans may miss.

In the financial sector, AI helps detect fraudulent transactions and automates investment strategies. Robo-advisors are increasingly being used by individuals to manage their portfolios. In education, AI-powered learning platforms adapt to individual students' needs, offering personalized content and real-time feedback.

Self-driving cars are one of the most visible applications of AI in transportation. These vehicles use computer vision, sensors, and neural networks to navigate safely through traffic. Moreover, natural language processing enables AI systems to understand and respond to human language, making virtual assistants like Siri and Alexa more effective.

Despite its benefits, AI also raises ethical concerns, such as data privacy, job displacement, and algorithmic bias. As AI becomes more integrated into daily life, it is crucial to establish clear regulations and ethical guidelines to ensure its responsible use.

Overall, AI represents both an opportunity and a challenge for modern society. Its continued development will shape the future in ways we are only beginning to understand.
"""


# 📊 Keyword Visualization

In [None]:
import matplotlib.pyplot as plt

def visualize_keywords(keywords):
    """Show a bar chart of keyword scores, labelling each bar with its value.

    Args:
        keywords: Sequence of (word, score) pairs, such as the list returned
            by textrank_keywords. Bars are drawn in the order given.

    Returns:
        None. The figure is displayed with plt.show().
    """
    labels = [label for label, _ in keywords]
    values = [value for _, value in keywords]

    plt.figure(figsize=(10, 6))
    rects = plt.bar(labels, values, color='skyblue')
    plt.title('Top Keywords Extracted via TextRank')
    plt.xlabel('Keywords')
    plt.ylabel('PageRank Score')
    plt.xticks(rotation=45)
    plt.tight_layout()

    # Print each score, to three decimals, centred just above its bar.
    for rect, value in zip(rects, values):
        x_center = rect.get_x() + rect.get_width()/2
        plt.text(
            x_center,
            rect.get_height(),
            f"{value:.3f}",
            ha='center',
            va='bottom',
            fontsize=9,
        )

    plt.show()


In [None]:
# Extract the top-ranked keywords from the sample passage and plot their scores.
keywords = textrank_keywords(text)
visualize_keywords(keywords)