In [None]:
import nltk
import numpy as np
import pandas as pd
import networkx as nx
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords

# Fetch the NLTK data packages used below: the tokenizer models needed by
# sent_tokenize, followed by the stop-word corpus.
for tokenizer_pkg in ("punkt", "punkt_tab"):
    nltk.download(tokenizer_pkg)
# Kept as a bare trailing expression so the cell still displays its return value.
nltk.download("stopwords")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
# Sample input document (swap in your own data, e.g. news articles).
# Written as adjacent string literals, one sentence per line; Python joins
# them into a single string, so the trailing spaces are significant.
text = (
    "Artificial Intelligence (AI) is transforming industries by automating processes, enhancing efficiency, and enabling data-driven decision-making. "
    "Businesses leverage AI to improve customer experiences, optimize supply chains, and detect fraud. "
    "However, AI implementation comes with challenges, including ethical considerations, bias in algorithms, and data privacy concerns. "
    "Companies must address these challenges to ensure responsible AI usage. "
    "The future of AI lies in advancements in machine learning, deep learning, and reinforcement learning, paving the way for smarter automation and human-AI collaboration."
)


In [None]:
# Step 1: Break the document into individual sentences
sentences = sent_tokenize(text)

# Echo each sentence with a 1-based label so later tables can be cross-referenced
numbered_lines = [
    f"Sentence {position}: {content}"
    for position, content in enumerate(sentences, start=1)
]
for line in numbered_lines:
    print(line)


Sentence 1: Artificial Intelligence (AI) is transforming industries by automating processes, enhancing efficiency, and enabling data-driven decision-making.
Sentence 2: Businesses leverage AI to improve customer experiences, optimize supply chains, and detect fraud.
Sentence 3: However, AI implementation comes with challenges, including ethical considerations, bias in algorithms, and data privacy concerns.
Sentence 4: Companies must address these challenges to ensure responsible AI usage.
Sentence 5: The future of AI lies in advancements in machine learning, deep learning, and reinforcement learning, paving the way for smarter automation and human-AI collaboration.


In [None]:
# Step 2: Compute TF-IDF scores
# Each sentence is vectorized as its own "document"; NLTK's English stop
# words are removed before weighting.
vectorizer = TfidfVectorizer(stop_words=stopwords.words('english'))
X = vectorizer.fit_transform(sentences)

# Get feature names (words) — one per column of X
feature_names = vectorizer.get_feature_names_out()

# Convert the sparse matrix to a dense array.
# toarray() yields a plain ndarray; todense() would return the legacy
# np.matrix type, which NumPy no longer recommends using.
dense = X.toarray()

# Create a DataFrame for better visualization: rows are sentences (1-based
# labels), columns are vocabulary terms.
sentence_labels = [f"Sentence {i}" for i in range(1, len(sentences) + 1)]
df = pd.DataFrame(dense, columns=feature_names, index=sentence_labels)

# Print the DataFrame
print(df)

             address  advancements        ai  algorithms  artificial  \
Sentence 1  0.000000      0.000000  0.132783    0.000000    0.278661   
Sentence 2  0.000000      0.000000  0.149002    0.000000    0.000000   
Sentence 3  0.000000      0.000000  0.140338    0.294514    0.000000   
Sentence 4  0.381303      0.000000  0.181693    0.000000    0.000000   
Sentence 5  0.000000      0.213647  0.203608    0.000000    0.000000   

            automating  automation      bias  businesses    chains  ...  \
Sentence 1    0.278661    0.000000  0.000000    0.000000  0.000000  ...   
Sentence 2    0.000000    0.000000  0.000000    0.312698  0.312698  ...   
Sentence 3    0.000000    0.000000  0.294514    0.000000  0.000000  ...   
Sentence 4    0.000000    0.000000  0.000000    0.000000  0.000000  ...   
Sentence 5    0.000000    0.213647  0.000000    0.000000  0.000000  ...   

              paving   privacy  processes  reinforcement  responsible  \
Sentence 1  0.000000  0.000000   0.278661  

In [None]:
# Step 3: Build similarity matrix
# Pairwise dot products of the TF-IDF rows. TfidfVectorizer L2-normalises
# rows by default, so these values are cosine similarities (diagonal == 1).
# The @ operator is used rather than np.dot: NumPy's dot is not sparse-aware
# and only happens to work for some SciPy sparse container types.
similarity_matrix = (X @ X.T).toarray()

# Create a DataFrame for better visualization (same labels on both axes)
sentence_labels = [f"Sentence {i}" for i in range(1, len(sentences) + 1)]
df_similarity = pd.DataFrame(similarity_matrix,
                             index=sentence_labels,
                             columns=sentence_labels)

# Print the DataFrame
print(df_similarity)

            Sentence 1  Sentence 2  Sentence 3  Sentence 4  Sentence 5
Sentence 1    1.000000    0.019785    0.072055    0.024126    0.027036
Sentence 2    0.019785    1.000000    0.020911    0.027073    0.030338
Sentence 3    0.072055    0.020911    1.000000    0.098596    0.028574
Sentence 4    0.024126    0.027073    0.098596    1.000000    0.036994
Sentence 5    0.027036    0.030338    0.028574    0.036994    1.000000


In [None]:
# Step 4: Apply TextRank algorithm
# The diagonal of the similarity matrix is each sentence's similarity with
# itself (1.0). Left in place, those entries become self-loops that far
# outweigh every real edge, so PageRank scores collapse towards uniform.
# Zero them on a copy so similarity_matrix itself stays unchanged.
graph_weights = similarity_matrix.copy()
np.fill_diagonal(graph_weights, 0.0)

# Nodes are sentence indices (0-based); edge weights are similarities.
graph = nx.from_numpy_array(graph_weights)
scores = nx.pagerank(graph)

# Print scores with 1-based sentence numbers, keyed by node id so the label
# does not depend on dict iteration order.
for node in range(len(sentences)):
    print(f"Sentence {node + 1}: Score = {scores[node]:.4f}")  # Format score to 4 decimal places

Sentence 1: Score = 0.1984
Sentence 2: Score = 0.1964
Sentence 3: Score = 0.2055
Sentence 4: Score = 0.2020
Sentence 5: Score = 0.1977


In [None]:
# Step 5: Rank sentences and extract the top N sentences as summary
# Pair each sentence with its PageRank score and sort best-first.
ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)

# Keep the top third of the sentences, but at least two — and never more
# than actually exist, so very short inputs do not raise IndexError.
num_sentences = min(len(ranked_sentences), max(2, len(sentences) // 3))
summary = " ".join(sentence for _, sentence in ranked_sentences[:num_sentences])

print("\nOriginal Text:\n", text)
print("\nExtracted Summary:\n", summary)



Original Text:
 Artificial Intelligence (AI) is transforming industries by automating processes, enhancing efficiency, and enabling data-driven decision-making. Businesses leverage AI to improve customer experiences, optimize supply chains, and detect fraud. However, AI implementation comes with challenges, including ethical considerations, bias in algorithms, and data privacy concerns. Companies must address these challenges to ensure responsible AI usage. The future of AI lies in advancements in machine learning, deep learning, and reinforcement learning, paving the way for smarter automation and human-AI collaboration.

Extracted Summary:
 However, AI implementation comes with challenges, including ethical considerations, bias in algorithms, and data privacy concerns. Companies must address these challenges to ensure responsible AI usage.
