In [11]:
import re
import nltk
import numpy as np
from nltk.tokenize import sent_tokenize, word_tokenize 
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance 
from pypdf import PdfReader

In [92]:
def txt_file(filename): 
    with open(filename, 'r') as file: 
            content = file.read() 
            content.split('\n') # assume sentences are separated by end lines 
    return content 

content = txt_file('test.txt')
content

"Artificial Intelligence (AI) has been making remarkable strides in recent years, transforming various industries and aspects of daily life. From healthcare to finance, education to entertainment, AI's impact is profound and far-reaching. In healthcare, AI algorithms can analyze medical images with incredible accuracy, assisting doctors in diagnosing diseases such as cancer at an early stage. Financial institutions leverage AI for fraud detection, risk assessment, and algorithmic trading, resulting in more secure and efficient operations. In the realm of education, AI-powered tools offer personalized learning experiences, adapting to the needs and pace of individual students. This technology also powers virtual assistants, which can help with tasks ranging from setting reminders to answering complex queries, making life more convenient. Entertainment has also seen a significant transformation with AI, as recommendation algorithms suggest movies, music, and shows tailored to individual 

In [13]:
content = sent_tokenize(content)
sentences = [] 
for sentence in content: 
    words = word_tokenize(sentence) 
    words = [word.lower() for word in words if word.isalnum()] 
    words = " ".join(words) 
    sentences.append(words)
content = sentences
content

['artificial intelligence ai has been making remarkable strides in recent years transforming various industries and aspects of daily life',
 'from healthcare to finance education to entertainment ai impact is profound and',
 'in healthcare ai algorithms can analyze medical images with incredible accuracy assisting doctors in diagnosing diseases such as cancer at an early stage',
 'financial institutions leverage ai for fraud detection risk assessment and algorithmic trading resulting in more secure and efficient operations',
 'in the realm of education tools offer personalized learning experiences adapting to the needs and pace of individual students',
 'this technology also powers virtual assistants which can help with tasks ranging from setting reminders to answering complex queries making life more convenient',
 'entertainment has also seen a significant transformation with ai as recommendation algorithms suggest movies music and shows tailored to individual preferences enhancing us

In [14]:
all_words = set() 
for sentence in content: 
    words = word_tokenize(sentence) 
    all_words.update(words)
all_words = sorted(all_words) 
word_indx = {word:i for i,word in enumerate(all_words)}
print(word_indx)

{'a': 0, 'about': 1, 'accuracy': 2, 'adapting': 3, 'additionally': 4, 'address': 5, 'advancements': 6, 'ai': 7, 'algorithmic': 8, 'algorithms': 9, 'also': 10, 'an': 11, 'analyze': 12, 'and': 13, 'answering': 14, 'are': 15, 'artificial': 16, 'as': 17, 'aspects': 18, 'assessment': 19, 'assistants': 20, 'assisting': 21, 'at': 22, 'automation': 23, 'be': 24, 'been': 25, 'benefits': 26, 'biases': 27, 'breaches': 28, 'by': 29, 'can': 30, 'cancer': 31, 'challenges': 32, 'collaborate': 33, 'come': 34, 'complex': 35, 'concerns': 36, 'consequences': 37, 'considerations': 38, 'continues': 39, 'convenient': 40, 'crucial': 41, 'daily': 42, 'data': 43, 'deployment': 44, 'detection': 45, 'development': 46, 'diagnosing': 47, 'diseases': 48, 'displacement': 49, 'doctors': 50, 'due': 51, 'early': 52, 'education': 53, 'efficient': 54, 'enhancing': 55, 'ensure': 56, 'ensuring': 57, 'entertainment': 58, 'equitable': 59, 'ethical': 60, 'evolve': 61, 'experience': 62, 'experiences': 63, 'fair': 64, 'finance'

In [15]:
def sentence_vector(sentence, word_indx): 
    words = word_tokenize(sentence) 
    vector = np.zeros(len(word_indx)) 
    for word in words: 
        if word in word_indx: 
            vector[word_indx[word]] +=1 
    return vector 
# vectors = np.array([sentence_vector(sentence, word_indx) for sentence in content]) 

In [16]:
def sentence_similarity(vec1, vec2): 
    return 1 - cosine_distance(vec1, vec2) 

In [17]:
def sim_matrix(sentences, word_indx): 
    mat = np.zeros((len(sentences), len(sentences))) 
    vectors = np.array([sentence_vector(sentence, word_indx) for sentence in sentences]) 
    for i in range(len(sentences)): 
        for j in range(len(sentences)): 
            if i != j: 
                mat[i][j] = sentence_similarity(vectors[i], vectors[j])
    return mat

In [18]:
import networkx as nx  
def rank(matrix):
    graph = nx.from_numpy_array(matrix) 
    scores = nx.pagerank(graph) 
    return scores   

In [82]:
matrix = sim_matrix(content, word_indx) 
scores = rank(matrix) 
ranked_sentences = sorted(((scores[i],s) for i,s in enumerate(content)), reverse=True)
summarize_text = [ranked_sentences[i][1] for i in range(3)]

In [65]:
for sentence in summarize_text: 
    print("".join(sentence))

the future of ai holds immense potential and with responsible development and deployment it can lead to a more innovative and equitable world
as ai continues to evolve it is crucial for policymakers researchers and industry leaders to collaborate and address these challenges ensuring that the benefits of ai are maximized while minimizing potential risks
additionally the development of ai must be guided by ethical principles to prevent biases and ensure fair and just outcomes


Paraphrase summarized text

In [94]:
import random 
from nltk.corpus import wordnet 
from nltk.probability import FreqDist

In [95]:
summarize_text

['the future of ai holds immense potential and with responsible development and deployment it can lead to a more innovative and equitable world',
 'as ai continues to evolve it is crucial for policymakers researchers and industry leaders to collaborate and address these challenges ensuring that the benefits of ai are maximized while minimizing potential risks',
 'additionally the development of ai must be guided by ethical principles to prevent biases and ensure fair and just outcomes']

paraphrase (replace with synonyms)

In [96]:
content = ''
for sentence in summarize_text: 
    sentence += '.'
    content += sentence 
content

'the future of ai holds immense potential and with responsible development and deployment it can lead to a more innovative and equitable world.as ai continues to evolve it is crucial for policymakers researchers and industry leaders to collaborate and address these challenges ensuring that the benefits of ai are maximized while minimizing potential risks.additionally the development of ai must be guided by ethical principles to prevent biases and ensure fair and just outcomes.'

In [97]:
words = word_tokenize(content.lower()) 
freq_dist = FreqDist(words) 

In [98]:
def get_synonyms(word): 
    syno = set() 
    for syn in wordnet.synsets(word): 
        for lemma in syn.lemmas(): 
            if lemma.name().lower() != word.lower(): # exclude word itself 
                syno.add(lemma.name())
    return list(syno)

In [99]:
def replace_with_synonyms(words, freq_dist, rr=0.3, srr=0.3): 
    new_words =[] 
    most_common = {word for word, freq in freq_dist.most_common(int(len(freq_dist)*rr))}
    replace_indx = [i for i, word in enumerate(words) if word not in most_common]
    num_to_replace = int(len(replace_indx)*srr) # 30% of replaceable indices to replace with synonyms 
    indx_replaces = random.sample(replace_indx,num_to_replace) 

    for i, word in enumerate(words): 
        if i in indx_replaces: 
            syno = get_synonyms(word) 
            if syno: 
                new_words.append(random.choice(syno))
            else: 
                new_words.append(word) 
        else: 
            new_words.append(word) 
    return new_words

In [101]:
para = replace_with_synonyms(words, freq_dist)
print(summarize_text)
print()
print(' '.join(para)) 

['the future of ai holds immense potential and with responsible development and deployment it can lead to a more innovative and equitable world', 'as ai continues to evolve it is crucial for policymakers researchers and industry leaders to collaborate and address these challenges ensuring that the benefits of ai are maximized while minimizing potential risks', 'additionally the development of ai must be guided by ethical principles to prevent biases and ensure fair and just outcomes']

the future of ai holds immense potential and with responsible development and deployment it can lead to a more_than groundbreaking and equitable world.as ai carry_on to acquire it is crucial for policymakers investigator and industry leader to collaborate and address these challenges ensuring that the benefits of ai are maximized while understate potential risks.additionally the development of ai must be guided by ethical principles to prevent prejudice and ensure fair and simply outcomes .
