In [157]:
import re
import json
import pycld2

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from random import randint
from transformers import *
from deep_translator import GoogleTranslator
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [140]:
# Shared VADER analyzer instance; sentiment() calls its polarity_scores() method.
sentiment_model = SentimentIntensityAnalyzer()

In [142]:
# Load the "tuner007/pegasus_paraphrase" checkpoint and its matching fast tokenizer.
# Both objects are used by paraphrasedLine().
paraphrase_model = PegasusForConditionalGeneration.from_pretrained("tuner007/pegasus_paraphrase")
paraphrase_tokenizer = PegasusTokenizerFast.from_pretrained("tuner007/pegasus_paraphrase")

loading configuration file https://huggingface.co/tuner007/pegasus_paraphrase/resolve/main/config.json from cache at /Users/katana/.cache/huggingface/transformers/d6a784b31cbe212ac3dabbc78bc4e454cf9d8a1b11ff2ef1ba1c7497f2bbfb33.7a3e093739f407bcc025e64dfc5244f91bc573ed4285cba53de90f960cbce58e
Model config PegasusConfig {
  "activation_dropout": 0.1,
  "activation_function": "relu",
  "add_bias_logits": false,
  "add_final_layer_norm": true,
  "architectures": [
    "PegasusForConditionalGeneration"
  ],
  "attention_dropout": 0.1,
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 16,
  "decoder_start_token_id": 0,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 16,
  "eos_token_id": 1,
  "extra_pos_embeddings": 1,
  "force_bos_token_to_be_generated": false,
  

In [170]:
# Settings shared by the translation and summarization helpers defined in this notebook.
translation_target = 'en'  # language code texts are translated into before analysis
translation_original = 'auto'  # source language handed to GoogleTranslator by translateToTarget()
sumarizer_stopwords = set(stopwords.words("english"))  # English stopwords skipped by summarization()

In [171]:
def translateToTarget(text: str = ''):
    """Detect the language of ``text`` and translate it into ``translation_target``.

    The text is lower-cased before it is handed to the translator; language
    detection runs on the text exactly as given.

    Args:
        text: Text in any language.

    Returns:
        dict with:
            'size': the second value returned by ``pycld2.detect``,
            'detected': the first entry of the detection details returned by
                ``pycld2.detect``,
            'translated': the translator's output for the lower-cased text.
    """
    _reliable, byte_count, language_details = pycld2.detect(text)

    translator = GoogleTranslator(
        source=translation_original,
        target=translation_target,
    )
    translated_text = translator.translate(text.lower())

    report = {}
    report['size'] = byte_count
    report['detected'] = language_details[0]
    report['translated'] = translated_text
    return report

def translateToOriginal(translated: str = '', translation_original: str = 'en', proxies: dict = None):
    """Translate text from ``translation_target`` into another language.

    Args:
        translated: Text written in the module-level ``translation_target``
            language.
        translation_original: Language code to translate into. Inside this
            function the parameter shadows the module-level
            ``translation_original`` setting.
        proxies: Optional proxy mapping forwarded to ``GoogleTranslator``.
            When omitted, no ``proxies`` argument is passed at all. (The
            previous version referenced an undefined global
            ``proxies_example``, which raised ``NameError`` on a fresh kernel.)

    Returns:
        dict with:
            'translated': the input text, unchanged,
            'translated_language': ``translation_target``,
            'original': the translation of the lower-cased input (empty string
                when the input is empty or whitespace-only),
            'original_language': the requested language code.
    """
    translator_options = {
        'source': translation_target,
        'target': translation_original,
    }
    if proxies is not None:
        translator_options['proxies'] = proxies

    if translated.strip():
        # The text is lower-cased before it is sent to the translator.
        original = GoogleTranslator(**translator_options).translate(translated.lower())
    else:
        # Nothing to translate: avoid calling the translation service.
        original = ''

    return {
        'translated': translated,
        'translated_language': translation_target,
        'original': original,
        'original_language': translation_original,
    }

def sentiment(content: str = '', original_language: str = 'en'):
    """Score the sentiment of ``content`` and pair it with its back-translation.

    Args:
        content: Text to score with ``sentiment_model``.
        original_language: Language code passed to ``translateToOriginal`` for
            the back-translation.

    Returns:
        dict with:
            'sentences': the result of ``translateToOriginal(content, original_language)``,
            'polarity': the result of ``sentiment_model.polarity_scores(content)``.
    """
    score = sentiment_model.polarity_scores(content)

    return {
        # Translate the text that was actually scored. The previous version
        # read an unrelated global named `line` here instead of `content`.
        'sentences': translateToOriginal(content, original_language),
        'polarity': score
    }

def summarization(content: str = '', original_language: str = 'en', threshold: float = 1.2):
    """Build an extractive summary of ``content`` and translate it back.

    Each sentence is scored by summing the frequencies of the non-stopword
    tokens that occur in it. Sentences scoring above ``threshold`` times the
    (integer-truncated) average score are kept, in their original order.

    Args:
        content: Text to summarize.
        original_language: Language code passed to ``translateToOriginal``.
        threshold: Multiplier applied to the average sentence score; only
            sentences scoring strictly above it are kept.

    Returns:
        The result of ``translateToOriginal(summary, original_language)``,
        where ``summary`` is the selected sentences joined by single spaces
        (an empty string when nothing is selected).
    """
    words = word_tokenize(content)
    sentences = sent_tokenize(content)

    # Count how often each non-stopword token occurs (case-insensitive).
    # The previous version reset the count to 1 on every occurrence.
    freqTable = dict()
    for word in words:
        word = word.lower()
        if word in sumarizer_stopwords:
            continue
        freqTable[word] = freqTable.get(word, 0) + 1

    # Score sentences. A token counts towards a sentence when it appears in
    # the lower-cased sentence text (substring match, as before).
    sentenceValue = dict()
    for sentence in sentences:
        lowered = sentence.lower()
        score = sum(freq for word, freq in freqTable.items() if word in lowered)
        if score:
            sentenceValue[sentence] = sentenceValue.get(sentence, 0) + score

    # Guard against content with no scorable sentence, which previously
    # raised NameError or ZeroDivisionError.
    if sentenceValue:
        average = int(sum(sentenceValue.values()) / len(sentenceValue))
    else:
        average = 0

    selected = [
        sentence
        for sentence in sentences
        if (sentence in sentenceValue) and (sentenceValue[sentence] > (threshold * average))
    ]

    # Join with a space so consecutive sentences do not run together.
    summary = ' '.join(selected)

    return translateToOriginal(summary, original_language)

def paraphrasedLine(sentence, num_return_sequences=5, num_beams=5):
    """Generate paraphrases of a single sentence.

    Args:
        sentence: Text to paraphrase.
        num_return_sequences: Passed to ``paraphrase_model.generate``.
        num_beams: Passed to ``paraphrase_model.generate``.

    Returns:
        The result of ``paraphrase_tokenizer.batch_decode`` on the generated
        outputs, decoded with special tokens skipped.
    """
    # Tokenize the sentence as a one-element batch.
    encoded = paraphrase_tokenizer(
        [sentence],
        truncation=True,
        padding="longest",
        return_tensors="pt",
    )

    generation_options = {
        "num_beams": num_beams,
        "num_return_sequences": num_return_sequences,
    }
    generated = paraphrase_model.generate(**encoded, **generation_options)

    decoded = paraphrase_tokenizer.batch_decode(generated, skip_special_tokens=True)
    return decoded

def paraphrased(content: str = '', original_language: str = 'en', num_options: int = 3):
    """Paraphrase ``content`` sentence by sentence and translate the results.

    The content is split on ``'. '``. For every non-blank segment the function
    records its back-translation, generates ``num_options`` paraphrases,
    back-translates each of them, and picks one at random for the combined
    paraphrased text.

    Args:
        content: Text to paraphrase, in the ``translation_target`` language.
        original_language: Language code passed to ``translateToOriginal``.
        num_options: Number of paraphrases requested per segment (used for
            both ``num_beams`` and ``num_return_sequences``).

    Returns:
        dict with:
            'paraphrased': {'translated': ..., 'original': ...} - the chosen
                paraphrases, each followed by a single space,
            'sources': one ``translateToOriginal`` result per segment,
            'options': for each segment, the list of ``translateToOriginal``
                results for its paraphrases.
    """
    results = {
        'paraphrased': {
            'translated': '',
            'original': '',
        },
        'sources': [],
        'options': [],
    }

    for line in content.split('. '):
        # Blank segments (e.g. from empty content) carry nothing to paraphrase.
        if not line.strip():
            continue

        results['sources'].append(
            translateToOriginal(line, original_language)
        )

        options = [
            translateToOriginal(paraphrase, original_language)
            for paraphrase in paraphrasedLine(
                line, num_beams=num_options, num_return_sequences=num_options
            )
        ]
        results['options'].append(options)

        if not options:
            continue

        # Pick ONE option and use both of its language versions, so that the
        # 'translated' and 'original' texts describe the same paraphrase.
        # The index range follows the number of options actually returned.
        chosen = options[randint(0, len(options) - 1)]
        results['paraphrased']['translated'] += chosen['translated'] + ' '
        results['paraphrased']['original'] += chosen['original'] + ' '

    return results

In [172]:
# Sample input text for the pipeline (detected as Indonesian in the run recorded in this notebook).
source_content = 'Perbedaan antara kemerdekaan dan revolusi telah lama diperdebatkan karena dianggap sebagai kekerasan yang dilakukan dengan cara yang sah untuk memperoleh kedaulatan. Secara umum, revolusi hanya bertujuan untuk menyalurkan ulang kekuasaan tetap dengan atau tanpa unsur emansipasi seperti pendemokrasian dalam suatu negara. Namun, beberapa perang untuk mencapai kemerdekaan digambarkan sebagai revolusi seperti yang terjadi pada revolusi Amerika Serikat pada tahun 1783 dan Indonesia pada tahun 1949, sedangkan beberapa revolusi tentang perubahan struktur politik malah mengakibatkan pemisahan diri negara.'

In [173]:
# Detect the language of the sample text and translate it into the target language.
english_content = translateToTarget(source_content)

# Display the detection/translation result.
english_content

{'size': 590,
 'detected': ('INDONESIAN', 'id', 99, 1295.0),
 'translated': 'the difference between independence and revolution has long been debated because it is considered violence carried out in a legitimate way to gain sovereignty. In general, revolution only aims to redistribute permanent power with or without emancipation elements such as democracy in a country. however, some wars for independence are described as revolutions such as those in the United States revolution in 1783 and Indonesia in 1949, while some revolutions regarding changes in political structure resulted in the secession of the state.'}

In [174]:
# Pull out the two values the later cells work with.
translated_content = english_content['translated']  # translated text used as input to the analyses
original_language = english_content['detected'][1]  # second field of the detection entry ('id' in the recorded run)

In [175]:
# Round trip: translate the translated text back into the detected language.
# NOTE(review): `souce_content` looks like a typo of `source_content`; the name is
# left unchanged here in case cells outside this view reference it.
souce_content = translateToOriginal(translated_content, original_language)

# Display the round-trip result.
souce_content

{'translated': 'the difference between independence and revolution has long been debated because it is considered violence carried out in a legitimate way to gain sovereignty. In general, revolution only aims to redistribute permanent power with or without emancipation elements such as democracy in a country. however, some wars for independence are described as revolutions such as those in the United States revolution in 1783 and Indonesia in 1949, while some revolutions regarding changes in political structure resulted in the secession of the state.',
 'translated_language': 'en',
 'original': 'perbedaan antara kemerdekaan dan revolusi telah lama diperdebatkan karena dianggap kekerasan dilakukan dengan cara yang sah untuk mendapatkan kedaulatan. secara umum, revolusi hanya bertujuan untuk mendistribusikan kembali kekuasaan permanen dengan atau tanpa unsur-unsur emansipasi seperti demokrasi di suatu negara. namun, beberapa perang kemerdekaan digambarkan sebagai revolusi seperti yang te

In [176]:
# Score each '. '-separated segment of the translated text on its own and
# pretty-print the result as JSON.
for line in translated_content.split('. '):
    print(json.dumps(sentiment(line, original_language), indent = 2))

{
  "sentences": {
    "translated": "the difference between independence and revolution has long been debated because it is considered violence carried out in a legitimate way to gain sovereignty",
    "translated_language": "en",
    "original": "perbedaan antara kemerdekaan dan revolusi telah lama diperdebatkan karena dianggap kekerasan dilakukan dengan cara yang sah untuk mendapatkan kedaulatan",
    "original_language": "id"
  },
  "polarity": {
    "neg": 0.139,
    "neu": 0.746,
    "pos": 0.115,
    "compound": -0.1779
  }
}
{
  "sentences": {
    "translated": "In general, revolution only aims to redistribute permanent power with or without emancipation elements such as democracy in a country",
    "translated_language": "en",
    "original": "Secara umum, revolusi hanya bertujuan untuk mendistribusikan kembali kekuasaan permanen dengan atau tanpa unsur-unsur emansipasi seperti demokrasi di suatu negara",
    "original_language": "id"
  },
  "polarity": {
    "neg": 0.0,
    "

In [177]:
# Summarize the translated text; the result also carries the back-translation
# into the detected language.
summarized_text = summarization(translated_content, original_language)

print(json.dumps(summarized_text, indent = 2))

{
  "translated": "however, some wars for independence are described as revolutions such as those in the United States revolution in 1783 and Indonesia in 1949, while some revolutions regarding changes in political structure resulted in the secession of the state.",
  "translated_language": "en",
  "original": "namun, beberapa perang kemerdekaan digambarkan sebagai revolusi seperti yang terjadi pada revolusi amerika serikat pada tahun 1783 dan indonesia pada tahun 1949, sementara beberapa revolusi mengenai perubahan struktur politik mengakibatkan pemisahan negara.",
  "original_language": "id"
}


In [178]:
# Paraphrase the translated text segment by segment; the result holds the chosen
# paraphrase, the source segments, and every generated option.
paraphrased_content = paraphrased(translated_content, original_language)

print(json.dumps(paraphrased_content, indent = 2))

{
  "paraphrased": {
    "translated": "The difference between independence and revolution has been debated for a long time because it is considered violence in a legitimate way to gain sovereignty. Revolution only aims to redistribute permanent power with or without the help of democracy. The United States revolution in 1783 and Indonesia in 1949 are both described as revolutions that resulted in the separation of the state. ",
    "original": "perbedaan antara kemerdekaan dan revolusi telah diperdebatkan sejak lama karena dianggap kekerasan dengan cara yang sah untuk mendapatkan kedaulatan. revolusi hanya bertujuan untuk mendistribusikan kembali kekuasaan dengan atau tanpa unsur demokrasi. revolusi amerika serikat tahun 1783 dan indonesia tahun 1949 keduanya digambarkan sebagai revolusi yang mengakibatkan pemisahan negara. "
  },
  "sources": [
    {
      "translated": "the difference between independence and revolution has long been debated because it is considered violence carried