In [3]:
# ! pip install transformers

In [1]:
!pip show transformers

Name: transformers
Version: 4.26.1
Summary: State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow
Home-page: https://github.com/huggingface/transformers
Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)
Author-email: transformers@huggingface.co
License: Apache
Location: /opt/homebrew/Caskroom/miniconda/base/envs/ml38/lib/python3.8/site-packages
Requires: filelock, huggingface-hub, numpy, packaging, pyyaml, regex, requests, tokenizers, tqdm
Required-by: sentence-transformers


In [23]:
import warnings
warnings.filterwarnings("ignore")

In [30]:
from transformers import BertTokenizer, BertForSequenceClassification, pipeline

# Load the BERT tokenizer and model for summarization
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForSequenceClassification.from_pretrained("bert-base-uncased")

In [25]:
# Define the text to be summarized
input_text = """
     This Agreement ("Agreement") is entered into on this [Date], (the "Effective Date"), by and between [Party A], with its principal place of business at [Address], and [Party B], with its principal place of business at [Address], collectively referred to as the "Parties."

WHEREAS, [Party A] and [Party B] desire to enter into a business relationship for the purpose of [Purpose of the Agreement]; and

WHEREAS, the Parties wish to set forth the terms and conditions governing their relationship;

NOW, THEREFORE, in consideration of the mutual covenants contained herein, the Parties agree as follows:

1. Definitions
   1.1. "Confidential Information" shall mean any non-public, proprietary information disclosed by one Party to the other Party under this Agreement.
   1.2. "Term" shall mean the duration of this Agreement as specified in Section 4.
   ...

2. Obligations of [Party A]
   2.1. [Detailed description of Party A's obligations]
   ...

3. Obligations of [Party B]
   3.1. [Detailed description of Party B's obligations]
   ...

4. Term and Termination
   4.1. This Agreement shall commence on the Effective Date and shall continue in effect until terminated as provided herein.
   4.2. [Conditions and procedures for termination]
   ...

5. Confidentiality
   5.1. Both Parties agree to keep all Confidential Information received from the other Party confidential and to use such information only for the purposes of this Agreement.
   ...

6. Governing Law
   6.1. This Agreement shall be governed by and construed in accordance with the laws of the State of [State], without regard to its conflicts of law principles.

7. Entire Agreement
   7.1. This Agreement constitutes the entire agreement between the Parties and supersedes all prior understandings, agreements, representations, and warranties.

IN WITNESS WHEREOF, the Parties hereto have executed this Agreement as of the Effective Date.

[Signature of Party A]      [Signature of Party B]
[Name of Party A]           [Name of Party B]
[Title of Party A]          [Title of Party B]
[Date]                      [Date]
  """

In [26]:
# Tokenize and summarize the text
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
summary = summarizer(input_text, max_length=150, min_length=30, do_sample=False)


In [27]:
# Print the summarized text
bert_summary = summary[0]["summary_text"]

In [28]:
bert_summary

'This Agreement ("Agreement") is entered into on this [Date], (the "Effective Date"), by and between [Party A] and [Party B] Both Parties agree to keep all Confidential Information received from the other Party confidential and to use such information only for the purposes of this Agreement.'

Spacy

In [29]:
import spacy
from spacy.lang.en import English
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer

# Load SpaCy and create a sentence tokenization pipeline
nlp = English()
nlp.add_pipe("sentencizer")

# Input text (replace with your text)
input_text = """
This Agreement ("Agreement") is entered into on this [Date], (the "Effective Date"), by and between [Party A], with its principal place of business at [Address], and [Party B], with its principal place of business at [Address], collectively referred to as the "Parties."

WHEREAS, [Party A] and [Party B] desire to enter into a business relationship for the purpose of [Purpose of the Agreement]; and

WHEREAS, the Parties wish to set forth the terms and conditions governing their relationship;

NOW, THEREFORE, in consideration of the mutual covenants contained herein, the Parties agree as follows:

1. Definitions
   1.1. "Confidential Information" shall mean any non-public, proprietary information disclosed by one Party to the other Party under this Agreement.
   1.2. "Term" shall mean the duration of this Agreement as specified in Section 4.
   ...

2. Obligations of [Party A]
   2.1. [Detailed description of Party A's obligations]
   ...

3. Obligations of [Party B]
   3.1. [Detailed description of Party B's obligations]
   ...

4. Term and Termination
   4.1. This Agreement shall commence on the Effective Date and shall continue in effect until terminated as provided herein.
   4.2. [Conditions and procedures for termination]
   ...

5. Confidentiality
   5.1. Both Parties agree to keep all Confidential Information received from the other Party confidential and to use such information only for the purposes of this Agreement.
   ...

6. Governing Law
   6.1. This Agreement shall be governed by and construed in accordance with the laws of the State of [State], without regard to its conflicts of law principles.

7. Entire Agreement
   7.1. This Agreement constitutes the entire agreement between the Parties and supersedes all prior understandings, agreements, representations, and warranties.

IN WITNESS WHEREOF, the Parties hereto have executed this Agreement as of the Effective Date.

[Signature of Party A]      [Signature of Party B]
[Name of Party A]           [Name of Party B]
[Title of Party A]          [Title of Party B]
[Date]                      [Date]

"""

# Tokenize the input text into sentences using SpaCy
doc = nlp(input_text)
sentences = [sent.text for sent in doc.sents]

# Convert sentences to a plain text format for summarization
text = " ".join(sentences)

# Use Sumy for text summarization (you may need to install the Sumy library)
parser = PlaintextParser.from_string(text, Tokenizer("english"))
summarizer = LexRankSummarizer()

# Set the number of sentences in the summary (adjust as needed)
summary_sentences_count = 3
summary = summarizer(parser.document, summary_sentences_count)

# Print the summarized text
sum = ""

for sentence in summary:
    spacy_summary = sum + str(sentence)

print(spacy_summary) 

Both Parties agree to keep all Confidential Information received from the other Party confidential and to use such information only for the purposes of this Agreement.


Similarity Metrics

In [8]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('bert-base-nli-mean-tokens')

In [9]:
sentences = [spacy_summary, bert_summary]

In [10]:
sentence_embeddings = model.encode(sentences)

In [11]:
sentence_embeddings.shape

(2, 768)

Cosine Similarity

In [12]:
from sklearn.metrics.pairwise import cosine_similarity

In [13]:
similarity = cosine_similarity(
    [sentence_embeddings[0]],
    sentence_embeddings[1:])

In [14]:
print(similarity)

[[0.9369402]]


Sorensen Similarity

In [15]:
def calculate_sorensen_similarity(string1, string2):
    set1 = set(string1)
    set2 = set(string2)
    
    intersection = len(set1.intersection(set2))
    union = len(set1) + len(set2)
    
    sorensen_distance = 1 - (2 * intersection / union)
    similarity = 1 - sorensen_distance
    return similarity

In [16]:
similarity1 = calculate_sorensen_similarity(spacy_summary, bert_summary)


In [17]:
similarity1

0.8307692307692308

Jaccard Similarity

In [18]:
def calculate_jaccard_similarity(string1, string2):
    set1 = set(string1)
    set2 = set(string2)
    
    intersection = len(set1.intersection(set2))
    union = len(set1.union(set2))
    
    jaccard_distance = 1 - (intersection / union)
    similarity = 1 - jaccard_distance
    return similarity

In [19]:
similarity2 = calculate_jaccard_similarity(
    spacy_summary, bert_summary)

In [20]:
similarity2

0.7105263157894737