In [None]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
import spacy
import string

In [None]:
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [None]:
def preprocess_text(text):
    stop_words = set(stopwords.words('english'))

    stemmer = PorterStemmer()

    lemmatizer = WordNetLemmatizer()

    nlp = spacy.load('en_core_web_sm')

    doc = nlp(text)

    tokens = [token.lemma_.lower() for token in doc if token.is_alpha and not token.is_stop]

    return ' '.join(tokens)




In [None]:
def extract_entities(text):
    # Process the text with spaCy to identify entities
    doc = nlp(text)
    entities = [(ent.text, ent.label_) for ent in doc.ents]
    return entities

In [None]:
def extract_key_terms(text):
    # Process the text with spaCy
    doc = nlp(text)

    # Define keywords or phrases indicative of key employment terms
    key_terms = ['salary', 'benefits', 'duties', 'responsibilities', 'termination', 'contract length', 'non-compete', 'confidentiality']

    # Extract sentences containing these key terms
    key_sentences = []
    for sentence in doc.sents:
        if any(term in sentence.text.lower() for term in key_terms):
            key_sentences.append(sentence.text.strip())

    return key_sentences

In [None]:
def extract_information(text):
    doc = nlp(text)
    entities = [(ent.text, ent.label_) for ent in doc.ents]
    key_phrases = [chunk.text for chunk in doc.noun_chunks if chunk.text.lower() not in nlp.Defaults.stop_words]

    return entities, key_phrases

In [None]:
import spacy
from spacy.matcher import Matcher


In [None]:
nlp = spacy.load('en_core_web_sm')
matcher = Matcher(nlp.vocab)


In [None]:
# Patterns for key terms often found in employment contracts
key_terms_patterns = [
    [{"LOWER": "salary"}],
    [{"LOWER": "benefits"}],
    [{"LOWER": "duties"}],
    [{"LOWER": "responsibilities"}],
    [{"LOWER": "termination"}],
    [{"LOWER": "contract"}, {"LOWER": "length"}],
    [{"LOWER": "non-compete"}],
    [{"LOWER": "confidentiality"}],
    [{"LOWER": "jurisdiction"}]
]

matcher.add("KEY_TERMS", key_terms_patterns)


In [None]:
def extract_contract_details(text):
    doc = nlp(text)

    # NER
    entities = [(ent.text, ent.label_) for ent in doc.ents]

    # Key phrase extraction
    matches = matcher(doc)
    key_phrases = [doc[start:end].text for match_id, start, end in matches]

    return entities, key_phrases


In [None]:
from transformers import pipeline
from transformers import T5ForConditionalGeneration, T5Tokenizer

In [None]:
model_name = 't5-base'
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
def summarize_contract(contract_text, max_length=512, min_length=150):
    # Pre-process the contract text to fit the model's needs
    inputs = tokenizer.encode("summarize: " + contract_text, return_tensors="pt", max_length=1024, truncation=True)

    # Generate summary
    summary_ids = model.generate(inputs, max_length=max_length, min_length=min_length, length_penalty=2.0, num_beams=4, early_stopping=True)
    contract_summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    return contract_summary




In [None]:
!pip install sentence-transformers




In [None]:
!pip install openai



In [None]:
import spacy
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from transformers import pipeline, T5ForConditionalGeneration, T5Tokenizer
from sentence_transformers import SentenceTransformer, util
import openai


In [None]:
!python -m spacy download en_core_web_trf


Collecting en-core-web-trf==3.7.3
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.7.3/en_core_web_trf-3.7.3-py3-none-any.whl (457.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m457.4/457.4 MB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_trf')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
import spacy
from transformers import pipeline

# Load spaCy model for linguistic features
nlp = spacy.load('en_core_web_trf')

# Initialize sentiment analysis model
sentiment_analyzer = pipeline("sentiment-analysis")

def advanced_sentiment_analysis(contract_text):
    # Process the contract text with spaCy
    doc = nlp(contract_text)

    # Analyze sentiment for each sentence or clause
    total_score = 0
    total_length = 0
    sentiments = []
    for sentence in doc.sents:
        sentiment_result = sentiment_analyzer(sentence.text)
        sentiment = sentiment_result[0]['label']
        score = sentiment_result[0]['score']

        # Store sentence, sentiment, and score
        sentiments.append((sentence.text, sentiment, score))

        # Update total score and length for overall sentiment calculation
        length = len(sentence.text)
        total_length += length
        # Weight the score by sentence length
        total_score += score * length

    # Calculate overall sentiment score for the contract
    overall_sentiment_score = total_score / total_length if total_length > 0 else 0

    return {
        'sentences': sentiments,
        'overall_sentiment_score': overall_sentiment_score
    }

# Example contract text
contract_text = """
This Employment Agreement is entered into between John Doe ("Employee") and XYZ Corporation ("Employer").
The Employee agrees to provide software development services for a monthly salary of $10,000.
The term of employment is two years, commencing on January 1, 2022, and ending on December 31, 2023, unless earlier terminated in accordance with the provisions herein.
The Employee agrees to maintain confidentiality concerning all proprietary information during and after the term of employment.
This agreement is governed by the laws of the State of California.
"""

# Run advanced sentiment analysis
analysis_results = advanced_sentiment_analysis(contract_text)
print("Detailed Sentiment Analysis:")
for sentence_data in analysis_results['sentences']:
    print(f"Sentence: {sentence_data[0]}")
    print(f"Sentiment: {sentence_data[1]}, Score: {sentence_data[2]}")
    print("-----")

print(f"Overall Contract Sentiment Score: {analysis_results['overall_sentiment_score']}")


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Detailed Sentiment Analysis:
Sentence: 
This Employment Agreement is entered into between John Doe ("Employee") and XYZ Corporation ("Employer").
Sentiment: NEGATIVE, Score: 0.9706593751907349
-----
Sentence: 

Sentiment: POSITIVE, Score: 0.7481208443641663
-----
Sentence: The Employee agrees to provide software development services for a monthly salary of $10,000.
The term of employment is two years, commencing on January 1, 2022, and ending on December 31, 2023, unless earlier terminated in accordance with the provisions herein.
Sentiment: NEGATIVE, Score: 0.9206486344337463
-----
Sentence: 

Sentiment: POSITIVE, Score: 0.7481208443641663
-----
Sentence: The Employee agrees to maintain confidentiality concerning all proprietary information during and after the term of employment.
Sentiment: POSITIVE, Score: 0.9915327429771423
-----
Sentence: 
This agreement is governed by the laws of the State of California.
Sentiment: POSITIVE, Score: 0.9341626763343811
-----
Sentence: 

Sentiment: 

In [None]:
from transformers import pipeline

def generate_amendments_with_transformers(contract_text, contract_clause, context_info, model="distilgpt2", max_tokens=1000):
    """
    Generates amendments for a contract clause using a model from Hugging Face's transformers.

    Args:
        contract_text (str): The text of the entire contract.
        contract_clause (str): The specific clause to amend.
        context_info (str): Additional context or details about the clause.
        model (str): The Hugging Face model to use.
        max_tokens (int): The maximum number of tokens to generate.

    Returns:
        str: The suggested amendments.
    """

    # Initialize the pipeline with the specified model
    generator = pipeline("text-generation", model=model, max_length=max_tokens)

    # Constructing the elaborate prompt
    prompt = f"""
    You are a sophisticated AI legal advisor updated with the 2024 legal standards and employment practices. Your task is to analyze the following employment contract, focusing on its adherence to contemporary legal norms, ethical considerations, and best practices in employment. Provide detailed amendments and explanations, considering key aspects of 2024 employment law:

    1. Remote Work and Flexibility
    2. Data Privacy and Protection
    3. AI and Automation in the Workplace
    4. Inclusivity and Anti-Discrimination
    5. Employee Classification and Gig Economy Considerations
    6. Sustainability and Corporate Responsibility
    7. Cybersecurity Responsibilities

    Contract Context:
    {contract_text}

    Clause for Amendment:
    {contract_clause}

    Context Information:
    {context_info}

    Based on the above, suggest detailed amendments to improve the clause, aligning it with 2024's legal landscape and employment practices.
    """

    # Generate the amendment suggestions
    response = generator(prompt)

    # Return the generated text
    return response[0]['generated_text'].strip()

# Example usage (you should replace placeholders with actual contract details)
contract_text = """This Employment Agreement ("Agreement") is made and entered into as of January 1, 2024, by and between Jane Doe ("Employee") and Acme Corp ("Employer").

1. Position and Duties: The Employee agrees to serve in the position of Senior Software Engineer and will perform all duties assigned by the Employer related to such position.

2. Compensation: The Employee shall receive an annual salary of $120,000, payable in monthly installments.

3. Confidentiality: The Employee agrees to maintain confidentiality concerning all proprietary information of the Employer during and after the term of employment.

4. Termination: This Agreement may be terminated by either party with a 30-day written notice.

The Employee acknowledges that they have read and understood the terms of this Agreement and agree to be bound by them.
"""
clause_to_amend = "The Employee agrees to maintain confidentiality concerning all proprietary information of the Employer during and after the term of employment."
context_info = "The confidentiality clause is intended to protect the company's proprietary information. However, there are concerns that it might be too broad and could unfairly restrict the Employee's future career opportunities. The clause should be amended to ensure it is fair, clearly defined, and compliant with recent legal standards on employee rights and data privacy."

# Generate amendment suggestions
amendments = generate_amendments_with_transformers(contract_text, clause_to_amend, context_info)
print("Amendment Suggestions:")
print(amendments)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Amendment Suggestions:
You are a sophisticated AI legal advisor updated with the 2024 legal standards and employment practices. Your task is to analyze the following employment contract, focusing on its adherence to contemporary legal norms, ethical considerations, and best practices in employment. Provide detailed amendments and explanations, considering key aspects of 2024 employment law:

    1. Remote Work and Flexibility
    2. Data Privacy and Protection
    3. AI and Automation in the Workplace
    4. Inclusivity and Anti-Discrimination
    5. Employee Classification and Gig Economy Considerations
    6. Sustainability and Corporate Responsibility
    7. Cybersecurity Responsibilities

    Contract Context:
    This Employment Agreement ("Agreement") is made and entered into as of January 1, 2024, by and between Jane Doe ("Employee") and Acme Corp ("Employer").

1. Position and Duties: The Employee agrees to serve in the position of Senior Software Engineer and will perform all 

In [None]:
import spacy
from sentence_transformers import SentenceTransformer, util

# Load NLP model and sentence transformer model
nlp = spacy.load('en_core_web_sm')
similarity_model = SentenceTransformer('all-MiniLM-L6-v2')

# Dummy knowledge base of clauses and their associated risk scores
knowledge_base_clauses = {
    "The employee must not disclose confidential information.": 0.9,
    "The employee is required to work beyond normal working hours without overtime pay.": 0.8,
    "The employee shall be entitled to only statutory minimum vacation days irrespective of tenure.": 0.7,
    "The employee agrees to a non-compete clause that extends for two years post-termination, applicable globally.": 0.95,
    "The employee must not engage in any other employment or consulting work without prior approval.": 0.6,
    "The employer reserves the right to unilaterally change the terms of employment without notice.": 0.85,
    "The employee agrees to mandatory arbitration in the event of disputes, waiving the right to trial.": 0.75,
    "The employer can terminate employment at any time without cause or prior notice.": 0.8,
    "The employee is required to use personal devices for work without compensation or security measures.": 0.7,
    "The contract lacks clear definitions of job responsibilities, allowing for significant unilateral changes.": 0.65,
    "The employee's performance evaluations are solely based on quantitative metrics, ignoring qualitative assessments.": 0.55,
    "The contract allows for employee monitoring without clear limits or privacy protections.": 0.8,
    "The employer is not obligated to contribute to a retirement plan.": 0.5,
    "The contract does not provide for regular performance reviews or salary adjustments.": 0.6,
    "The employee waives the right to claim ownership of intellectual property created during employment.": 0.9,
    "The contract includes a clause that limits the employee's right to discuss workplace conditions publicly.": 0.7,
    "The employment is contingent upon passing recurring background checks without clear cause.": 0.65,
    "The contract includes excessive geographical limitations on future employment post-termination.": 0.75,
    "The employee is responsible for costs related to training and professional development.": 0.55,
    "The employer does not provide a clear mechanism for reporting or addressing workplace grievances.": 0.6
}


def analyze_risk_and_sentiment(text):
    doc = nlp(text)
    overall_sentiment_score = 0
    total_sentences = 0

    # Semantic analysis to find similar clauses in a knowledge base
    similar_clauses = []
    contract_risk_score = 0

    for sentence in doc.sents:
        # Basic sentiment analysis (this part can be replaced with a more sophisticated sentiment analyzer if available)
        sentiment = sentence.sentiment
        overall_sentiment_score += sentiment
        total_sentences += 1

        sentence_embedding = similarity_model.encode(str(sentence))
        for clause, risk_score in knowledge_base_clauses.items():
            clause_embedding = similarity_model.encode(clause)
            similarity_score = util.pytorch_cos_sim(sentence_embedding, clause_embedding)

            if similarity_score > 0.75:  # Threshold for similarity
                similar_clauses.append((sentence, clause, similarity_score, risk_score))
                contract_risk_score += risk_score * similarity_score  # Calculate risk score

    # Normalize sentiment score
    average_sentiment_score = overall_sentiment_score / total_sentences if total_sentences > 0 else 0

    # Normalize risk score
    normalized_risk_score = contract_risk_score / len(similar_clauses) if similar_clauses else 0

    return {
        'average_sentiment_score': average_sentiment_score,
        'similar_clauses': similar_clauses,
        'normalized_risk_score': normalized_risk_score
    }

# Example usage
contract_text = """
This employment agreement ensures that the employee must not disclose confidential information.
The employee is required to work beyond normal working hours without overtime pay.
"""

analysis_results = analyze_risk_and_sentiment(contract_text)
print("Analysis Results:")
print(analysis_results)


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Analysis Results:
{'average_sentiment_score': 0.0, 'similar_clauses': [(
This employment agreement ensures that the employee must not disclose confidential information.
, 'The employee must not disclose confidential information.', tensor([[0.8645]]), 0.9), (The employee is required to work beyond normal working hours without overtime pay.
, 'The employee is required to work beyond normal working hours without overtime pay.', tensor([[1.]]), 0.8)], 'normalized_risk_score': tensor([[0.7890]])}
