In [1]:
import os
from dotenv import load_dotenv
import openai
import requests
import json
import time
import logging
from datetime import datetime
import fitz
import tensorflow as tf
import keras
import keras_nlp
import logging
from transformers import AutoTokenizer, TFAutoModel
import tensorflow as tf
import docx 

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load environment variables
try:
    # Run python-dotenv first, then read the key from the process environment.
    load_dotenv()
    openai.api_key = os.getenv("OPENAI_API_KEY")

    # An unset or empty key is a hard failure: later cells cannot work without it.
    if not openai.api_key:
        raise ValueError("OpenAI API key not found in environment variables.")

    logging.info("Environment variables loaded successfully.")
except Exception as e:
    # Log for the notebook reader, then re-raise so execution stops at this cell.
    logging.error(f"Error loading environment variables: {e}")
    raise

In [3]:
# Initialize the OpenAI client that the later cells use for all API calls.
# No arguments are passed here; presumably the SDK picks up OPENAI_API_KEY
# from the environment loaded above — TODO confirm.
client = openai.OpenAI()


In [4]:

# Model name for the assistant; taken from the MODEL_NAME environment variable,
# falling back to "gpt-4o-mini" when it is not set.
model = os.getenv("MODEL_NAME", "gpt-4o-mini")

# System instructions for the Contract Checking Assistant.
# This cell only defines the prompt text; the assistant is created from it afterwards.
instructions = """
You are a Contract Checking Assistant. Your task is to receive written contracts, check them against specified regulations, identify missing parts, suggest modifications, and provide recommendations for improvement.

When a contract is provided, follow these steps:

1. **Check for Compliance**:
    - Verify that the contract complies with Iran's regulations.
    - Check compliance with ICC and Incoterms regulations.
2. **Identify Missing Parts**:
    - Highlight any sections that are missing or incomplete.
3. **Suggest Modifications**:
    - Recommend changes to ensure the contract meets all legal and regulatory requirements.
4. **Provide Improvements**:
    - Offer suggestions on how to improve the contract for clarity, fairness, and comprehensiveness.
5. **Compare with Similar Contracts**:
    - Compare the contract with similar contracts to identify common practices and potential improvements.
6. **Identify Weaknesses and Challenges**:
    - Find weaknesses and challenging points in the contract and suggest modifications for these parts.

Your responses should be clear, concise, and professional. Always provide detailed explanations for your suggestions and ensure that your feedback is actionable.
"""

In [5]:
# Register the contract-checking assistant through the Assistants API, giving it
# the prompt and model defined above plus the file_search and code_interpreter tools.
# NOTE(review): each execution of this cell issues another create call — presumably
# producing an additional assistant with the same name; confirm before re-running.
Contract_Checking_Assistant = client.beta.assistants.create(
    model=model,
    name="Contract_Check_Assistant_V4",
    instructions=instructions,
    metadata={"project": "Contract Review"},
    tools=[{"type": "file_search"}, {"type": "code_interpreter"}],
)

# Last expression of the cell: display the returned assistant object.
Contract_Checking_Assistant

Assistant(id='asst_tvQRDI4xxvg1r73jHrNwJu6y', created_at=1727877357, description=None, instructions="\nYou are a Contract Checking Assistant. Your task is to receive written contracts, check them against specified regulations, identify missing parts, suggest modifications, and provide recommendations for improvement.\n\nWhen a contract is provided, follow these steps:\n\n1. **Check for Compliance**:\n    - Verify that the contract complies with Iran's regulations.\n    - Check compliance with ICC and Incoterms regulations.\n2. **Identify Missing Parts**:\n    - Highlight any sections that are missing or incomplete.\n3. **Suggest Modifications**:\n    - Recommend changes to ensure the contract meets all legal and regulatory requirements.\n4. **Provide Improvements**:\n    - Offer suggestions on how to improve the contract for clarity, fairness, and comprehensiveness.\n5. **Compare with Similar Contracts**:\n    - Compare the contract with similar contracts to identify common practices a

In [6]:
# Create a vector store called "The Rules".
# Its id is used by later cells to upload a file and to attach the store to the assistant.
vector_store = client.beta.vector_stores.create(name="The Rules")
# Last expression of the cell: display the created vector store object.
vector_store

VectorStore(id='vs_iPxWuQl6r96nIPu07m5flEeK', created_at=1727877515, file_counts=FileCounts(cancelled=0, completed=0, failed=0, in_progress=0, total=0), last_active_at=1727877515, metadata={}, name='The Rules', object='vector_store', status='completed', usage_bytes=0, expires_after=None, expires_at=None)

In [7]:
from docx import Document
import logging

def extract_text_from_word(file_path):
    """Return the text of every paragraph in a Word document as one string.

    Args:
        file_path: Location of the document; handed unchanged to ``Document``.

    Returns:
        The paragraph texts concatenated in document order, each followed by
        a newline. An empty string if the document exposes no paragraphs.

    Raises:
        Exception: Any error raised while opening or reading the document.
            The error is logged and then re-raised unchanged.
    """
    try:
        paragraphs = Document(file_path).paragraphs
        # One newline after every paragraph, including the last one.
        return "".join(para.text + "\n" for para in paragraphs)
    except Exception as e:
        logging.error(f"Error extracting text from Word file: {e}")
        raise

# Extract the regulations text from the Word file.
# (Nothing is uploaded in this cell; the vector-store upload happens in a later cell.)
regulations_file_path = "./iran_trade_rules.docx"
try:
    regulations_text = extract_text_from_word(regulations_file_path)
except Exception as e:
    # extract_text_from_word already logs before re-raising, so a failure is logged twice.
    logging.error(f"Error extracting text from regulations file: {e}")
    raise
# Preview a short slice of the extracted text.
# NOTE(review): the slice starts at index 1, so the first character is not shown — confirm this is intended.
regulations_text[1:30]

'ﺑﺎب اول\nﺗﺠﺎر و ﻣﻌﺎﻣﻼت ﺗﺠﺎرﺗﯽ\n'

In [8]:
# Function for Text Embedding with TensorFlow
def embed_text_large(text):
    """Embed ``text`` with XLM-RoBERTa Large and return the first-token vectors.

    The tokenizer and the TF model are both loaded from the
    ``"xlm-roberta-large"`` checkpoint on every call. The input is tokenized
    with padding and truncation enabled, so content beyond the tokenizer's
    length limit is presumably dropped rather than embedded (TODO confirm
    for long documents).

    Args:
        text: Input handed directly to the tokenizer.

    Returns:
        ``last_hidden_state[:, 0, :]`` of the model output, converted to
        nested Python lists.

    Raises:
        Exception: Any error from loading, tokenizing or running the model.
            The error is logged and then re-raised unchanged.
    """
    try:
        checkpoint = "xlm-roberta-large"
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        encoder = TFAutoModel.from_pretrained(checkpoint)

        encoded = tokenizer(text, return_tensors="tf", padding=True, truncation=True)
        hidden_states = encoder(**encoded).last_hidden_state

        # Index 0 along the second axis is the leading (CLS-style) token,
        # used here as the representation of the whole input.
        first_token = hidden_states[:, 0, :]
        return first_token.numpy().tolist()
    except Exception as e:
        logging.error(f"Error embedding text: {e}")
        raise

In [9]:
# Embed the full regulations text by calling embed_text_large (defined above).
# NOTE(review): embed_text_large tokenizes with truncation=True, so presumably only
# the beginning of a long document contributes to the embedding — TODO confirm.
try:
    embedding_large = embed_text_large(regulations_text)
except Exception as e:
    # embed_text_large already logs before re-raising, so a failure is logged twice.
    logging.error(f"Error embedding regulations text: {e}")
    raise













Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFXLMRobertaModel: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing TFXLMRobertaModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFXLMRobertaModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFXLMRobertaModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFXLMRobertaModel for predictions without further training.


In [10]:
# Save the embedded text to a file
def save_embeddings_to_file(embeddings, file_path):
    """Serialize ``embeddings`` as JSON into the file at ``file_path``.

    Args:
        embeddings: Any JSON-serializable object (the notebook passes nested
            lists of floats).
        file_path: Destination path; the file is opened in text write mode,
            so existing content is overwritten.

    Returns:
        None.

    Raises:
        Exception: Any error from opening the file or serializing the data.
            The error is logged and then re-raised unchanged.
    """
    try:
        with open(file_path, mode="w") as out_file:
            json.dump(embeddings, fp=out_file)
        logging.info(f"Embeddings saved to {file_path}")
    except Exception as e:
        logging.error(f"Error saving embeddings to file: {e}")
        raise

In [11]:
# Upload the file to the vector store
def upload_to_vector_store(file_path, vector_store_id):
    """Upload the file at ``file_path`` to an OpenAI vector store and wait for it.

    The previous implementation could never succeed: it referenced an
    ``OpenAI`` name that is not imported (the notebook only does
    ``import openai``) and called ``vector_stores.get`` / ``.upload``, which
    do not match the SDK calls used successfully elsewhere in this notebook.
    This version uses the same ``file_batches.upload_and_poll`` call as the
    stand-alone upload cell.

    NOTE(review): the file_search tool indexes the content of uploaded files
    itself; confirm that uploading a JSON file of numeric embeddings (rather
    than the regulations text) is what is intended.

    Args:
        file_path: Path of the file to upload; opened in binary mode.
        vector_store_id: Id of the target vector store.

    Returns:
        The file-batch object returned by ``upload_and_poll`` so the caller
        can inspect ``status`` and ``file_counts``.

    Raises:
        Exception: Any error from opening the file or from the API call.
            The error is logged and then re-raised unchanged.
    """
    try:
        api_client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
        # The context manager closes the handle even when the upload fails.
        with open(file_path, "rb") as file_stream:
            file_batch = api_client.beta.vector_stores.file_batches.upload_and_poll(
                vector_store_id=vector_store_id, files=[file_stream]
            )
        logging.info(f"Embeddings uploaded to vector store {vector_store_id}")
        return file_batch
    except Exception as e:
        logging.error(f"Error uploading embeddings to vector store: {e}")
        raise

In [12]:
# Persist the embedding computed above as JSON; file_path is reused by the upload cell below.
file_path = "./embedded_iran_trade_rules.json"
save_embeddings_to_file(embedding_large,file_path)

In [13]:
# Upload the saved JSON file to the vector store and block until processing finishes.
# The context manager closes the file handle even if the upload raises, which the
# previous manual open()/close() pair did not guarantee.
with open(file_path, "rb") as file_stream:
    file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
        vector_store_id=vector_store.id, files=[file_stream]
    )

# Show the final batch status and the per-state file counts.
print(file_batch.status)
print(file_batch.file_counts)

completed
FileCounts(cancelled=0, completed=1, failed=0, in_progress=0, total=1)


In [14]:
# %%
# Update the assistant with the vector store: pass the store's id as the
# file_search tool resource, and rebind the name to the returned assistant object.
Contract_Checking_Assistant = client.beta.assistants.update(
    assistant_id=Contract_Checking_Assistant.id,
    tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}}
)

In [15]:
# Create a new thread. No messages are passed to create() here, so any contract
# text has to be added to the thread in a separate step.
thread = client.beta.threads.create()
# Last expression of the cell: display the new thread's id.
thread.id

'thread_qWbTKZYSuwToACLpXW2hVmp4'

In [17]:
# Update the assistant's tools: keep file_search and declare a "contract_check"
# function tool whose arguments are the contract text and a list of regulation strings.
# NOTE(review): this tools list omits the code_interpreter tool that was passed when
# the assistant was created, and no handler for contract_check calls is visible in
# this part of the notebook — confirm both are intended.
Contract_Checking_Assistant = client.beta.assistants.update(
    assistant_id=Contract_Checking_Assistant.id,
    tools=[
        {"type": "file_search"},
        {
            "type": "function",
            "function": {
                "name": "contract_check",
                "description": "Check the contract against specified regulations.",
                # JSON-Schema description of the function's arguments.
                "parameters": {
                    "type": "object",
                    "properties": {
                        "contract_text": {
                            "type": "string",
                            "description": "The text of the contract to be checked."
                        },
                        "regulations": {
                            "type": "array",
                            "items": {
                                "type": "string"
                            },
                            "description": "A list of regulations to check against."
                        }
                    },
                    # Both arguments are listed as required.
                    "required": ["contract_text", "regulations"]
                }
            }
        }
    ]
)
