In [1]:
#!pip install PyMuPDF
#!pip install transformers
#!pip install torch
#!pip install pdfplumber

In [2]:
import fitz  # PyMuPDF
import os

# Location of the contract PDF, relative to the current working directory.
file_path = "sample_contracts/sample_contract.pdf"

# Sanity check before extraction: report whether the path exists and what
# absolute path the relative one resolves to.
path_found = os.path.exists(file_path)
resolved_path = os.path.abspath(file_path)
print("File exists?", path_found)
print("Full path:", resolved_path)

# Step 3: Define function to extract text
def extract_text_from_pdf(path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        path: Path of the PDF file; passed unchanged to ``fitz.open``.

    Returns:
        str: The ``page.get_text()`` result of each page joined together with
        no separator. An empty string if the document yields no pages.

    Raises:
        Any exception raised by ``fitz.open`` or ``page.get_text`` propagates
        to the caller.
    """
    # Open the document as a context manager so it is closed (and its file
    # handle released) even if extraction fails part-way through.
    with fitz.open(path) as doc:
        # join() builds the result once instead of re-copying a growing
        # string for every page.
        return "".join(page.get_text() for page in doc)

# Step 4: Use the function
if not os.path.exists(file_path):
    # Nothing to extract; pdf_text is left undefined in this case.
    print("File not found. Please check the path.")
else:
    pdf_text = extract_text_from_pdf(file_path)
    print("Text extraction successful!")
    # Preview only the first 1000 characters of the extracted text.
    preview = pdf_text[:1000]
    print(preview)


File exists? True
Full path: /workspaces/AI-for-Law/sample_contracts/sample_contract.pdf
Text extraction successful!
 
 
 
 
                                  EXHIBIT 10.1 
 
 
 
 
 
 
                                 LOAN AGREEMENT 
 
 
This  Loan  Agreement  (this  "Agreement"),  is made as of  September  21,  2014 
between: 
 
     1. Americann, Inc. ("Americann") and 
 
     2. Wellness Group Pharms, LLC ("WGP"). 
 
 
                                  INTRODUCTION 
 
     WGP has requested that  Americann  extend credit to WGP in the form of loan 
not to exceed $4,760,000 to be used to fund WGP's  acquisition,  development and 
construction of property,  plant, equipment,  all necessary permits and start-up 
costs and  expenses  in  accordance  with those costs and  operating  income and 
expense  projections  shown on the attached Exhibit A. The Property on which the 
cultivation and growing facility will be constructed is described on Exhibit B. 
 
                               

## Summarization of the contract

In [4]:
from transformers import pipeline

# Summarization pipeline backed by the facebook/bart-large-cnn checkpoint.
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
)

# Only the first 1024 characters of the contract are summarized. Note that
# this slice is measured in characters, not model tokens.
contract_excerpt = pdf_text[:1024]
summary = summarizer(contract_excerpt)
summary_text = summary[0]["summary_text"]
print(summary_text)

Device set to use cpu


 Americann, Inc. ("Americann") and Wellness Group Pharms, LLC ("WGP") agree to a loan of $4,760,000. The loan will be used to fund WGP's acquisition, development and construction of property, plant, equipment, and all necessary permits.


## Questions and answers on the contract

In [5]:
from transformers import pipeline

# Question-answering pipeline backed by the
# distilbert-base-cased-distilled-squad checkpoint.
qa = pipeline(
    "question-answering",
    model="distilbert-base-cased-distilled-squad",
)

# Ask a single question, using the full extracted contract text as context.
question = "What is the termination clause?"
answer = qa(context=pdf_text, question=question)
answer_text = answer["answer"]
print(answer_text)

config.json:   0%|          | 0.00/473 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/261M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

Device set to use cpu


WGP will not, directly or indirectly, negotiate with 
any third party


## Legal-BERT model for classification or fine-tuning

In [7]:
from transformers import AutoTokenizer, AutoModel

# The tokenizer and the model are both loaded from the same checkpoint.
legal_bert_checkpoint = "nlpaueb/legal-bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(legal_bert_checkpoint)
model = AutoModel.from_pretrained(legal_bert_checkpoint)

# Tokenize the first 512 characters of the contract into PyTorch tensors
# (with truncation enabled) and run one forward pass through the model.
# NOTE(review): this forward pass runs with gradient tracking enabled --
# confirm whether that is intended if the outputs are only inspected.
contract_prefix = pdf_text[:512]
inputs = tokenizer(contract_prefix, truncation=True, return_tensors="pt")
outputs = model(**inputs)


In [8]:
# Display the raw model output object (its last_hidden_state and
# pooler_output tensors) as the cell result.
outputs

BaseModelOutputWithPoolingAndCrossAttentions(last_hidden_state=tensor([[[ 0.2751,  0.0932, -0.2236,  ...,  0.0792,  0.0950, -0.3112],
         [-0.0794,  0.5820, -0.2845,  ..., -0.7664,  0.3378,  0.4985],
         [-0.6444,  0.7205, -0.4856,  ..., -0.2679, -0.3733, -0.3072],
         ...,
         [ 0.5615,  0.0465,  0.1865,  ...,  0.1175,  0.2380, -0.0338],
         [ 0.4752, -0.1167, -0.0840,  ...,  0.1972,  0.2874, -0.2859],
         [-0.1397, -0.0451, -0.0977,  ..., -0.1370,  0.2469, -0.1779]]],
       grad_fn=<NativeLayerNormBackward0>), pooler_output=tensor([[-0.2625,  0.3953,  0.9594,  0.1259,  0.4737,  0.4718, -0.5889, -0.3032,
          0.9994,  0.8982,  0.9722,  0.0730,  0.7978, -0.9983,  0.2991, -0.0474,
          0.0905,  0.1256, -0.4101,  0.9608,  0.4624, -0.6240,  0.9992, -0.2320,
         -0.5958, -0.4801, -0.0378, -0.9768,  0.2877,  0.7969, -0.1329, -0.4353,
          0.9915,  0.4227,  0.9704, -0.0053, -0.4190,  0.0967,  0.5680,  0.6167,
         -0.3948, -0.5515, -0.93