In [3]:
pip install reportlab

Collecting reportlab
  Downloading reportlab-4.4.0-py3-none-any.whl.metadata (1.8 kB)
Downloading reportlab-4.4.0-py3-none-any.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m17.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: reportlab
Successfully installed reportlab-4.4.0


In [10]:
# Install necessary libraries in Colab
!sudo apt-get install -y tesseract-ocr
!pip install pytesseract pillow transformers googletrans==4.0.0-rc1 reportlab

import pytesseract
from PIL import Image
from transformers import pipeline
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from google.colab import files

# Point pytesseract at the system Tesseract binary, then build the
# Hugging Face summarization pipeline used by analyze_contract().
TESSERACT_BINARY = r'/usr/bin/tesseract'
SUMMARIZATION_MODEL = "t5-small"

pytesseract.pytesseract.tesseract_cmd = TESSERACT_BINARY
summarizer = pipeline("summarization", model=SUMMARIZATION_MODEL)

# Function to extract text from an image
def extract_text_from_image(image_path):
    """Run OCR on an image file and return the recognized text.

    Args:
        image_path: Path of the image to open with PIL.

    Returns:
        The text produced by Tesseract using the combined English + Arabic
        models ("eng+ara"). If anything goes wrong (file cannot be opened,
        OCR fails, ...) no exception is raised; instead a string of the form
        "Error processing image: <details>" is returned, so callers that care
        must check for that prefix.
    """
    try:
        # The context manager closes the underlying file handle as soon as
        # OCR is finished instead of leaving it open until garbage collection.
        with Image.open(image_path) as img:
            return pytesseract.image_to_string(img, lang="eng+ara")
    except Exception as e:
        return f"Error processing image: {e}"

# Function to analyze contract text
def analyze_contract(contract_text, language="en"):
    """Summarize every non-blank line of a contract, optionally in Arabic.

    Args:
        contract_text: Raw contract text. It is split on newlines and each
            non-blank line is treated as one clause. ``None`` or an empty
            string yields an empty result list.
        language: Output language code. "ar" translates each summary to
            Arabic with googletrans; any other value leaves the summaries as
            produced by the summarizer. The code is matched ignoring case and
            surrounding whitespace, so values typed by a user such as "AR" or
            " ar " are accepted.

    Returns:
        A list of ``{"clause": <original line>, "summary": <summary text>}``
        dicts, in the order the clauses appear in the input.
    """
    if not contract_text:
        return []

    results = []
    for clause in contract_text.split("\n"):
        if not clause.strip():
            continue  # skip blank / whitespace-only lines
        summary = summarizer(clause, max_length=50, min_length=10, do_sample=False)
        results.append({"clause": clause, "summary": summary[0]['summary_text']})

    # Normalize so that input() values like "AR" or "ar " still select Arabic.
    language_code = (language or "en").strip().lower()

    # Translate to Arabic if requested
    if language_code == "ar":
        # Imported lazily so English-only runs do not need googletrans.
        from googletrans import Translator
        translator = Translator()
        results = [{"clause": res["clause"],
                    "summary": translator.translate(res["summary"], dest="ar").text}
                   for res in results]

    return results

# Function to save contract analysis to a PDF
def save_analysis_to_pdf(analysis_results, filename="contract_analysis.pdf"):
    """Write the clause/summary pairs to a letter-size PDF report.

    Args:
        analysis_results: Iterable of dicts with "clause" and "summary" keys,
            as returned by analyze_contract().
        filename: Path of the PDF file to create.

    The report has a title and a separator line at the top of the first page,
    followed by each clause and its summary. Lines wider than the printable
    area are word-wrapped, and a new page is started when the bottom margin
    is reached. A confirmation message is printed once the file is saved.

    NOTE(review): text is drawn with the canvas's default font; Arabic
    summaries will presumably not render correctly unless a font covering
    Arabic glyphs is registered and selected — confirm.
    """
    c = canvas.Canvas(filename, pagesize=letter)
    width, height = letter

    margin = 50
    line_height = 20
    x_position = margin
    max_text_width = width - 2 * margin

    c.drawString(margin, height - 30, "Contract Analysis Report")
    c.drawString(margin, height - 50, "-" * 30)

    # Start the body one line below the separator so the first clause is not
    # drawn on top of it.
    y_position = height - 50 - line_height

    for clause_info in analysis_results:
        text = f"Clause: {clause_info['clause']}\nSummary: {clause_info['summary']}\n"

        # The trailing "\n" yields an empty last line that acts as a spacer
        # between consecutive clauses.
        for line in text.split("\n"):
            for piece in _wrap_line_to_width(c, line, max_text_width):
                c.drawString(x_position, y_position, piece)
                y_position -= line_height

                if y_position < margin:
                    c.showPage()
                    y_position = height - margin

    c.save()
    print(f"PDF saved successfully as {filename}")


def _wrap_line_to_width(pdf_canvas, line, max_width):
    """Split `line` at spaces into pieces that each fit in `max_width` points.

    Widths are measured with the canvas's current font via ``stringWidth``.
    A single word that is wider than `max_width` is kept whole on its own
    piece rather than being broken mid-word.
    """
    if not line or pdf_canvas.stringWidth(line) <= max_width:
        return [line]

    pieces, current = [], ""
    for word in line.split():
        candidate = f"{current} {word}" if current else word
        if current and pdf_canvas.stringWidth(candidate) > max_width:
            pieces.append(current)
            current = word
        else:
            current = candidate
    pieces.append(current)
    return pieces

# Upload an image in Colab and extract text
uploaded = files.upload()  # Let the user upload an image
if not uploaded:
    # Presumably an empty mapping when the upload dialog is cancelled; fail
    # with a clear message instead of an IndexError on the lookup below.
    raise RuntimeError("No file was uploaded; re-run this cell and choose a contract image.")
image_path = next(iter(uploaded))  # Filename of the first uploaded image
contract_text = extract_text_from_image(image_path)

# extract_text_from_image() reports failures as a string with this prefix;
# stop here rather than summarizing the error message as if it were a contract.
if contract_text.startswith("Error processing image:"):
    raise RuntimeError(contract_text)

print("\nExtracted Contract Text:\n", contract_text)

# Analyze the contract text
language = input("\nEnter output language ('en' for English, 'ar' for Arabic): ").strip().lower()
if language not in ("en", "ar"):
    # Any value other than 'ar' already meant English; make that visible.
    print(f"Unrecognized language {language!r}; defaulting to 'en'.")
    language = "en"
analysis_results = analyze_contract(contract_text, language=language)

print("\nContract Analysis:\n")
for clause_info in analysis_results:
    print(f"Clause: {clause_info['clause']}")
    print(f"Summary: {clause_info['summary']}\n")

# Save results to a PDF file
save_analysis_to_pdf(analysis_results)

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
tesseract-ocr is already the newest version (4.1.1-2.1build1).
0 upgraded, 0 newly installed, 0 to remove and 34 not upgraded.


Device set to use cpu


Saving NBA-uniform-player-contract-01.png to NBA-uniform-player-contract-01 (4).png

Extracted Contract Text:
 EXHIBIT A

\ NATIONAL BASKETBALL ASSOCIATION
UNIFORM PLAYER CONTRACT

THIS AGREEMENT made this day of. , is by and between

(hereinafter called the “Team’), a member of the National

Basketball Association (hereinafter called the “NBA” or “League”) and ;

an individual whose address is shown below (hereinafter called the “Player”). In consideration
of the mutual promises hereinafter contained, the parties hereto promise and agree as follows:

1. TERM.

The Team hereby employs the Player as a skilled basketball player for a term of
year(s) from the Ist day of September .

2. SERVICES.

The services to be rendered by the Player pursuant to this Contract shall include:
(a) training camp, (b) practices, meetings, workouts, and skill or conditioning sessions conducted
by the Team during the Season, (c) games scheduled for the Team during any Regular Season,
(d) Exhibition games sch

Your max_length is set to 50, but your input_length is only 8. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=4)
Your max_length is set to 50, but your input_length is only 17. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=8)
Your max_length is set to 50, but your input_length is only 12. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=6)
Your max_length is set to 50, but your input_length is only 18. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=9)
Your max_leng


Contract Analysis:

Clause: EXHIBIT A
Summary: EXHIBIT A - EXHABIT A

Clause: \ NATIONAL BASKETBALL ASSOCIATION
Summary: NATIONAL BASKETBALL ASSOCIATION .

Clause: UNIFORM PLAYER CONTRACT
Summary: UNIFORM PLAYER CONTRACT .

Clause: THIS AGREEMENT made this day of. , is by and between
Summary: THIS AGREEMENT made this day of. is by and between .

Clause: (hereinafter called the “Team’), a member of the National
Summary: a member of the national team is called the team . the team is a national member .

Clause: Basketball Association (hereinafter called the “NBA” or “League”) and ;
Summary: basketball association (hereinafter called the "NBA" or "League") and nba .

Clause: an individual whose address is shown below (hereinafter called the “Player”). In consideration
Summary: an individual whose address is shown below (hereinafter called the “Player”). In consideration, an individual may be referred to as a player .

Clause: of the mutual promises hereinafter contained, the parties here