In [None]:
!pip install pdf2image openai==0.28
!apt-get install -y poppler-utils
!pip install Pillow
!pip install pytesseract
!pip install pdfplumber
!pip install fpdf

Collecting pdf2image
  Downloading pdf2image-1.17.0-py3-none-any.whl (11 kB)
Installing collected packages: pdf2image
Successfully installed pdf2image-1.17.0
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following NEW packages will be installed:
  poppler-utils
0 upgraded, 1 newly installed, 0 to remove and 45 not upgraded.
Need to get 186 kB of archives.
After this operation, 696 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 poppler-utils amd64 22.02.0-2ubuntu0.4 [186 kB]
Fetched 186 kB in 1s (356 kB/s)
Selecting previously unselected package poppler-utils.
(Reading database ... 121925 files and directories currently installed.)
Preparing to unpack .../poppler-utils_22.02.0-2ubuntu0.4_amd64.deb ...
Unpacking poppler-utils (22.02.0-2ubuntu0.4) ...
Setting up poppler-utils (22.02.0-2ubuntu0.4) ...
Processing triggers for man-db (2.10.2-1) ...


In [None]:
import re
from PIL import Image
import pytesseract
import pdfplumber
from pdf2image import convert_from_path
from fpdf import FPDF
import openai
import os

In [None]:
# Get the OpenAI API key from the environment variable.
# SECURITY: a literal secret key used to be committed on this line; any key that
# was ever stored in the notebook must be treated as leaked and revoked.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    # Fail loudly here so a Run All stops before any OpenAI call is attempted.
    raise RuntimeError("API key not found. Please set the OPENAI_API_KEY environment variable.")
openai.api_key = api_key

In [None]:
def extract_text_from_image(image_path):
    """Run OCR on an image file and return the recognised text.

    Args:
        image_path: Location of the image, passed straight to ``Image.open``.

    Returns:
        The string produced by ``pytesseract.image_to_string``, or ``None`` if
        opening the image or running OCR raised (the error is printed).
    """
    try:
        # The context manager releases the underlying file handle as soon as
        # OCR is finished instead of leaving it open until garbage collection.
        with Image.open(image_path) as image:
            return pytesseract.image_to_string(image)
    except Exception as e:
        print(f"Error extracting text from image: {e}")
        return None

In [None]:
def extract_text_from_pdf(file_path):
    """Extract the text layer of every page of a PDF.

    Args:
        file_path: Location of the PDF, passed straight to ``pdfplumber.open``.

    Returns:
        The text of all pages that produced any text, joined by newlines
        (``""`` if no page had text), or ``None`` if reading the PDF raised
        (the error is printed).
    """
    try:
        with pdfplumber.open(file_path) as pdf:
            # extract_text() can return None for a page without extractable
            # text; "or ''" keeps one such page from aborting the whole
            # document with a TypeError on string concatenation.
            page_texts = [page.extract_text() or "" for page in pdf.pages]
        # Join with a newline so the last word of one page is not fused with
        # the first word of the next; pages with no text are skipped.
        return "\n".join(chunk for chunk in page_texts if chunk)
    except Exception as e:
        print(f"Error extracting text from PDF: {e}")
        return None

In [None]:
def analyze_contract(text):
    """Send contract text to GPT-4 and return the model's free-text analysis.

    The prompt asks for the contract party, contract number, date of birth
    and quitting party.

    Args:
        text: The contract text to embed in the prompt.

    Returns:
        The reply content with surrounding whitespace stripped, or ``None``
        if anything raised along the way (the error is printed).
    """
    try:
        system_message = {"role": "system", "content": "You are a helpful assistant."}
        user_message = {
            "role": "user",
            "content": f"Analyze the following contract text and extract the contract party, contract number, date of birth, and quitting party:\n\n{text}\n\n",
        }
        completion = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[system_message, user_message],
            max_tokens=500,
        )
        # Only the first choice is used.
        reply = completion.choices[0].message["content"]
        return reply.strip()
    except Exception as e:
        print(f"Error analyzing contract: {e}")
        return None

In [None]:
def extract_information(analysis_result):
    """Pull the contract fields out of the model's analysis text with regexes.

    For each field the patterns are tried in order and the first one that
    matches wins. The explicit names ("Vodafone GmbH", "Max Mustermann") are
    tried before the generic fallbacks: when they were combined into a single
    alternation, regex leftmost-match semantics let the generic branch win
    whenever any earlier text fit it (e.g. a leading "Contract party" label).

    Args:
        analysis_result: Text returned by the contract analysis step.

    Returns:
        A dict with the keys ``parties``, ``contract_number``,
        ``date_of_birth`` and ``quitting_party``, or ``None`` if the input is
        empty, any field has no match, or an error occurred (a message naming
        the problem is printed).
    """
    # (field name, patterns in priority order)
    field_patterns = (
        ("parties", (r'Vodafone GmbH', r'.*?GmbH')),
        ("contract_number", (r'\b\d{10,11}\b',)),
        ("date_of_birth", (r'\d{2}\.\d{2}\.\d{4}',)),
        ("quitting_party", (r'Max Mustermann', r'[A-Za-z]+\s[A-Za-z]+')),
    )

    if not analysis_result:
        print("Error extracting information: analysis result is empty")
        return None

    try:
        data = {}
        for field, patterns in field_patterns:
            data[field] = None
            for pattern in patterns:
                match = re.search(pattern, analysis_result)
                if match:
                    data[field] = match.group(0)
                    break

        # All four fields are required by the letter template, so a partial
        # result is reported by name and rejected rather than returned.
        missing = [field for field, value in data.items() if value is None]
        if missing:
            print(f"Error extracting information: no match found for {', '.join(missing)}")
            return None
        return data
    except Exception as e:
        print(f"Error extracting information: {e}")
        return None

In [None]:
def generate_termination_pdf(
    data,
    output_path="/content/drive/My Drive/termination_contract.pdf",
    letter_date=None,
    sender_line="Max Mustermann · Kaulbachstraße 60 · 80539 München",
):
    """Render a German contract-termination letter to a PDF file.

    Args:
        data: Mapping with the keys ``parties``, ``contract_number``,
            ``date_of_birth`` and ``quitting_party``.
        output_path: Where the PDF is written.
        letter_date: Date string printed on the letter. Defaults to today's
            date formatted as ``DD.MM.YYYY`` (it used to be a fixed, stale
            date).
        sender_line: Centered sender line at the top of the letter.

    Returns:
        ``True`` if the PDF was written, ``False`` if anything raised (the
        error is printed).
    """
    from datetime import date  # local import: only this function needs it

    try:
        if letter_date is None:
            letter_date = date.today().strftime("%d.%m.%Y")

        # Width 0 makes FPDF extend a cell up to the right page margin. The
        # previous fixed 200 mm, starting at the 10 mm left margin of an A4
        # page, ran past the right margin and skewed the centered and
        # right-aligned lines.
        full_width = 0

        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=12)

        # Sender line
        pdf.cell(full_width, 10, txt=sender_line, ln=True, align='C')
        pdf.cell(full_width, 10, txt="", ln=True)  # Empty line

        # Recipient block: the extracted contract party followed by fixed
        # address lines.
        pdf.cell(full_width, 10, txt=f"{data['parties']}", ln=True)
        pdf.cell(full_width, 10, txt="Mobilfunk", ln=True)
        pdf.cell(full_width, 10, txt="Kundenbetreuung", ln=True)
        pdf.cell(full_width, 10, txt="40875 Ratingen", ln=True)
        pdf.cell(full_width, 10, txt="", ln=True)  # Empty line

        # Contract number and date of birth
        pdf.cell(full_width, 10, txt=f"Vodafone-Handynummer: {data['contract_number']}  Geburtsdatum: {data['date_of_birth']}", ln=True)
        pdf.cell(full_width, 10, txt="", ln=True)  # Empty line

        # Subject line and letter date
        pdf.set_font("Arial", size=14, style='B')
        pdf.cell(full_width, 10, txt="Kündigung zum nächstmöglichen Zeitpunkt", ln=True)
        pdf.set_font("Arial", size=12)
        pdf.cell(full_width, 10, txt=letter_date, ln=True, align='R')
        pdf.cell(full_width, 10, txt="", ln=True)  # Empty line

        # Letter body
        pdf.multi_cell(full_width, 10, txt="Sehr geehrte Damen und Herren,\n\n"
                                           "hiermit kündige ich meinen Vertrag fristgerecht zum nächstmöglichen Zeitpunkt. "
                                           "Bitte senden Sie mir eine schriftliche Bestätigung der Kündigung unter Angabe des Beendigungszeitpunktes zu.\n\n"
                                           "Mit freundlichen Grüßen\n\n")

        # The quitting party's name is printed twice: once large and bold,
        # once in the regular font.
        pdf.set_font("Arial", size=16, style='B')
        pdf.cell(full_width, 10, txt=f"{data['quitting_party']}", ln=True)
        pdf.set_font("Arial", size=12)
        pdf.cell(full_width, 10, txt=f"{data['quitting_party']}", ln=True)

        pdf.output(output_path)
        print(f"Termination Contract generated: {output_path}")
        return True
    except Exception as e:
        print(f"Error generating PDF: {e}")
        return False

In [None]:
def generate_signature_with_dalle(prompt):
    """Request one 256x256 image from the OpenAI image API and return its URL.

    Args:
        prompt: Text prompt forwarded to ``openai.Image.create``.

    Returns:
        The ``url`` of the first entry in the response's ``data`` list, or
        ``None`` if the request or the lookup raised (the error is printed).
    """
    request = {"prompt": prompt, "n": 1, "size": "256x256"}
    try:
        result = openai.Image.create(**request)
        first_image = result['data'][0]
        return first_image['url']
    except Exception as e:
        print(f"Error generating image with DALL-E: {e}")
        return None

In [None]:
# Mount Google Drive at /content/drive; the input PDF path below lives under it.
from google.colab import drive
drive.mount('/content/drive')

# Make sure the file path is correct
pdf_path = "/content/drive/My Drive/Insurance1.pdf"
print(f"PDF path: {pdf_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
PDF path: /content/drive/My Drive/Insurance1.pdf


In [None]:
# End-to-end run: PDF text -> GPT-4 analysis -> regex field extraction ->
# termination letter. Each helper signals failure with None (or False), so
# every stage is checked and a failed stage is reported here instead of the
# cell ending without a status line.
pdf_text = extract_text_from_pdf(pdf_path)
if pdf_text:
    print("Extracted text from PDF:")
    print(pdf_text)
    analysis_result = analyze_contract(pdf_text)
    if analysis_result:
        print("Analysis result from GPT-4:")
        print(analysis_result)
        contract_data = extract_information(analysis_result)
        if contract_data:
            print("Contract data extracted:")
            print(contract_data)
            if not generate_termination_pdf(contract_data):
                print("Failed to generate the termination PDF.")
            # List files to check if the output PDF is generated
            print(os.listdir("/content/drive/My Drive/"))
        else:
            print("Failed to extract contract data from the analysis result.")
    else:
        print("Failed to analyze the contract text.")
else:
    print("Failed to extract text from PDF.")

Extracted text from PDF:
Fintiba GmbH | Wilhelm-Leuschner-Str. 29 | DE-60329 Frankfurt a. M.
Fintiba GmbH
WWiillhheellmm--LLeeuusscchhnneerr--SSttrraaßßee 2299
Frau 60329 Frankfurt am Main
Zhu GGeerrmmaannyy
Mohan
+49 69 204 34 26 21
behoerden@fintiba.com
23.06.2023
Health Insurance Confirmation for Visa Application (DAK-Gesundheit)
Dear Sir or Madame,
hereby, we confirm that
First Name(s) Mohan
Surname Zhu
Date of Birth 23.06.2001
Place of Birth Anhui,China
Passport Number EJ3620539
Issued on 17.10.2022
By CN
successfully applied for a governmental health insurance at DAK-Gesundheit. The governmental health insurance only
takes effect after official enrolment at the university of choice.
The necessary health insurance documents for university enrolment will be provided as soon as DAK-Gesundheit
has confirmed the application and the health insurance number is available.
The governmental health insurance at DAK-Gesundheit will be active after the enrolment certificate has been
provided 

In [None]:

# Print the contents of the Drive folder that holds the input PDFs and is the
# default output location for the generated termination letter.
print(os.listdir("/content/drive/My Drive/"))

['Colab Notebooks', 'logo.jpg', 'Vertrag - 31.10.2023.pdf', 'Insurance.pdf', 'Insurance1.pdf']
