1. Load model

In [None]:
# RoLlama3 Document Generator for Google Colab
# This script generates one contract document per topic using the RoLlama3-8b model and saves them locally

!pip install -q transformers accelerate bitsandbytes

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import json
from datetime import datetime

# 4-bit NF4 quantization with double quantization; matmuls are computed in float16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

print("Loading model... This may take a few minutes.")
model_name = "OpenLLM-Ro/RoLlama3-8b-Instruct-2024-06-28"

# `tokenizer` and `model` are notebook-level globals used by later cells.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# NOTE: `trust_remote_code=True` was dropped on purpose. It allows Python code
# shipped in the Hub repository to run locally, and the recorded download log
# for this checkpoint lists only JSON and safetensors files, so the stock
# implementation in `transformers` is what gets loaded anyway.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",
)

print("Model loaded successfully!")

[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m59.4/59.4 MB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
[?25hLoading model... This may take a few minutes.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/325 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/699 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/172 [00:00<?, ?B/s]

Model loaded successfully!


In [None]:
# Prompt template (Romanian). The single `{}` placeholder is filled with one
# contract type from `topics` via `base_prompt.format(topic)`.
base_prompt = """Esti un asistent juridic pentru un notar. Genereaza un contract scurt de {}."""

# Contract types to generate, one document per entry. The entries are written
# without diacritics; they are also used (truncated, spaces replaced by "_")
# to build the output file names.
topics = [
    "prestari servicii",
    "imprumut",
    "negociere",
    "franciza",
    "leasing",
    "concesiune",
    "individual de munca pe perioada determinata",
    "individual de munca pe perioada nedeterminata",
    "individual de munca cu timp partial",
    "confidentialitate",
    "furnizare de bunuri",
    "creare de continut si copywriting",
    "consultanta in afaceri",
    "antrepriza",
    "imprumut bani",
    "prestari servicii IT",
    "vanzare cumparare imobil",
    "vanzare cumparare auto",
]

def generate_document(prompt, max_new_tokens=512):
    """Generate a completion for ``prompt`` with the globally loaded chat model.

    Relies on the notebook-level globals ``tokenizer`` and ``model``.

    Args:
        prompt: User message sent to the model as a single-turn chat.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The generated text only (the prompt is not included), stripped of
        leading and trailing whitespace.
    """
    messages = [
        {"role": "user", "content": prompt}
    ]

    # Ask explicitly for a dict so we get both `input_ids` and `attention_mask`
    # regardless of the library's default return type.
    encoded = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    prompt_length = encoded["input_ids"].shape[-1]

    outputs = model.generate(
        **encoded,
        max_new_tokens=max_new_tokens,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens. Splitting the full decoded text
    # on the word "assistant" would cut off any completion that happens to
    # contain that word.
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

# Generate one document per entry in `topics`.
print("\nGenerating documents...")
documents = []
total_documents = len(topics)

for i, topic in enumerate(topics, 1):
    # Report progress against the real topic count rather than a fixed number.
    print(f"Generating document {i}/{total_documents}: {topic}")

    prompt = base_prompt.format(topic)
    generated_text = generate_document(prompt)

    doc = {
        "id": i,
        "topic": topic,
        "prompt": prompt,
        "generated_text": generated_text,
        "timestamp": datetime.now().isoformat()
    }

    documents.append(doc)
    print(f"✓ Document {i} completed ({len(generated_text)} characters)")

print("\nAll documents generated!")

# Combined output: every document record in a single JSON file.
with open('generated_documents.json', 'w', encoding='utf-8') as f:
    json.dump(documents, f, ensure_ascii=False, indent=2)

import os
import shutil
os.makedirs('documents', exist_ok=True)

# Individual output: one text file per document, named from its id and the
# first 30 characters of the topic (spaces replaced by underscores).
for doc in documents:
    filename = f"documents/document_{doc['id']:02d}_{doc['topic'][:30].replace(' ', '_')}.txt"
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(f"Topic: {doc['topic']}\n")
        f.write(f"Generated at: {doc['timestamp']}\n")
        f.write(f"\n{'='*50}\n\n")
        f.write(doc['generated_text'])

print(f"\n✓ Saved {len(documents)} documents to 'documents/' folder")
print("✓ Saved combined JSON to 'generated_documents.json'")

# Optional (disabled): copy the results to Google Drive instead of downloading.
# from google.colab import drive
# drive.mount('/content/drive')
#
# # Copy files to Drive
# !cp generated_documents.json "/content/drive/My Drive/"
# !cp -r documents "/content/drive/My Drive/"
# print("✓ Files copied to Google Drive!")

from google.colab import files

files.download('generated_documents.json')

# Create a zip of all text files. The archive is rebuilt from scratch, so
# entries from an earlier run cannot linger in an existing documents.zip.
# NOTE: files left in 'documents/' by earlier runs are still included.
shutil.make_archive('documents', 'zip', root_dir='.', base_dir='documents')
files.download('documents.zip')

print("\n✓ Files ready for download!")


Generating documents...
Generating document 1/20: prestari servicii
‚úì Document 1 completed (328 characters)
Generating document 2/20: imprumut
‚úì Document 2 completed (1504 characters)
Generating document 3/20: negociere
‚úì Document 3 completed (698 characters)
Generating document 4/20: franciza
‚úì Document 4 completed (1629 characters)
Generating document 5/20: leasing
‚úì Document 5 completed (1159 characters)
Generating document 6/20: concesiune
‚úì Document 6 completed (1436 characters)
Generating document 7/20: individual de munca pe perioada determinata
‚úì Document 7 completed (1177 characters)
Generating document 8/20: individual de munca pe perioada nedeterminata
‚úì Document 8 completed (1529 characters)
Generating document 9/20: individual de munca cu timp partial
‚úì Document 9 completed (1462 characters)
Generating document 10/20: confidentialitate
‚úì Document 10 completed (984 characters)
Generating document 11/20: furnizare de bunuri
‚úì Document 11 completed (149

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

updating: documents/ (stored 0%)
updating: documents/document_13_franciza_afacere.txt (deflated 49%)
updating: documents/document_03_amanet.txt (deflated 47%)
updating: documents/document_18_know-how.txt (deflated 61%)
updating: documents/document_15_intermediere.txt (deflated 48%)
updating: documents/document_17_joc-pariu.txt (deflated 56%)
updating: documents/document_09_consignatie.txt (deflated 54%)
updating: documents/document_08_distributie_exclusiva.txt (deflated 57%)
updating: documents/document_16_ipoteca_imobil.txt (deflated 58%)
updating: documents/document_01_confidentialitate.txt (deflated 77%)
updating: documents/document_11_factoring.txt (deflated 60%)
updating: documents/document_19_mandat_comercial.txt (deflated 54%)
updating: documents/document_02_agentie_comerciala.txt (deflated 96%)
updating: documents/document_20_prestari_servicii.txt (deflated 43%)
updating: documents/document_12_fiducie.txt (deflated 48%)
updating: documents/document_10_cont_curent_bancar.txt (de

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


‚úì Files ready for download!


**AI Critic:**

In [None]:
import json
from google.colab import userdata
from huggingface_hub import InferenceClient

# Read the Hugging Face access token stored under the name 'HF_TOKEN' through
# google.colab.userdata, so the token is never written into the notebook.
hf_token = userdata.get('HF_TOKEN')

# Shared inference client; used as a global by evaluate_contract().
client = InferenceClient(token=hf_token)

def _strip_code_fence(text):
    """Remove a leading ```json / ``` fence and a trailing ``` fence, if present."""
    cleaned = text.strip()
    for opener in ("```json", "```"):
        if cleaned.startswith(opener):
            cleaned = cleaned[len(opener):]
            break
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    return cleaned.strip()


def evaluate_contract(contract_text, contract_json=None):
    """
    Evaluate a contract with a hosted Mistral model on structure, legal language and purpose.

    Uses the notebook-level ``client`` to send a single chat message and parses
    the JSON object found in the reply.

    Args:
        contract_text: The contract as plain text.
        contract_json: Optional JSON representation of the contract. Accepted
            for interface compatibility; it is currently not used.

    Returns:
        The parsed evaluation dictionary, or None when the request fails or
        the reply contains no parseable JSON object. Errors are printed, not
        raised.
    """

    prompt = f"""Esti un evaluator de documente juridice. Analizeaza urmatorul contract si da cate un scor de la 1 la 10 pentru fiecare din criteriile urmatoare:

1. Structura (1-10): Are documentul titlu, parti contractante, continut bine organizat?
2. Limbaj juridic (1-10): Sunt folositi termenii juridici corecti si consistent?
3. Scop (1-10): Are contractul un scop bine definit si clar?

Contractul de evaluat:
---
{contract_text}
---

Raspunde DOAR cu un obiect JSON valid in acest format exact, fara text inainte sau dupa:
{{
  "structura": {{
    "scor": <numar 1-10>,
    "observatie": "<comentariu scurt despre structura>"
  }},
  "limbaj_juridic": {{
    "scor": <numar 1-10>,
    "observatie": "<comentariu scurt despre limbajul juridic>"
  }},
  "scop": {{
    "scor": <numar 1-10>,
    "observatie": "<comentariu scurt despre claritatea scopului>"
  }},
  "observatii_generale": "<comentariu general despre contract>"
}}"""

    # Bound before the try block so the except handlers can always print them,
    # even when the failure happens before the reply text is available.
    response_text = None
    cleaned_response = None

    try:
        model_id = "mistralai/Mistral-7B-Instruct-v0.2"

        print(f"Using model: {model_id}")

        response = client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            model=model_id,
            max_tokens=800,
            temperature=0.3,
        )

        # Extract the response text from chat completion
        response_text = response.choices[0].message.content

        print(f"\nDEBUG - Raw response:\n{response_text}\n")

        cleaned_response = _strip_code_fence(response_text)

        start_idx = cleaned_response.find('{')
        if start_idx == -1:
            print(f"ERROR - No JSON object found in response")
            print(f"Cleaned response: {cleaned_response}")
            return None

        # Decode exactly one JSON object starting at the first '{'. Unlike
        # slicing up to the last '}', this tolerates trailing commentary that
        # itself contains braces.
        evaluation, end_idx = json.JSONDecoder().raw_decode(cleaned_response, start_idx)
        print(f"DEBUG - Extracted JSON:\n{cleaned_response[start_idx:end_idx]}\n")
        return evaluation

    except json.JSONDecodeError as e:
        print(f"Failed to parse JSON response: {e}")
        print(f"Raw response: {response_text}")
        print(f"Cleaned response: {cleaned_response}")
        return None
    except Exception as e:
        # Best-effort by design: report the problem and let the caller move on.
        print(f"Error during evaluation: {e}")
        print(f"Error type: {type(e).__name__}")
        import traceback
        traceback.print_exc()
        return None


def evaluate_multiple_contracts(contracts):
    """
    Evaluate multiple contracts and return the successful results.

    Args:
        contracts: List of dictionaries with a 'text' key and an optional
            'json' key. A missing 'text' is treated as an empty string.

    Returns:
        List of ``{'contract_id': <1-based position>, 'evaluation': <dict>}``
        entries. Contracts whose evaluation failed are reported on stdout and
        left out, so ids in the result may have gaps.
    """
    results = []
    total = len(contracts)

    for position, contract in enumerate(contracts, start=1):
        print(f"\n{'='*60}")
        print(f"Evaluating contract {position}/{total}...")
        print('='*60)

        evaluation = evaluate_contract(
            contract.get('text', ''),
            contract.get('json', None)
        )

        if evaluation:
            results.append({
                'contract_id': position,
                'evaluation': evaluation
            })
        else:
            # The warning sign was stored as mis-decoded bytes; restored here.
            print(f"⚠️  Contract {position} evaluation failed")

    return results


def display_evaluation(evaluation):
    """Pretty print the evaluation results.

    Args:
        evaluation: Dictionary with the keys 'structura', 'limbaj_juridic' and
            'scop' (each holding 'scor' and 'observatie') plus
            'observatii_generale'. A falsy value prints a placeholder message.

    Raises:
        KeyError: If an expected key is missing from ``evaluation``.
        ValueError: If a score cannot be interpreted as a number.
    """
    if not evaluation:
        print("No evaluation available")
        return

    separator = "=" * 60
    print("\n" + separator)
    print("REZULTATE EVALUARE CONTRACT")
    print(separator)

    # (key in the evaluation dict, label printed for it)
    criteria = (
        ("structura", "📋 STRUCTURA"),
        ("limbaj_juridic", "⚖️  LIMBAJ JURIDIC"),
        ("scop", "🎯 SCOP"),
    )

    scores = []
    for key, label in criteria:
        entry = evaluation[key]
        print(f"\n{label}: {entry['scor']}/10")
        print(f"   → {entry['observatie']}")
        # The model sometimes returns scores as strings ("8"); coerce so the
        # average below does not fail with a TypeError.
        scores.append(float(entry['scor']))

    print(f"\n💡 OBSERVATII GENERALE:")
    print(f"   {evaluation['observatii_generale']}")

    avg_score = sum(scores) / len(scores)
    print(f"\n📊 Scor Mediu: {avg_score:.1f}/10")
    print(separator + "\n")


# Demo: evaluate one sample sale-purchase contract and print the scores.
if __name__ == "__main__":
    # The sample text was stored with mis-decoded Romanian diacritics; the
    # characters are restored here so the evaluator receives readable text.
    # The wording of the sample itself is unchanged.
    sample_contract = {
        'text': """
CONTRACT DE VÂNZARE-CUMPĂRARE

Acest contract de vânzare-cumpărare este încheiat astăzi, data de [__], de [Vânzătorul] și [Cumpărătorul] (împreună, „Parti").

1. Vânzarea: Vânzătorul vânzând și transferând proprietatea tuturor drepturilor, acțiunilor, interesele și orice alte drepturi și interese în proprietatea situată la [Adresa] („Proprietate") către Cumpărător, cu excepția altui acord scris.

2. Preț: Vânzătorul va primi o plată de [Sumă] („Prețul") în [Forma de plată] ca dovadă a vânzării și transferului proprietății.

3. Taxe: Vânzătorul este responsabil pentru plata taxelor impuse de orice lege sau reglementare în vigoare.

4. Proprietatea va fi transferată după ce prețul este plătit în întregime.

5. Rezilieri și condiții: Nu există rezilieri sau condiții.

6. Acordul Final: Acest contract nu poate fi modificat, cu excepția cazului în care este aprobat în scris de ambele părți.

7. Legi: Acest contract este guvernat de legile statului [Statul].

Încheiat în [Locație],

Vânzătorul:

Cumpărător:
""",
        'json': {
            'type': 'contract de vanzare-cumparare',
            'parties': ['Vanzatorul', 'Cumparatorul'],
            'date': '2024-01-01'
        }
    }

    # Evaluate single contract
    print("Evaluare contract exemplu...")
    result = evaluate_contract(sample_contract['text'], sample_contract['json'])
    display_evaluation(result)

    # Example: Evaluate multiple contracts
    # contracts = [sample_contract, another_contract, ...]
    # results = evaluate_multiple_contracts(contracts)
    # for result in results:
    #     print(f"\nContract {result['contract_id']}:")
    #     display_evaluation(result['evaluation'])

Evaluare contract exemplu...
Using model: mistralai/Mistral-7B-Instruct-v0.2

DEBUG - Raw response:
 {
"structura": {
"scor": 8,
"observatie": "Documentul are titlu, parti contractante »ôi un continut bine organizat, cu paragrafe distincte."
},
"limbaj_juridic": {
"scor": 9,
"observatie": "Termenii juridici sunt utilizati corect si consistent, cu excep»õia termenului 'orice alte drepturi' care este neclar."
},
"scop": {
"scor": 7,
"observatie": "Scopul contractului este clar √Æn ce prive»ôte transferarea proprietƒÉ»õii, dar nu este clar dacƒÉ acesta include sau exclud alte drepturi."
},
"observatii_generale": "Contractul are o structurƒÉ bine organizatƒÉ »ôi limbaj juridic corect, cu excep»õia unele neclaritƒÉ»õi. Scopul contractului trebuie clarificat."
}

DEBUG - Extracted JSON:
{
"structura": {
"scor": 8,
"observatie": "Documentul are titlu, parti contractante »ôi un continut bine organizat, cu paragrafe distincte."
},
"limbaj_juridic": {
"scor": 9,
"observatie": "Termenii juridici 