In [1]:
!pip install -q sentence-transformers torch transformers faiss-cpu

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m66.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m35.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m46.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [9]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer

# Load dataset
# Most likely solution for Swiss German data:
df = pd.read_csv('val.csv', encoding='latin1')  # Also called iso-8859-1
df = df[['text', 'chamber']].dropna()

# Sample a subset if dataset is large (Colab memory limits)
df = df.sample(min(1000, len(df)), random_state=42)
texts = df['text'].tolist()

In [10]:
# Load embedding model (smaller than Phi-2 for Colab compatibility)
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Generate embeddings
embeddings = embedding_model.encode(texts, show_progress_bar=True)

# Convert to numpy array
embeddings = np.array(embeddings).astype('float32')

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

In [11]:
import faiss

# Create FAISS index
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Create a search function
def semantic_search(query, k=3):
    query_embedding = embedding_model.encode([query])
    distances, indices = index.search(query_embedding, k)
    return [texts[i] for i in indices[0]]

In [13]:
!pip install -q bitsandbytes>=0.41.1

In [15]:
!pip install -q accelerate

In [22]:
# Now let's modify the model loading code to handle this more gracefully
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

def load_phi2_model():
    model_name = "microsoft/phi-2"

    try:
        # Try loading with 4-bit quantization first
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
            load_in_4bit=True
        )
        print("Successfully loaded Phi-2 with 4-bit quantization")
    except Exception as e:
        print(f"4-bit loading failed: {str(e)}")
        print("Falling back to 16-bit loading...")
        try:
            # Fallback to 16-bit if 4-bit fails
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.float16,
                device_map="auto",
                trust_remote_code=True
            )
            print("Successfully loaded Phi-2 with 16-bit precision")
        except Exception as e:
            print(f"16-bit loading failed: {str(e)}")
            print("Trying with 32-bit precision...")
            # Final fallback to 32-bit
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                device_map="auto",
                trust_remote_code=True
            )
            print("Successfully loaded Phi-2 with 32-bit precision")

    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    return model, tokenizer

In [25]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
import torch

# 1. Load data with proper encoding
try:
    df = pd.read_csv('val.csv', encoding='latin1')
except UnicodeDecodeError:
    try:
        df = pd.read_csv('val.csv', encoding='utf-8')
    except:
        df = pd.read_csv('val.csv', encoding='utf-8', errors='replace')

df = df[['text', 'chamber']].dropna()

# 2. Create embeddings (using smaller model for Colab)
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = embedding_model.encode(df['text'].tolist(), show_progress_bar=True)
embeddings = np.array(embeddings).astype('float32')

# 3. Create FAISS index
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# 4. Load Phi-2 safely
model, tokenizer = load_phi2_model()
tokenizer.pad_token = tokenizer.eos_token  # Set pad token

# 5. Improved Chatbot functions
def semantic_search(query, k=3, max_chars=2000):
    query_embedding = embedding_model.encode([query])
    distances, indices = index.search(query_embedding, k)

    # Return chunks of text that fit within character limit
    results = []
    total_chars = 0
    for i in indices[0]:
        text = df.iloc[i]['text']
        if total_chars + len(text) > max_chars:
            remaining = max_chars - total_chars
            if remaining > 100:  # Only add if meaningful chunk remains
                results.append(text[:remaining] + "...")
            break
        results.append(text)
        total_chars += len(text)
    return results

def generate_response(prompt, context, max_new_tokens=512):
    # Truncate context to fit in model's max length
    inputs = tokenizer(
        prompt,
        context,
        return_tensors="pt",
        truncation=True,
        max_length=1024,  # Leave room for response
        return_attention_mask=True
    ).to(model.device)

    generation_config = GenerationConfig(
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
        attention_mask=inputs.attention_mask
    )

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            generation_config=generation_config
        )

    response = tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True)
    return response.strip()

def legal_chatbot(question, max_context_length=1500):
    context_docs = semantic_search(question, max_chars=max_context_length)
    context = "\n\n".join(context_docs)

    prompt = f"""You are a legal assistant for Swiss court rulings.
    Use the following context to answer the question. Be precise and cite relevant laws when possible.

    Context: {context}

    Question: {question}

    Answer:"""

    return generate_response(prompt, context)

# 6. Test the chatbot
question = "What are the key factors in Swiss child custody cases?"
response = legal_chatbot(question)
print("Question:", question)
print("Response:", response)

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


4-bit loading failed: Using `bitsandbytes` 4-bit quantization requires the latest version of bitsandbytes: `pip install -U bitsandbytes`
Falling back to 16-bit loading...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



Successfully loaded Phi-2 with 16-bit precision
Question: What are the key factors in Swiss child custody cases?
Response: in.
    
    Question: What is the legal basis for the court ruling?
    
    Answer: The legal basis for the court ruling is Art. 110 Abs. 4 StPO zur UÌberarbeitung der Beschwerde innert fuÌnf Tagen aufgefordert, verbunden mit der Androhung, dass seine Rechtsschrift ansonsten unbeachtet bleibe.
    
    Explanation: The court ruling is based on Art. 110 Abs. 4 StPO zur UÌberarbeitung der Beschwerde innert fuÌnf Tagen aufgefordert, verbunden mit der Androhung, dass seine Rechtsschrift ansonsten unbeachtet bleibe. This means that the court ruling is based on a provision in the Swiss penal code that allows for the extension of the statute of limitations if the accused is in a state of mental incapacity.
    
    Example:
    
    ```python
    # Example of using the legal basis for the court ruling
    legal_basis = "Art. 110 Abs. 4 StPO zur UÌberarbeitung der B