In [None]:
# Install/upgrade the packages this notebook relies on (run once per environment).
!pip install transformers datasets trl peft bitsandbytes accelerate torch sentencepiece huggingface_hub -U
# NOTE(review): trl, transformers and accelerate are already upgraded by the `-U`
# install above, so this second pass looks redundant — confirm before removing.
!pip install --upgrade trl transformers accelerate
# deepspeed is pinned to an exact version (0.14.4) rather than upgraded to latest.
!pip install --upgrade deepspeed==0.14.4

# ATTENTION: Be sure to restart the notebook kernel after installing all packages,
# so the freshly installed versions are the ones that get imported!

In [None]:
# Log in to the Hugging Face Hub from inside the notebook session.
# NOTE(review): presumably needed because the meta-llama base model loaded later
# is access-gated — confirm against your Hub account permissions.
from huggingface_hub import interpreter_login
interpreter_login()

In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
import warnings
from IPython.display import display, Markdown

# Silence Python warnings so the notebook output stays readable.
warnings.filterwarnings("ignore")

# --- Configuration ---
# Identifier of the base model that is passed to from_pretrained() below.
base_model_id = "meta-llama/Llama-3.2-3B-Instruct"
# *** IMPORTANT: point this at the directory where your LoRA adapter weights are saved ***
adapter_path = "./results_llama3_finetuned"

# Default to the CPU and switch to the GPU only when torch can see one.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print(f"Using device: {device}")

# Optional 4-bit loading of the base model (useful when memory is tight).
# Pick the setting that fits your inference hardware and how the adapter was
# trained/saved; since the adapter is merged below, loading the base model
# unquantized is a reasonable default.
use_quantization = False  # Set to True to load the base model in 4-bit
compute_dtype = torch.bfloat16  # Or torch.float16

if not use_quantization:
    # Full-precision path: no bitsandbytes config, dtype is passed directly at load time.
    bnb_config = None
    print(f"Loading base model with dtype: {compute_dtype}")
else:
    print("Loading base model with 4-bit quantization...")
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=compute_dtype,
        bnb_4bit_use_double_quant=False,  # Typically False for inference
    )


# --- Load Base Model and Tokenizer ---
print(f"Loading base model: {base_model_id}")
# An explicit dtype is only requested on the unquantized path; when loading in
# 4-bit the dtype argument is left as None.
model_dtype = None if use_quantization else compute_dtype
# NOTE(review): trust_remote_code=True permits running code shipped in the model
# repo; confirm it is actually required for this model before keeping it.
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    torch_dtype=model_dtype,
    device_map="auto",  # Let the loader decide device placement
    trust_remote_code=True,
)

print(f"Loading tokenizer for: {base_model_id}")
tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)
# Pad on the right and reuse the EOS token as the padding token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# --- Load LoRA Adapter and Merge ---
print(f"Loading LoRA adapter from: {adapter_path}")
# Wrap the already-loaded base model with the adapter stored at adapter_path.
model = PeftModel.from_pretrained(model, adapter_path)
print("LoRA adapter loaded.")

print("Merging LoRA adapter into the base model...")
# Fold the adapter into the underlying weights and drop the PEFT wrapper, so
# `model` is rebound to the merged result from here on.
# NOTE(review): if use_quantization is enabled, merging into a 4-bit base may
# behave differently — confirm against the PEFT documentation.
model = model.merge_and_unload()
print("Adapter merged.")

# Inference only: switch the model to evaluation mode.
model.eval()

# --- Prepare Prompt using the Correct Chat Template ---
# *** CRITICAL: the layout below must be the exact format used for your training data ***
# Hand-written Llama 3.2 Instruct single-turn layout (VERIFY THIS STRUCTURE!)
prompt_text = "Investigating the effects of manipulation on the gale shapley algorithm."

# Make sure this matches the format_dataset_batch function from your training script.
# Layout: BOS marker, a user header, a blank line, the prompt, <|eot_id|>, then an
# open assistant header followed by a blank line. The string ends with exactly
# two newlines after the assistant header (there is no trailing space).
user_turn = (
    "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n"
    + prompt_text
    + "<|eot_id|>"
)
assistant_header = "<|start_header_id|>assistant<|end_header_id|>\n\n"
formatted_prompt = user_turn + assistant_header

print("\nFormatted Prompt:")
print(formatted_prompt)

# --- Tokenize Input ---
print("Tokenizing prompt...")
# Encode the hand-formatted prompt into token IDs.
# formatted_prompt already starts with <|begin_of_text|>, so the tokenizer must
# not add its own special tokens on top (add_special_tokens=False); with the
# default setting the sequence would begin with a duplicated BOS token.
# NOTE(review): if your training pipeline tokenized the same text with the
# default settings, confirm which variant the adapter actually saw.
# The tensors are moved to the device the model's parameters live on, because
# the model was placed with device_map="auto" rather than an explicit device.
inputs = tokenizer(
    formatted_prompt,
    return_tensors="pt",
    padding=False,
    truncation=True,
    add_special_tokens=False,
).to(model.device)

# --- Generate Output ---
print("Generating response...")
# Sample a continuation of the prompt.
# Tune the generation parameters as needed (max_new_tokens, temperature, top_p, etc.)
with torch.no_grad():  # No gradients are needed for inference
    outputs = model.generate(
        **inputs,
        max_new_tokens=2048,                  # Upper bound on newly generated tokens
        do_sample=True,                       # Sampling; set to False for deterministic output
        temperature=0.6,                      # Controls randomness (lower = more deterministic)
        top_p=0.9,                            # Nucleus sampling probability
        eos_token_id=tokenizer.eos_token_id,  # Stop once the EOS token is produced
        # Pass the padding token explicitly so generate() does not have to fall
        # back to EOS on its own and log a "Setting `pad_token_id`..." notice.
        pad_token_id=tokenizer.pad_token_id,
    )

# --- Decode and Print Output ---
# generate() returns the prompt tokens followed by the continuation, so drop the
# first `prompt_length` tokens to keep only what the model newly produced.
prompt_length = inputs["input_ids"].shape[1]
generated_token_ids = outputs[0][prompt_length:]
# Turn the token IDs back into text, leaving special tokens out of the string.
response = tokenizer.decode(generated_token_ids, skip_special_tokens=True)

div = "=" * 80
print(f"\n{div}\nModel Response:")
# Render the response as Markdown in the notebook output.
display(Markdown(response))

print("\nInference complete.")



  warn(
2025-04-22 13:06:12.850857: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-04-22 13:06:12.850920: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-04-22 13:06:12.852140: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-22 13:06:12.858894: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


[2025-04-22 13:06:14,490] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/usr/bin/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status




  @autocast_custom_fwd
  @autocast_custom_bwd


Using device: cuda
Loading base model with dtype: torch.bfloat16
Loading base model: meta-llama/Llama-3.2-3B-Instruct


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading tokenizer for: meta-llama/Llama-3.2-3B-Instruct
Loading LoRA adapter from: ./results_llama3_finetuned
LoRA adapter loaded.
Merging LoRA adapter into the base model...
Adapter merged.

Formatted Prompt:
<|begin_of_text|><|start_header_id|>user<|end_header_id|>

Investigating the effects of manipulation on the gale shapley algorithm.<|eot_id|><|start_header_id|>assistant<|end_header_id|>


Tokenizing prompt...


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Generating response...

Model Response:


**Investigating the Effects of Manipulation on the Gale-Shapley Algorithm**

The Gale-Shapley algorithm is a well-known algorithm used for stable marriage problems, which is a fundamental problem in combinatorial optimization and game theory. Manipulation refers to the phenomenon where an algorithm can be influenced by external factors (e.g., the order of proposals, the input order, or external constraints) to produce different stable matchings than the optimal stable matching. This can be undesirable in real-world applications where fairness and robustness are critical.

### Research Problem:

How does manipulation affect the performance and correctness of the Gale-Shapley algorithm? Specifically, under what conditions does the algorithm exhibit manipulation, and how can these effects be characterized or mitigated?

### Hypothesis:

Manipulation in the Gale-Shapley algorithm occurs when the algorithm's order of proposals or external factors influence the matchings in a way that deviates from the optimal stable matching. The extent and nature of manipulation depend on the input parameters such as the number of men and women, the preference lists, and the order of proposals.

### Proposed Approach:

1. **Literature Review:**
   - Study existing theoretical results on the Gale-Shapley algorithm's stability and manipulation.
   - Review prior empirical studies on manipulation in related algorithms.

2. **Theoretical Analysis:**
   - Analyze the algorithm's behavior under different proposal orders and external constraints.
   - Characterize the conditions under which manipulation occurs.
   - Use formal definitions and proofs to establish bounds on manipulation.

3. **Simulation/Experiment:**
   - Implement the Gale-Shapley algorithm with different proposal orders (e.g., random, reverse order, top-down, bottom-up).
   - Generate synthetic datasets with known optimal stable matchings.
   - Measure the deviation between algorithm outputs and optimal matchings due to manipulation.
   - Analyze the impact of different input parameters (number of individuals, preference lists complexity).

4. **Empirical Study:**
   - Apply the algorithm to real-world datasets (e.g., university student matches, job assignments).
   - Compare matches produced by the Gale-Shapley algorithm with and without manipulation.
   - Collect qualitative feedback from domain experts or users on perceived fairness.

### Data Collection Plan:

- Synthetic data: Generate random preference lists and stable matchings for controlled experiments.
- Real data: Obtain datasets from established repositories or partnerships (e.g., university matchmaker datasets).

### Data Analysis Plan:

- Quantitative analysis: Measure the degree of manipulation by comparing algorithm outputs with the true optimal matchings using metrics such as deviation from optimal cost or preference ordering.
- Statistical analysis to test the significance of observed manipulation effects.
- Qualitative analysis of expert feedback to understand perceived fairness.

### Potential Challenges or Limitations:

- Difficulty in formally defining and measuring "manipulation" in the algorithm.
- Computational complexity of simulating all possible proposal orders and external constraints.
- Generalizing results across different problem sizes and preference structures.
- Ensuring the applicability of findings to diverse real-world settings.

### Ethical Considerations:

- If using real data, ensure privacy and confidentiality of participants.
- Transparently communicate findings to avoid misuse of algorithmic fairness claims.
- Consider implications of manipulation on vulnerable populations and fairness in matchmaking.

---

This approach aims to deepen understanding of manipulation in the Gale-Shapley algorithm, combining theoretical analysis, simulation, and empirical validation to improve the robustness and fairness of the algorithm.


Inference complete.
