In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from datasets import Dataset

In [3]:
import sys
sys.path.append('./phase2_dapt_implementation')

from phase2_dapt_implementation.domain_adaptation import run_dapt_training, load_processed_data
from phase1_model_instantiation.model_setup import get_qwen_model

In [3]:
# model_path = "/workspace/praxis-research/base-model/qwen-2.5-3b/cache/models--Qwen--Qwen2.5-3B-Instruct/snapshots/aa8e72537993ba99e69dfaafa59ed015b17504d1"
# model, tokenizer = get_qwen_model(model_path)
# ds, _meta = load_processed_data("./data/qwen_processed_data.pkl")

In [5]:
# If you've already processed, let's see what made it through
dataset, metadata = load_processed_data("data/qwen_processed_data.pkl")

# Look at a few examples to see what text content you actually have
for i in range(min(3, len(dataset))):
    sample = dataset[i]
    print(f"\n📝 Sample {i+1}:")
    print(f"  Title: {sample.get('title', 'No title')}")
    print(f"  Source: {sample.get('source', 'No source')}")
    print(f"  Token count: {len(sample['input_ids'])}")
    
    # Decode some tokens to see actual text content
    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(metadata['tokenizer_name'])
    decoded_text = tokenizer.decode(sample['input_ids'][:200])  # First 200 tokens
    print(f"  Text preview: {decoded_text}")
    print("-" * 50)

Loaded dataset: 135,491 examples
Tokenizer: Qwen/Qwen2.5-3B
Max length: 1024
Vocab size: 151,643

📝 Sample 1:
  Title: Revised Surgical CAse REport (SCARE) guideline: An update for the age of Artificial Intelligence
  Source: s_scholar_container
  Token count: 331


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

  Text preview: Revised Surgical CAse REport (SCARE) guideline: An update for the age of Artificial Intelligence Introduction: Artificial intelligence (AI) is rapidly transforming healthcare and scientific publishing. Reporting guidelines need to be updated to take into account this advance. The SCARE Guideline 2025 update adds a new AI-focused domain to promote transparency, reproducibility, and ethical integrity in surgical case reports involving AI. Methods: A Delphi consensus exercise was conducted to update the SCARE guidelines. A panel of 49 surgical and scientific experts were invited to rate proposed new items. In round 1, participants scored each item on a 9-point Likert scale and provided feedback. Items not meeting consensus were revised or discarded. Results: A 94% response rate occurred amongst participants (46/49) in the first round. Ratings were analysed for agreement levels, and consensus was reached on all six proposed AI-related items. A revised SCARE checklist is pre

In [4]:
model_path = "/workspace/praxis-research/base-model/qwen-2.5-3b/cache/models--Qwen--Qwen2.5-3B-Instruct/snapshots/aa8e72537993ba99e69dfaafa59ed015b17504d1"
model_, tokenizer = get_qwen_model(model_path)
ds, *meta = load_processed_data("./data/qwen_processed_data.pkl")

✅ Using cached Qwen model at /workspace/praxis-research/base-model/qwen-2.5-3b/cache/models--Qwen--Qwen2.5-3B-Instruct/snapshots/aa8e72537993ba99e69dfaafa59ed015b17504d1
🔄 Loading model and tokenizer from /workspace/praxis-research/base-model/qwen-2.5-3b/cache/models--Qwen--Qwen2.5-3B-Instruct/snapshots/aa8e72537993ba99e69dfaafa59ed015b17504d1


Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

✅ Successfully loaded Qwen model /workspace/praxis-research/base-model/qwen-2.5-3b/cache/models--Qwen--Qwen2.5-3B-Instruct/snapshots/aa8e72537993ba99e69dfaafa59ed015b17504d1
Loaded dataset: 135,491 examples
Tokenizer: Qwen/Qwen2.5-3B
Max length: 1024
Vocab size: 151,643


In [5]:
# Run DAPT training
model_path = run_dapt_training(model_, tokenizer, ds)

Starting DAPT training...


Step,Training Loss
100,2.223
200,2.2099
300,2.1953
400,2.1924
500,2.1924
600,2.173
700,2.1816
800,2.1648
900,2.1654
1000,2.1591


RuntimeError: [enforce fail at inline_container.cc:659] . unexpected pos 13497433344 vs 13497433240

In [6]:
# Your training should have saved checkpoints in ./ai_dapt_checkpoints/
# You can resume from the latest checkpoint
#model_path = run_dapt_training(model, tokenizer, ds, resume_from_checkpoint="./ai_dapt_checkpoints/checkpoint-1000")

In [6]:
from transformers import AutoModelForCausalLM

# Load the model from your latest checkpoint
trained_model = AutoModelForCausalLM.from_pretrained(
    "./ai_dapt_checkpoints/checkpoint-3000",
    torch_dtype=torch.bfloat16,
    device_map="auto"
)

# Save it as a final model
trained_model.save_pretrained("./ai_dapt_final_89percent")
tokenizer.save_pretrained("./ai_dapt_final_89percent")

print("Model saved to ./ai_dapt_final_89percent")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Model saved to ./ai_dapt_final_89percent


In [7]:
# Just load and ask one question
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("./ai_dapt_checkpoints/checkpoint-3000")
tokenizer = AutoTokenizer.from_pretrained("./ai_dapt_checkpoints/checkpoint-3000")

# Ask something
question = "What is artificial intelligence?"
inputs = tokenizer(f"Question: {question}\nAnswer:", return_tensors="pt")
outputs = model.generate(**inputs, max_length=100)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Question: What is artificial intelligence?
Answer: Artificial Intelligence (AI) is a branch of computer science that aims to create systems that can perform tasks requiring human-like intelligence, such as learning, reasoning, and problem-solving. AI technologies include machine learning, natural language processing, computer vision, and robotics. AI systems are designed to learn from data, make decisions, and adapt to new situations without explicit programming. They are widely used in various fields, such as healthcare, finance, education, and customer service


In [9]:
# Ask something
question = "How does the SCARE Guideline 2025 update address the incorporation of artificial intelligence in surgical case reports?"
inputs = tokenizer(f"Question: {question}\nAnswer:", return_tensors="pt")
outputs = model.generate(**inputs, max_length=100)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

Question: How does the SCARE Guideline 2025 update address the incorporation of artificial intelligence in surgical case reports?
Answer: The SCARE Guideline 2025 update emphasizes the need for transparency and ethical considerations when reporting AI-assisted research, ensuring that readers understand how AI was used and its limitations. The guideline recommends that authors disclose whether AI tools were used in data collection, analysis, or both, and that they report on the AI’s performance. It also encourages authors


In [10]:
question = "Tell me about the paper: Towards conversational diagnostic artificial intelligence"
inputs = tokenizer(f"Question: {question}\nAnswer:", return_tensors="pt")
outputs = model.generate(**inputs, max_length=100)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(response)

Question: Tell me about the paper: Towards conversational diagnostic artificial intelligence
Answer: In this work, we introduce a conversational AI system that enables patients to communicate with a chatbot to diagnose and manage their health conditions. The system is built on top of the Med-LLM model, which is fine-tuned to generate context-aware responses to medical questions. It includes an intuitive interface for patients to input symptoms and receive personalized recommendations, such as medication suggestions and treatment plans. Additionally, the system


In [11]:
#import torch

# Your model
model = AutoModelForCausalLM.from_pretrained("./ai_dapt_checkpoints/checkpoint-3000")

# Get model stats
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
model_size_mb = sum(p.numel() * p.element_size() for p in model.parameters()) / (1024**2)

print(f"Total Parameters: {total_params:,}")
print(f"Trainable Parameters: {trainable_params:,}")
print(f"Model Size: {model_size_mb:.1f} MB")
print(f"Model Type: {model.config.model_type}")
print(f"Hidden Size: {model.config.hidden_size}")
print(f"Vocab Size: {model.config.vocab_size}")
print(f"Number of Layers: {model.config.num_hidden_layers}")

Total Parameters: 3,085,938,688
Trainable Parameters: 3,085,938,688
Model Size: 11771.9 MB
Model Type: qwen2
Hidden Size: 2048
Vocab Size: 151936
Number of Layers: 36
