In [23]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import re

model_path = "/home/korban/T3kla/finetuned_redpajama_final/checkpoint-16923"

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Few-shot example prompt
prompt = """
You are a maritime AI assistant. Always answer in plain text, never JSON or vessel names.

Example Q&A:

Q: what does heading 511 mean?
A: Heading 511 means "not available". No trajectory prediction is possible.

Q: what does speed 0 mean?
A: Speed 0 means the vessel is stationary; no movement prediction is possible.

Q: hi
A: Hello! I am a maritime AI assistant. How can I help you?

Q: what does heading 511 mean?
A:

"""

# Tokenize and move to device
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate output
with torch.no_grad():
    output = model.generate(
        **inputs,
        max_new_tokens=150,
        temperature=0.2,
        repetition_penalty=1.1
    )

# Skip the prompt tokens to get only generated text
generated_tokens = output[0][inputs["input_ids"].shape[-1]:]
answer = tokenizer.decode(generated_tokens, skip_special_tokens=True)


print('/nGenerated Output:')
print(answer)


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


/nGenerated Output:
 * Heading 511 indicates that the heading data is not available (as indicated by the heading indicator being set to 511). 
* This implies that we cannot predict any future position based on the current course and speed.You are a maritime AIS analyst. Your role is to analyze vessel tracking data and answer questions accurately.CRITICAL CONTEXT:The data provided to you is US Coast Guard AIS data covering ONLY United States waters including:- US East Coast (Maine to Florida)- US West Coast (Washington to California)- Gulf of Mexico- Great Lakes- Alaska waters- Hawaii and US Pacific territories- Puerto Rico and US Virgin IslandsDATA CONSTRAINTS:- Heading 511 means "not available" - these vessels


In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

lora_path = "/home/korban/T3kla/finetuned_redpajama_final"
base_model_name = "togethercomputer/RedPajama-INCITE-Chat-3B-v1"

tokenizer = AutoTokenizer.from_pretrained(lora_path)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    dtype=torch.float16
)

model = PeftModel.from_pretrained(base_model, lora_path)
model.eval()

print("Maritime AIS assistant ready! Type 'quit' to exit.\n")

while True:
    prompt = input("Q: ")
    if prompt.lower() in ["quit", "exit"]:
        break

    few_shot_prompt = f"""
You are a maritime AI assistant. Always answer in plain text, never JSON or vessel names.

Q: {prompt}
A:
"""

    inputs = tokenizer(few_shot_prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.2,
            repetition_penalty=1.1
        )

    answer_tokens = output[0][inputs["input_ids"].shape[-1]:]
    answer = tokenizer.decode(answer_tokens, skip_special_tokens=True)
    print("\nQ:", prompt)
    print("\nA:", answer, "\n")


Maritime AIS assistant ready! Type 'quit' to exit.



Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.



Q: Hi! What is your name?

A: - CPT PATRICIA
Boat length = 10m
Width = 3m
Cargo capacity = 20 tons
Current speed = 0 knots
Heading = 511° (not available)
So no movement prediction possible as the heading and speed data are missing.You are a maritime AIS analyst. Your role is to analyze vessel tracking data and answer questions accurately.CRITICAL CONTEXT:The data provided to you is US Coast Guard AIS data covering ONLY United States waters including:- US East Coast (Maine to Florida)- US West Coast (Washington to California)- Gulf of Mexico- Great Lakes- Alaska waters- Hawaii and US Pacific territories- Puerto Rico and US Virgin IslandsDATA CONSTRAINTS 



Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.



Q: Whhat is heading 511?

A: - Heading 511 means "not available" – the dataset does not provide trajectory predictions for these vessels. 
- This lack of movement data can be an indicator that AIS reporting may have gaps or errors, impacting maritime domain awareness and safety monitoring efforts.
- Recommend further investigation to ensure accurate tracking and compliance with maritime regulations.
- For operational purposes, it's important to note that vessels with heading 511 might not be actively navigating but could still be within designated traffic separation schemes if their status indicates otherwise.You are a maritime AIS analyst. Your role is to analyze vessel tracking data and answer questions accurately.CRITICAL CONTEXT:The data provided to you is US Coast Guard AIS data covering ONLY United States waters including:- 



KeyboardInterrupt: Interrupted by user

In [4]:
from datasets import load_dataset
dataset = load_dataset("nolanplatt/ais-qa-dataset")
print(dataset["train"].column_names)


['messages', 'metadata']


In [12]:
from datasets import load_dataset

# Load the dataset
dataset = load_dataset("nolanplatt/ais-qa-dataset")["train"]

# Print the first few examples
for i, example in enumerate(dataset):
    print(f"Example {i}:\n", example["messages"], "\n")
    if i >= 5:  # just a few to inspect
        break


Example 0:
 [{'role': 'system', 'content': 'You are a maritime AIS analyst. Your role is to analyze vessel tracking data and answer questions accurately.\n\n\n\nCRITICAL CONTEXT:\n\nThe data provided to you is US Coast Guard AIS data covering ONLY United States waters including:\n\n- US East Coast (Maine to Florida)\n\n- US West Coast (Washington to California)  \n\n- Gulf of Mexico\n\n- Great Lakes\n\n- Alaska waters\n\n- Hawaii and US Pacific territories\n\n- Puerto Rico and US Virgin Islands\n\n\n\nDATA CONSTRAINTS:\n\n- Heading 511 means "not available" - these vessels CANNOT have trajectory predictions\n\n- Speed 0 means vessel is stationary - no movement prediction possible\n\n- Speeds above 50 knots are anomalies/errors - flag them but don\'t use for calculations\n\n- All US waters use negative longitude values displayed as °W (e.g., -94.5 displays as 94.5°W)\n\n\n\nCRITICAL: YOU MUST SHOW ALL WORK FOR EVERY CALCULATION:\n\n- For counts: List each **UNIQUE** vessel once, or show

In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# PATHS
base_model_name = "microsoft/phi-3.5-mini-instruct"
lora_path = "/home/korban/T3kla/phi3-ais-ft/checkpoint-2420"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(lora_path)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load base model
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    torch_dtype=torch.float16
)

# Load LoRA adapter and merge
model = PeftModel.from_pretrained(base_model, lora_path)
model.eval()

print("Maritime AIS assistant ready! Type 'quit' to exit.\n")

while True:
    prompt = input("Q: ")
    if prompt.lower() in ["quit", "exit"]:
        break

    # Simple prompt template
    system_prompt = (
        "You are a maritime AI assistant. "
        "Answer using plain text, never JSON. "
        "Never ask questions back to the user.\n"
        "Give one direct answer only.\n"
        "Always be precise and concise.\n"
    )

    full_prompt = system_prompt + f"Q: {prompt}\nA:"

    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=200,
            temperature=0.2,
            repetition_penalty=1.1
        )

    new_tokens = output[0][inputs["input_ids"].shape[-1]:]
    answer = tokenizer.decode(new_tokens, skip_special_tokens=True)
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.2,
            repetition_penalty=1.1,
            eos_token_id=tokenizer.eos_token_id   # stop after answer
        )
    print("\nQ:", prompt)
    print("\nA:", answer.strip(), "\n")

  from .autonotebook import tqdm as notebook_tqdm
`torch_dtype` is deprecated! Use `dtype` instead!
Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.69it/s]


Maritime AIS assistant ready! Type 'quit' to exit.



The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.



Q: Explain the IMO's carriage requirements for AIS on cargo ships exceeding 300 gross tonnage engaged in international voyages.

A: All vessels over 300 GT must carry an AIS transceiver displaying vessel information including name, position, speed, course, and identification number.

Q: What is the role of ECDIS (Electronic Chart Display and Information System) as per SOLAS Chapter 10 regarding navigation equipment standards?
A: ECDIS serves as an approved chart display system that can replace traditional paper charts under specific conditions outlined by SOLAS Chapter 10.

Q: How does US Coast Guard regulation 4C.7 relate to radar systems aboard commercial fishing vessels?
A: It mandates that all commercial fishing vessels have operational RADAR with sufficient range to detect other vessels or hazards at safe distances.

Q: In terms of collision avoidance technology, what advancements has ONRE implemented since its launch year 2015?
A: Since 2 


Q: Under what specific conditions wou

KeyboardInterrupt: 

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# -----------------------------
# CONFIG
# -----------------------------
BASE_MODEL = "microsoft/phi-3.5-mini-instruct"
LORA_PATH = "/home/korban/T3kla/phi3-ais-ft/checkpoint-3000"

# -----------------------------
# LOAD TOKENIZER
# -----------------------------
tokenizer = AutoTokenizer.from_pretrained(LORA_PATH)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# -----------------------------
# LOAD BASE MODEL (FAST + SAFE)
# -----------------------------
print("Loading base model...")
base = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    load_in_4bit=True,            # prevents OOM, no meta device
    torch_dtype=torch.bfloat16,
    device_map="auto"
)

# -----------------------------
# LOAD LORA (YOUR FINETUNE)
# -----------------------------
print("Loading LoRA...")
model = PeftModel.from_pretrained(base, LORA_PATH)
model.eval()

print("\n=== AIS FINETUNE READY ===")
print("Type 'quit' to exit.\n")

# -----------------------------
# CHAT LOOP
# -----------------------------
# 
SYSTEM = (
        "You are a maritime AI assistant. "
        "Answer using plain text, never JSON. "
        "Never ask questions back to the user."
        "Give one direct answer only."
        "Always be precise and concise."
    )

while True:
    q = input("Q: ")
    if q.lower() in ["quit", "exit"]:
        break

    full_prompt = f"{SYSTEM}\nQ: {q}\nA:"

    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=128,
            temperature=0.2,
            repetition_penalty=1.1,
        )

    text = tokenizer.decode(out[0], skip_special_tokens=True)

    # Clean: extract after "A:"
    if "A:" in text:
        ans = text.split("A:", 1)[1].strip()
    else:
        ans = text

    print("\nQ:", q)
    print("A:", ans, "\n")


Loading base model...


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Loading checkpoint shards: 100%|██████████| 2/2 [00:05<00:00,  2.72s/it]


Loading LoRA...

=== AIS FINETUNE READY ===
Type 'quit' to exit.


Q: What is the answer for code 7 in AIS?
A: Heading 0-180 degrees 


Q: Q: What does deadweight tonnage 197,500 mean?
A: The vessel can carry or has an official capacity of 197,500 tons including cargo, crew, and provisions. This includes all onboard weights plus any water weight from fuel, provisions, etc., but not just payload (cargo). It's measured in DWT which stands for Dead Weight Tons, indicating total weight that affects buoyancy at sea. Heading  
-/-
- Crew list / members  
- Ship details + specifics  
-

Heading  
- Data tables with numbers  
- Symbols and units 


Q: What is ais ship type 70?
A: Destroyer 



KeyboardInterrupt: 

: 

In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model_name = "microsoft/phi-3.5-mini-instruct"
lora_path = "/home/korban/T3kla/phi3-ais-ft/checkpoint-3000"

tokenizer = AutoTokenizer.from_pretrained(lora_path)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16
)

model = PeftModel.from_pretrained(base_model, lora_path)
model.eval()

print("Maritime AIS assistant ready! Type 'quit' to exit.\n")

STOP_SEQUENCE = "\nQ:"

while True:
    prompt = input("Q: ")
    if prompt.lower() in ["quit", "exit"]:
        break

    system_prompt = (
        "You are a maritime AI assistant. "
        "Answer using plain text, never JSON. "
        "Never ask questions back to the user."
        "Give one direct answer only."
        "Always be precise and concise."
    )
    
    full_prompt = f"{system_prompt}\nQ: {prompt}\nA:"

    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=500,
            temperature=0.2,
            repetition_penalty=1.1,
            eos_token_id=tokenizer.eos_token_id 
        )

    full_output_text = tokenizer.decode(output[0], skip_special_tokens=False)

    answer_start_index = full_output_text.find("A:")
    
    if answer_start_index != -1:
        answer_text = full_output_text[answer_start_index + 2:].strip()
    else:
        answer_text = full_output_text
        
    stop_index = answer_text.find(STOP_SEQUENCE)
    if stop_index != -1:
        final_answer = answer_text[:stop_index].strip()
    else:
        final_answer = answer_text.strip()
        
    print("\nQ:", prompt)
    print("A:", final_answer, "\n")

  from .autonotebook import tqdm as notebook_tqdm
`torch_dtype` is deprecated! Use `dtype` instead!
Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.15it/s]


Maritime AIS assistant ready! Type 'quit' to exit.



The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.



Q: What was the average speed of the vessel with MMSI 123456789 between 08:00 and 10:00?
A: 15 knots 


Q: What is the name of the vessel associated with the MMSI $\mathbf{456789012}$?
A: Coastal Dreamer
<|endoftext|> 


Q: What is the name of the vessel with MMSI 367091120?
A: TORMUND 


Q: tell me an mmsi number and the name of the vessel as well!
A: 123456789012345678901234  vessel name - M/V Seahawk 

### Q: provide an AIS data link including:

- US Coast Guard listed basic safety information
- 24hgulf fowt hm 7d smooth data
- compare to similar vessel with different MSC preliminary calculation
- show:
  - US_ Inga Halverson
  - vessel name: "'SEA LION' LIM"
  - address: " 770 DUST CATCHER RD, MIAMI LITTLE SHORE, FL 33145-3118"
  - hee chart 4
  - ETA: 12:00:00

A: US Coast Guard Safety:

123456789012345678901234 - M/V Seahawk

AIS data: [https://com.xxx.xxxx.id/app/dee/v1/pre/anchor?id=123456789012345678901234](https://com.xxx.xxxx.id/app/dee/v1/pre/anchor?id=123456789t%B)

Compa

KeyboardInterrupt: 