In [1]:
# import os
# import yaml
import torch
# from unsloth import FastLanguageModel
# from huggingface_hub import login
# import argparse
# from peft import PeftModel

from dotenv import load_dotenv
load_dotenv()

from transformers import AutoTokenizer, AutoModelForCausalLM

device = torch.device("cuda")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct", device=device)
model = AutoModelForCausalLM.from_pretrained(
    "ShethArihant/Llama-3.1-8B-us-army-fm-instruct",
    dtype=torch.bfloat16,
    device_map="cuda"
)
model.eval()
# model = model.to(device)

Loading checkpoint shards: 100%|██████████| 4/4 [00:02<00:00,  1.48it/s]


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096, padding_idx=128004)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((4

In [3]:
messages = [
  {
    "role": "user",
    "content": "Are Areas of Operations (AO) always contiguous?"
  },
  {
    "role": "assistant",
    "content": "No, Areas of Operations (AO) are not always contiguous. Operational areas may be contiguous or noncontiguous. When they are contiguous, a boundary separates them. When operational areas are noncontiguous, subordinate commands do not share a boundary, and the higher headquarters retains responsibility for the unassigned portion of its operational area."
  },
  {
    "role": "user",
    "content": "Are Joint Security Areas (JSAs) always located within areas actively engaged in combat?"
  },
  {
    "role": "assistant",
    "content": "No, Joint Security Areas (JSAs) are not always located within areas actively engaged in combat. JSAs can be intermixed with combat elements and may include intermediate support bases and other support facilities. They can be used in both linear and nonlinear situations."
  },
  {
    "role": "user",
    "content": "In an operation with noncontiguous operational areas, are the subordinate units always mutually supporting?"
  },
  {
    "role": "assistant",
    "content": "No, in an operation with noncontiguous operational areas, the subordinate units are not always mutually supporting. They can be widely distributed and beyond the mutually supporting range of each other. This is true even for a Service or functional component, such as the ground component, which could have such a large operational area that its subordinate units operate in a noncontiguous manner."
  }
]

In [4]:
inputs = tokenizer.apply_chat_template(
	messages[:3],
	add_generation_prompt=True,
	tokenize=True,
	return_dict=True,
	return_tensors="pt",
).to(model.device)

In [19]:
outputs = model.generate(**inputs, max_new_tokens=100)

No, Areas of Operations (AO) are not always contiguous. Operational areas may be contiguous or noncontiguous. When they are contiguous, a boundary separates them. When operational areas are noncontiguous, subordinate commands do not share a boundary, and the higher headquarters retains responsibility for the unassigned portion of its operational area.

In [20]:
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))

No, Joint Security Areas (JSAs) are not always located within areas actively engaged in combat. JSAs are designated areas where the host nation or other security forces have the primary responsibility for security. They can be located in areas that are not actively engaged in combat, such as in rear areas or in areas where stability operations are being conducted. JSAs are used to free up joint forces to focus on combat operations, and they help to build partner capacity and stability in the region.<|eot_id|><|start_header_id|>assistant


In [5]:
from transformers import TextIteratorStreamer
from threading import Thread
text_streamer = TextIteratorStreamer(tokenizer)
import textwrap
max_print_width = 100

inputs = tokenizer.apply_chat_template(
    messages[:3],
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

In [8]:
generation_kwargs = dict(
    inputs,
    streamer = text_streamer,
    max_new_tokens = 100,
    use_cache = True,
)
thread = Thread(target = model.generate, kwargs = generation_kwargs)
thread.start()

length = 0
for j, new_text in enumerate(text_streamer):
    if j == 0:
        wrapped_text = textwrap.wrap(new_text, width = max_print_width)
        length = len(wrapped_text[-1])
        wrapped_text = "\n".join(wrapped_text)
        # print(wrapped_text, end = "")
    else:
        length += len(new_text)
        if length >= max_print_width:
            length = 0
            print()
        print(new_text, end = "")
    pass
pass

No, Joint Security Areas (JSAs) are 
not always located within areas actively engaged in combat. Joint Security Areas are designated areas 
established to ensure the security of friendly forces and to facilitate freedom of action. They may be located 
in areas that are not actively engaged in combat, but are critical to the success of the operation. 
JSAs can be used to secure key infrastructure, protect critical assets, or provide a safe haven for 
forces to reposition or reconstitute.

JSAs are typically used