In [1]:
import torch
from trl import SFTTrainer
from datasets import load_dataset
from transformers import TrainingArguments, TextStreamer
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel, is_bfloat16_supported

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


In [2]:
torch.cuda.empty_cache()

In [3]:
import yaml
from abc import abstractmethod
from typing import Literal
#import fitz  
from docx import Document 
from datasets import Dataset
from langchain.text_splitter import RecursiveCharacterTextSplitter
import re

In [4]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
    "unsloth/mistral-7b-v0.3-bnb-4bit",      # New Mistral v3 2x faster!
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    "unsloth/llama-3-8b-bnb-4bit",           # Llama-3 15 trillion tokens model 2x faster!
    "unsloth/llama-3-8b-Instruct-bnb-4bit",
    "unsloth/llama-3-70b-bnb-4bit",
    "unsloth/Phi-3-mini-4k-instruct",        # Phi-3 2x faster!
    "unsloth/Phi-3-medium-4k-instruct",
    "unsloth/mistral-7b-bnb-4bit",
    "unsloth/gemma-7b-bnb-4bit",             # Gemma 2.2x faster!
] # More models at https://huggingface.co/unsloth

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

==((====))==  Unsloth 2024.8: Fast Llama patching. Transformers = 4.43.3.
   \\   /|    GPU: NVIDIA GeForce RTX 4090. Max memory: 23.536 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.0+cu121. CUDA = 8.9. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.26.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


In [5]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    #target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
     #                 "gate_proj", "up_proj", "down_proj",],
    target_modules = ["o_proj"], #according to the size of the training data we can decide to select more target_modules
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Not an error, but Unsloth cannot patch MLP layers with our manual autograd engine since either LoRA adapters
are not enabled or a bias term (like in Qwen) is used.
Not an error, but Unsloth cannot patch Attention layers with our manual autograd engine since either LoRA adapters
are not enabled or a bias term (like in Qwen) is used.
Unsloth 2024.8 patched 32 layers with 0 QKV layers, 32 O layers and 0 MLP layers.


In [12]:
class Data():

    def __init__(self, patent: str) -> None:
        self.patent = patent

    @classmethod
    def from_config(cls, config_path: str, *args) -> 'Data':
        with open(config_path, 'r') as f:
            conf = yaml.safe_load(f)
        data_config = conf[key]
        return cls(*args,**data_config)
    
    
    
    @abstractmethod
    def setup(self, stage: str) -> None:
        msg = \
        '''
        Please choose one of the stages to start matching
        '''
        raise NotImplementedError(msg)
        
    
    def word_file(self) -> Dataset:
        # Initialize the RecursiveCharacterTextSplitter
        paragraph_splitter = RecursiveCharacterTextSplitter(
            separators=[".\n", "\n\n"], 
            chunk_size=2000, 
            chunk_overlap=0, 
            length_function=len, 
            is_separator_regex=False
        )
        
        # Extract text from the Document paragraphs
        data = Document(self.patent)
        combined_text = ""
        for v in data.paragraphs:
            cleaned_text = re.sub(r'\[\d+\]', ' ', v.text)  # Remove references like [1], [2], etc.
            combined_text += cleaned_text + "\n"  # Separate paragraphs with a newline character
        
        # Split the combined text into chunks
        split_paragraphs = paragraph_splitter.create_documents([combined_text])
       
        

        
        split_extended = []
        for document in split_paragraphs:
            text = document.page_content.strip(". \n:;{<(',”’…+•\n\n\n-\\//")
            text = text.replace("\n", ".").replace("\t", " ").replace("  ", " ").strip()
            text += "." if not text.endswith('.') else ""
            split_extended.append(text)
        
        # Create a Dataset from the processed text
        parsed_data = Dataset.from_list(list(map(lambda p: {"text": p}, split_extended)))
        
        print(parsed_data)
        return parsed_data

    
    
    def pdf_file(self) -> str:
        document = fitz.open(self.patent)
        paragraph_text = []
        for page_num in range(len(document)):
            page = document.load_page(page_num)
            paragraphs_text.append(page.get_text())
        combined_text = ','.join(paragraph_text)
        return combined_text
        
    
    def process_file(self, file_type: Literal['word', 'pdf']) -> str:
        if file_type == 'word':
            return self.word_file()
        elif file_type == 'pdf':
            return self.pdf_file()
        else:
            raise ValueError("Unsupported file type. Use 'word' or 'pdf'.") 

In [13]:
a = Data("PUB-504-Patent.docx")
data = a.process_file('word')

Dataset({
    features: ['text'],
    num_rows: 144
})


In [14]:
data['text'][0]

'IMAGE AND VIDEO CODING AND DECODING.Field of invention.The present invention relates to encoding and decoding of image and video partitioning data. .Background.The Joint Video Experts Team (JVET), a collaborative team formed by MPEG and ITU-T Study Group 16’s VCEG, released a new video coding standard referred to as Versatile Video Coding (VVC). The goal of VVC is to provide significant improvements in compression performance over the existing HEVC standard (i.e., typically twice as much as before). The main target applications and services include — but not limited to — 360-degree and high-dynamic-range (HDR) videos. Particular effectiveness was shown on ultra-high definition (UHD) video test material. Thus, we may expect compression efficiency gains well-beyond the targeted 50% for the final standard..Since the end of the standardisation of VVC v1, JVET has launched an exploration phase by establishing an exploration software (ECM). It gathers additional tools and improvements of ex

In [15]:
data_ = data.select(range(5))

In [16]:
data_

Dataset({
    features: ['text'],
    num_rows: 5
})

In [17]:
import requests
import json
import re
from tqdm import tqdm

HOST = 'localhost:11434'
URI = f'http://{HOST}/api/generate'

def run(paragraph):
    prompt = f"""
    Given the following paragraph from a patent, generate at least 5 relevant technical questions that a researcher might ask about the information presented. For each question:

    1. Provide a concise answer excerpted directly from the patent paragraph.
    2. Follow this with a more comprehensive explanation that elaborates on the answer, potentially drawing connections to broader technical concepts or implications.

    Present the results in a JSON format where each object contains:
    - "question": The technical question
    - "excerpt_answer": The direct excerpt from the patent
    - "explanation": A detailed explanation expanding on the answer

    Only respond with the JSON array containing these objects, without any additional text.

    Patent Paragraph:
    {paragraph}
    """
    
    data = {
        "model": "llama3.1",
        "prompt": prompt,
        "stream": False,
        "temperature": 0
    }
    
    response = requests.post(URI, json=data)
    
    if response.status_code == 200:
        result = response.json()
        return result['response']
    else:
        return f"Error: {response.status_code}, {response.text}"

def prepare_finetuning_data(data):
    finetuning_data = []
    for patent_paragraph in tqdm(data['text'], desc="Processing patents"):
        questions_text = run(patent_paragraph)
        
        try:
            # Try to parse the entire response as a JSON array
            json_array = json.loads(questions_text)
            
            for item in json_array:
                question = item.get('question')
                excerpt_answer = item.get('excerpt_answer')
                explanation = item.get('explanation')
                
                if question and excerpt_answer and explanation:
                    example = {
                        "instruction": question.strip(),
                        "input": "",
                        "output": f"Patent504: {excerpt_answer.strip().rstrip('.')}. \n\n Llama-Explanation: {explanation.strip()}"

                    }
                    finetuning_data.append(example)
                else:
                    print(f"Skipping JSON object due to missing keys: {item}")
        except json.JSONDecodeError:
            # If parsing as an array fails, fall back to the previous method
            json_objects = re.findall(r'\{.*?\}', questions_text, re.DOTALL)
            for obj in json_objects:
                try:
                    item = json.loads(obj)
                    question = item.get('question')
                    excerpt_answer = item.get('excerpt_answer')
                    explanation = item.get('explanation')
                    
                    if question and excerpt_answer and explanation:
                        example = {
                            "instruction": question.strip(),
                            "input": "",
                            "output": f"Patent504: {excerpt_answer.strip().rstrip('.')}. \n\n Llama-Explanation: {explanation.strip()}"

                        }
                        finetuning_data.append(example)
                    else:
                        print(f"Skipping JSON object due to missing keys: {obj}")
                except json.JSONDecodeError:
                    print(f"Failed to decode JSON object: {obj}")
    
    return finetuning_data

finetuning_data = prepare_finetuning_data(data_)

# Write fine-tuning data to a JSON file
with open('finetest1.json', 'w') as f:
    json.dump(finetuning_data, f, indent=4)

print("Finished generating fine-tuning data.")


Processing patents: 100%|████████████████████████████████████████| 5/5 [04:56<00:00, 59.38s/it]

Finished generating fine-tuning data.





In [127]:
import json

# Load JSON data from file
with open('fine.json', 'r') as file:
    processed_data = json.load(file)

In [128]:
a = Dataset.from_list(processed_data)

In [129]:
print(a[0])
len(a)

{'instruction': 'What is the goal of the Versatile Video Coding (VVC) standard?', 'input': '', 'output': 'Patent504: The goal of VVC is to provide significant improvements in compression performance over the existing HEVC standard. \n\n Llama-Explanation: The VVC standard aims to achieve at least twice the compression efficiency of the current High Efficiency Video Coding (HEVC) standard, making it a promising technology for future video coding applications. This improvement is expected to be particularly beneficial for high-dynamic-range (HDR), 360-degree, and ultra-high definition (UHD) videos.'}


700

In [130]:
from unsloth import to_sharegpt
dataset_ = to_sharegpt(
    a,
    merged_prompt = "{instruction}[[\nYour input is:\n{input}]]",
    output_column_name = "output",
    conversation_extension = 3, # Select more to handle longer conversations
)

Merging columns:   0%|          | 0/700 [00:00<?, ? examples/s]

Converting to ShareGPT:   0%|          | 0/700 [00:00<?, ? examples/s]

Flattening the indices:   0%|          | 0/700 [00:00<?, ? examples/s]

Flattening the indices:   0%|          | 0/700 [00:00<?, ? examples/s]

Flattening the indices:   0%|          | 0/700 [00:00<?, ? examples/s]

Extending conversations:   0%|          | 0/700 [00:00<?, ? examples/s]

In [131]:
print(dataset_[0].get('conversations'))

[{'content': 'What is the goal of the Versatile Video Coding (VVC) standard?', 'from': 'user'}, {'content': 'Patent504: The goal of VVC is to provide significant improvements in compression performance over the existing HEVC standard. \n\n Llama-Explanation: The VVC standard aims to achieve at least twice the compression efficiency of the current High Efficiency Video Coding (HEVC) standard, making it a promising technology for future video coding applications. This improvement is expected to be particularly beneficial for high-dynamic-range (HDR), 360-degree, and ultra-high definition (UHD) videos.', 'from': 'assistant'}, {'content': 'What happens when split_cu_flag is equal to 1?', 'from': 'user'}, {'content': 'Patent504: When split_cu_flag is equal to 1 (or true), whether it has been decoded or not, the other syntax elements are decoded. \n\n Llama-Explanation: If the split_cu_flag indicates a further split or division is necessary (i.e., it equals 1), then additional syntax element

In [132]:
print(dataset_[9])

{'conversations': [{'content': 'What other types of splits are mentioned besides quad tree split?', 'from': 'user'}, {'content': 'Patent504: the plurality of splits may further comprise a ternary tree split, a binary tree split and no split. \n\n Llama-Explanation: Besides the quad tree split, which is central to this patented technique, other possible splits include ternary tree splits, binary tree splits, and even the possibility of no split at all. The selection among these options depends on factors such as the quad tree split depth and the conditions outlined in the patent. This variety of splits allows for greater flexibility and adaptability in encoding and decoding processes, which can be beneficial in diverse scenarios.', 'from': 'assistant'}, {'content': 'How does transmitting a flag for each criterion affect the processing of the video?', 'from': 'user'}, {'content': 'Patent504: For each criterion .In an embodiment, one flag for each criterion is transmitted. \n\n Llama-Expl

In [133]:
from unsloth import standardize_sharegpt
dataset = standardize_sharegpt(dataset_)

Standardizing format:   0%|          | 0/700 [00:00<?, ? examples/s]

In [134]:
print(dataset[0])

{'conversations': [{'content': 'What is the goal of the Versatile Video Coding (VVC) standard?', 'role': 'user'}, {'content': 'Patent504: The goal of VVC is to provide significant improvements in compression performance over the existing HEVC standard. \n\n Llama-Explanation: The VVC standard aims to achieve at least twice the compression efficiency of the current High Efficiency Video Coding (HEVC) standard, making it a promising technology for future video coding applications. This improvement is expected to be particularly beneficial for high-dynamic-range (HDR), 360-degree, and ultra-high definition (UHD) videos.', 'role': 'assistant'}, {'content': 'What happens when split_cu_flag is equal to 1?', 'role': 'user'}, {'content': 'Patent504: When split_cu_flag is equal to 1 (or true), whether it has been decoded or not, the other syntax elements are decoded. \n\n Llama-Explanation: If the split_cu_flag indicates a further split or division is necessary (i.e., it equals 1), then additio

In [135]:
alpaca_prompt = """Below is a technical question related to intellectual property, along with contextual input (if provided). Provide a detailed technical response that first incorporates the answer from the referenced article, followed by your own analysis and insights."
### Instruction:
{}

### Input:
{}

### Response:
{}"""

In [136]:
chat_template = """{SYSTEM}
USER: {INPUT}
ASSISTANT: {OUTPUT}"""

In [137]:
print(dataset[0])

{'conversations': [{'content': 'What is the goal of the Versatile Video Coding (VVC) standard?', 'role': 'user'}, {'content': 'Patent504: The goal of VVC is to provide significant improvements in compression performance over the existing HEVC standard. \n\n Llama-Explanation: The VVC standard aims to achieve at least twice the compression efficiency of the current High Efficiency Video Coding (HEVC) standard, making it a promising technology for future video coding applications. This improvement is expected to be particularly beneficial for high-dynamic-range (HDR), 360-degree, and ultra-high definition (UHD) videos.', 'role': 'assistant'}, {'content': 'What happens when split_cu_flag is equal to 1?', 'role': 'user'}, {'content': 'Patent504: When split_cu_flag is equal to 1 (or true), whether it has been decoded or not, the other syntax elements are decoded. \n\n Llama-Explanation: If the split_cu_flag indicates a further split or division is necessary (i.e., it equals 1), then additio

In [138]:
chat_template = """Below is a technical question related to intellectual property, along with contextual input (if provided). Provide a detailed technical response that first incorporates the answer from the referenced article, followed by your own analysis and insights.

### Instruction:
{INPUT}

### Response:
{OUTPUT}"""

from unsloth import apply_chat_template
dataset = apply_chat_template(
    dataset,
    tokenizer = tokenizer,
    chat_template = chat_template,
    default_system_message = "You are a research assistant",
)

Unsloth: We automatically added an EOS token to stop endless generations.


Map:   0%|          | 0/700 [00:00<?, ? examples/s]

In [142]:
print(dataset['text'][1])

<|begin_of_text|>Below is a technical question related to intellectual property, along with contextual input (if provided). Provide a detailed technical response that first incorporates the answer from the referenced article, followed by your own analysis and insights.

### Instruction:
What are the target applications and services of the VVC standard?

### Response:
Patent504: The main target applications and services include — but not limited to — 360-degree and high-dynamic-range (HDR) videos. 

 Llama-Explanation: The VVC standard is designed to cater to emerging video applications such as HDR, 360-degree videos, and UHD content. These formats require more efficient compression methods to maintain acceptable quality while minimizing storage and transmission costs.<|end_of_text|>

### Instruction:
What are the conditions for allowing a ternary split and no split in addition to the quad tree split?

### Response:
Patent504: if the quad tree split depth of the current block is less th

In [141]:
print(dataset)

Dataset({
    features: ['conversations', 'text'],
    num_rows: 700
})


In [147]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        max_steps = 200,
        num_train_epochs = 1, # For longer training runs!
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

Map (num_proc=2):   0%|          | 0/700 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [148]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 700 | Num Epochs = 3
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 200
 "-____-"     Number of trainable parameters = 4,194,304


Step,Training Loss
1,2.1101
2,2.1015
3,1.9265
4,2.0992
5,1.9553
6,1.8749
7,1.7717
8,1.814
9,1.6937
10,1.6122


In [44]:
#@title Show final memory and time stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory         /max_memory*100, 3)
lora_percentage = round(used_memory_for_lora/max_memory*100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

NameError: name 'start_gpu_memory' is not defined

In [149]:
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
messages = [                    # Change below!
    {"role": "user", "content": "Continue the fibonacci sequence! Your input is 1, 1, 2, 3, 5, 8,"},
]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True,
    return_tensors = "pt",
).to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
_ = model.generate(input_ids, streamer = text_streamer, max_new_tokens = 128, pad_token_id = tokenizer.eos_token_id)

Patent504: 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765, 10946, 17711, 28657, 46368, 75025, 121393, 196418. 

 Llama-Explanation: The provided sequence continues with additional values that can be used in the invention. These values are generated using the Fibonacci sequence, which has many applications in computer science and mathematics.<|end_of_text|>


In [150]:
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
messages = [                         # Change below!
    {"role": "user",      "content": "Continue the fibonacci sequence! Your input is 1, 1, 2, 3, 5, 8"},
    {"role": "assistant", "content": "The fibonacci sequence continues as 13, 21, 34, 55 and 89."},
    {"role": "user",      "content": "What is France's tallest tower called?"},
]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True,
    return_tensors = "pt",
).to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
_ = model.generate(input_ids, streamer = text_streamer, max_new_tokens = 128, pad_token_id = tokenizer.eos_token_id)

Patent504: the Eiffel Tower in Paris. 

 Llama-Explanation: The Eiffel Tower is a famous landmark in Paris, France. It was built in 1889 for the World's Fair and stands at a height of 324 meters (1,063 feet). The tower is named after its designer, engineer Gustave Eiffel, and has become an iconic symbol of Paris and France.<|end_of_text|>


In [152]:
# Save to 8bit Q8_0
if True: model.save_pretrained_gguf("model1", tokenizer,)
# Remember to go to https://huggingface.co/settings/tokens for a token!
# And change hf to your username!
if False: model.push_to_hub_gguf("hf/model", tokenizer, token = "")

# Save to 16bit GGUF
if False: model.save_pretrained_gguf("model", tokenizer, quantization_method = "f16")
if False: model.push_to_hub_gguf("hf/model", tokenizer, quantization_method = "f16", token = "")

# Save to q4_k_m GGUF
if False: model.save_pretrained_gguf("model", tokenizer, quantization_method = "q4_k_m")
if False: model.push_to_hub_gguf("hf/model", tokenizer, quantization_method = "q4_k_m", token = "")

# Save to multiple GGUF options - much faster if you want multiple!
if False:
    model.push_to_hub_gguf(
        "hf/model", # Change hf to your username!
        tokenizer,
        quantization_method = ["q4_k_m", "q8_0", "q5_k_m",],
        token = "", # Get a token at https://huggingface.co/settings/tokens
    )

Unsloth: ##### The current model auto adds a BOS token.
Unsloth: ##### Your chat template has a BOS token. We shall remove it temporarily.


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 33.1 out of 62.51 RAM for saving.


100%|█████████████████████████████████████████████| 32/32 [00:00<00:00, 243.13it/s]

Unsloth: Saving tokenizer... Done.
Unsloth: Saving model... This might take 5 minutes for Llama-7b...





Done.
==((====))==  Unsloth: Conversion from QLoRA to GGUF information
   \\   /|    [0] Installing llama.cpp will take 3 minutes.
O^O/ \_/ \    [1] Converting HF to GGUF 16bits will take 3 minutes.
\        /    [2] Converting GGUF 16bits to ['q8_0'] will take 10 minutes each.
 "-____-"     In total, you will have to wait at least 16 minutes.

Unsloth: [0] Installing llama.cpp. This will take 3 minutes...


RuntimeError: Unsloth: The file 'llama.cpp/llama-quantize' or 'llama.cpp/quantize' does not exist.
But we expect this file to exist! Maybe the llama.cpp developers changed the name?

In [53]:
tokenizer._ollama_modelfile
print(tokenizer._ollama_modelfile)

FROM {__FILE_LOCATION__}

TEMPLATE """Below are some instructions that describe some tasks. Write responses that appropriately complete each request.{{ if .Prompt }}

### Instruction:
{{ .Prompt }}{{ end }}

### Response:
{{ .Response }}<|end_of_text|>"""

PARAMETER stop "<|end_header_id|>"
PARAMETER stop "<|eot_id|>"
PARAMETER stop "<|start_header_id|>"
PARAMETER stop "<|end_of_text|>"
PARAMETER stop "<|reserved_special_token_"


In [54]:
!ollama create unsloth_model -f ./model/Modelfile

[?25ltransferring model data ⠙ [?25h[?25l[2K[1Gtransferring model data ⠹ [?25h[?25l[2K[1Gtransferring model data ⠸ [?25h[?25l[2K[1Gtransferring model data ⠼ [?25h[?25l[2K[1Gtransferring model data ⠴ [?25h[?25l[2K[1Gtransferring model data ⠴ [?25h[?25l[2K[1Gtransferring model data ⠧ [?25h[?25l[2K[1Gtransferring model data ⠇ [?25h[?25l[2K[1Gtransferring model data ⠏ [?25h[?25l[2K[1Gtransferring model data ⠋ [?25h[?25l[2K[1Gtransferring model data ⠙ [?25h[?25l[2K[1Gtransferring model data ⠹ [?25h[?25l[2K[1Gtransferring model data ⠸ [?25h[?25l[2K[1Gtransferring model data ⠼ [?25h[?25l[2K[1Gtransferring model data ⠴ [?25h[?25l[2K[1Gtransferring model data ⠦ [?25h[?25l[2K[1Gtransferring model data ⠧ [?25h[?25l[2K[1Gtransferring model data ⠇ [?25h[?25l[2K[1Gtransferring model data ⠏ [?25h[?25l[2K[1Gtransferring model data ⠋ [?25h[?25l[2K[1Gtransferring model data ⠙ [?25h[?25l[2K[1Gtransferring model data ⠙ [

In [50]:
import os
print(os.getcwd())

/opt/mobin/Fine-Tuning/Unsloth
