#### Installation and imports

In [None]:
!pip install transformers peft accelerate bitsandbytes datasets trl huggingface_hub dotenv

In [2]:
from random import randrange
import torch

from datasets import load_dataset

from peft import LoraConfig, prepare_model_for_kbit_training, TaskType, PeftModel

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    set_seed,
    pipeline
)

from trl import SFTTrainer, SFTConfig

In [3]:
model_id = "microsoft/Phi-4-mini-instruct"
model_name = "microsoft/Phi-4-mini-instruct"

# hf_model_repo="SkyR/sample_LI"

# The following are parameters for the LoRA (Learning from Random Architecture) model.
lora_r = 16
lora_alpha = 16
lora_dropout = 0.05

target_modules= ['k_proj', 'q_proj', 'v_proj', 'o_proj', "gate_proj", "down_proj", "up_proj"]

device_map = {"": 0}

set_seed(1234)

In [7]:
from huggingface_hub import login
from dotenv import load_dotenv
import os

load_dotenv()

login(token=os.getenv("HF_WRITE_TOKEN"))

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

#### Load dataset

In [5]:
import json
from datasets import Dataset

data = []
with open('/content/final_dataset.jsonl', 'r') as f:
    for line in f:
        data.append(json.loads(line))

hf_dataset = Dataset.from_list(data)

In [6]:
print(f"dataset size: {len(hf_dataset)}")
print(hf_dataset[77])

dataset size: 2937
{'instruction': "Write a LinkedIn post about a recent technology innovation that's worth reading over the weekend, just like the one I shared earlier. Make sure it's related to Apple", 'output': 'This s is a great weekend read !  https://lnkd.in/ddtyVUs  #apple #technology'}


#### Loading tokenizer and data preparation

In [7]:
tokenizer_id = model_id
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)

tokenizer.padding_side = 'right'

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/249 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/587 [00:00<?, ?B/s]

In [8]:
def create_message_column(row):
    # Empty list to store the messages.
    messages = []

    user = {
        "content": f"You are a professional LinkedIn user.\n Input: {row['instruction']}",
        "role": "user"
    }

    messages.append(user)

    assistant = {
        "content": f"{row['output']}",
        "role": "assistant"
    }

    messages.append(assistant)

    return {"messages": messages}

# 'format_dataset_chatml' is a function that takes a row from the dataset and returns a dictionary
# with a 'text' key and a string of formatted chat messages as its value.
def format_dataset_chatml(row):
    # 'tokenizer.apply_chat_template' is a method that formats a list of chat messages into a single string.

    return {"text": tokenizer.apply_chat_template(row["messages"], add_generation_prompt=False, tokenize=False)}

In [9]:

dataset_chatml = hf_dataset.map(create_message_column)
dataset_chatml = dataset_chatml.map(format_dataset_chatml)

Map:   0%|          | 0/2937 [00:00<?, ? examples/s]

Map:   0%|          | 0/2937 [00:00<?, ? examples/s]

In [10]:
dataset_chatml[0]

{'instruction': 'Can you write a LinkedIn post about the importance of work-based learning for career development in the USA, specifically highlighting the benefits of apprenticeship programs? Please',
 'output': 'Robert Lerman  writes that achieving a healthy future of work requires employees to build skills that help them attain productive and rewarding careers. He notes - "one of the most cost-effective ways to do this is through apprenticeship, which helps workers master occupations and gain professional identity and pride". Coudlnt agree more!  #workbasedlearning   #USA   #apprenticeship   Read the article on  #UrbanWire   Urban Institute       …see more',
 'messages': [{'content': 'You are a professional LinkedIn user.\n Input: Can you write a LinkedIn post about the importance of work-based learning for career development in the USA, specifically highlighting the benefits of apprenticeship programs? Please',
   'role': 'user'},
  {'content': 'Robert Lerman  writes that achieving

In [11]:
dataset_chatml = dataset_chatml.train_test_split(test_size=0.1, seed=1234)

dataset_chatml

DatasetDict({
    train: Dataset({
        features: ['instruction', 'output', 'messages', 'text'],
        num_rows: 2643
    })
    test: Dataset({
        features: ['instruction', 'output', 'messages', 'text'],
        num_rows: 294
    })
})

#### Instruction fine-tuning

In [12]:
if torch.cuda.is_bf16_supported():
  compute_dtype = torch.bfloat16
  attn_implementation = 'flash_attention_2'
# If bfloat16 is not supported, 'compute_dtype' is set to 'torch.float16' and 'attn_implementation' is set to 'sdpa'.
else:
  compute_dtype = torch.float16
  attn_implementation = 'sdpa'

# This line of code is used to print the value of 'attn_implementation', which indicates the chosen attention implementation.
print(attn_implementation)

flash_attention_2


##### Loading model and tokenizer

In [13]:
model_name = "microsoft/Phi-4-mini-instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, add_eos_token=True, use_fast=True)

tokenizer.pad_token = tokenizer.unk_token

tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)

# The padding side is set to 'left', meaning that padding tokens will be added to the left (start) of the sequence.
tokenizer.padding_side = 'left'

# Configure for 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    bnb_8bit_quant_type="nf4", # Or "fp4" depending on your preference
    bnb_8bit_compute_dtype=compute_dtype,
    bnb_8bit_use_double_quant=False,
)


# 'device_map' is passed as an argument to specify the device mapping for distributed training.
model = AutoModelForCausalLM.from_pretrained(
          model_id,
          quantization_config=bnb_config, # Add quantization config
          torch_dtype=compute_dtype,
          trust_remote_code=True,
          device_map=device_map
          )

config.json: 0.00B [00:00, ?B/s]

configuration_phi3.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-4-mini-instruct:
- configuration_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_phi3.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-4-mini-instruct:
- modeling_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.77G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

##### Training

In [14]:
import os
os.environ["WANDB_DISABLED"] = "true"

In [15]:
args = SFTConfig(
        output_dir="/content/phi-4-LinkedIn-8bit",
        eval_strategy="steps",
        do_eval=True,
        optim="adamw_torch",
        per_device_train_batch_size=1,
        gradient_accumulation_steps=8,
        per_device_eval_batch_size=4,
        save_strategy="epoch",
        logging_steps=100,
        learning_rate=1e-4,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        eval_steps=50,
        num_train_epochs=1,
        warmup_ratio=0.1,
        lr_scheduler_type="linear",
        seed=42,
        dataset_text_field="text",
        max_length=412,
        report_to=None
)


peft_config = LoraConfig(
        r=lora_r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        task_type=TaskType.CAUSAL_LM,
        target_modules=target_modules,
)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [16]:
trainer = SFTTrainer(
        model=model,
        train_dataset=dataset_chatml['train'],
        eval_dataset=dataset_chatml['test'],
        peft_config=peft_config,
        processing_class=tokenizer,
        args=args
)

Tokenizing train dataset:   0%|          | 0/2643 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/2643 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/294 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/294 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [17]:
trainer.train()



Step,Training Loss,Validation Loss
50,No log,2.843237
100,3.307700,2.643895
150,3.307700,2.598282
200,2.596500,2.5761
250,2.596500,2.563874
300,2.541000,2.558807


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


TrainOutput(global_step=331, training_loss=2.785633767119347, metrics={'train_runtime': 3189.6508, 'train_samples_per_second': 0.829, 'train_steps_per_second': 0.104, 'total_flos': 7255429705273344.0, 'train_loss': 2.785633767119347})

##### Saving Adaptor Model and Loading to Hub

In [37]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [39]:
    !cp -r "/content/phi-4-LinkedIn-8bit" "/content/gdrive/MyDrive/"

In [18]:
trainer.save_model()


In [40]:
from huggingface_hub import create_repo
from google.colab import userdata
import os

# Replace with your desired repository name
repo_name = "my-linkedin-8bit-model"
# Replace with your Hugging Face username
your_username = "SkyR" # Replace with your actual username

# Combine username and repo name
repo_id = f"{your_username}/{repo_name}"

# Retrieve your Hugging Face write token from Colab secrets
# Make sure you have stored your token with the name 'HF_WRITE_TOKEN'
hf_token = userdata.get("HF_WRITE_TOKEN")

# Create the repository
try:
    create_repo(repo_id, token=hf_token, exist_ok=True)
    print(f"Repository '{repo_id}' created successfully (or already exists).")
except Exception as e:
    print(f"Error creating repository: {e}")

Repository 'SkyR/my-linkedin-8bit-model' created successfully (or already exists).


In [45]:
from huggingface_hub import HfApi
from google.colab import userdata

# Replace with your repository ID on Hugging Face (e.g., "your_username/your_repo_name")
repo_id = "SkyR/my-linkedin-8bit-model" # Replace with your repository ID

# The local path in your Colab environment where the model files are located
# This could be a folder in your mounted Google Drive or a folder in the Colab environment
folder_path = "/content/phi-4-LinkedIn-8bit" # Replace with the actual path to your model folder

# Retrieve your Hugging Face write token from Colab secrets
hf_token = userdata.get("HF_WRITE_TOKEN")

# Initialize the Hugging Face API client
api = HfApi(token=hf_token)

# Upload the folder to the repository
try:
    api.upload_folder(
        folder_path=folder_path,
        repo_id=repo_id,
        repo_type="model", # Or "dataset", "space" depending on your repo type
    )
    print(f"Successfully uploaded folder '{folder_path}' to repository '{repo_id}'.")
except Exception as e:
    print(f"Error uploading folder: {e}")

adapter_model.safetensors:   0%|          | 0.00/35.7M [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/35.7M [00:00<?, ?B/s]

Upload 10 LFS files:   0%|          | 0/10 [00:00<?, ?it/s]

optimizer.pt:   0%|          | 0.00/71.4M [00:00<?, ?B/s]

rng_state.pth:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.75k [00:00<?, ?B/s]

events.out.tfevents.1752319991.6d36cfe81e7b.566.0:   0%|          | 0.00/11.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.75k [00:00<?, ?B/s]

Successfully uploaded folder '/content/phi-4-LinkedIn-8bit' to repository 'SkyR/my-linkedin-8bit-model'.


##### Freeing up memory

In [None]:

# del model
# del trainer

import gc

gc.collect()
gc.collect()


0

In [None]:
torch.cuda.empty_cache()

In [None]:
gc.collect()

60

##### Merging adapter and base model and saving to hub

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
import torch

device_map = {"": 0}

# Define the base model ID and your adapter repository ID
base_model_id = "microsoft/Phi-4-mini-instruct" # Replace with your base model ID
adapter_repo_id = "SkyR/my-linkedin-8bit-model" # Replace with your adapter repository ID on Hugging Face

# Determine the device
# device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Configure for 8-bit quantization
# Make sure the bnb_8bit_compute_dtype matches the dtype used during training
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    bnb_8bit_quant_type="nf4", # Or "fp4" depending on your preference and training config
    bnb_8bit_compute_dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16,
    bnb_8bit_use_double_quant=False,
)

# Load the base model with 8-bit quantization
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16, # Use the same dtype as training
    trust_remote_code=True,
    device_map=device_map # Load to the appropriate device
)

# Load the adapter and apply it to the base model
model = PeftModel.from_pretrained(model, adapter_repo_id)

# Optional: Merge the adapter weights into the base model for easier inference

# merged_model = model.merge_and_unload()

# Now you can use the 'model' (or 'merged_model' if you merged) for inference
# You will also need to load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)

print("Model and adapter loaded successfully!")

config.json: 0.00B [00:00, ?B/s]

configuration_phi3.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-4-mini-instruct:
- configuration_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_phi3.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-4-mini-instruct:
- modeling_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.77G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

adapter_config.json:   0%|          | 0.00/912 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/35.7M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/249 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/587 [00:00<?, ?B/s]

Model and adapter loaded successfully!


In [4]:
merged_model = model.merge_and_unload()




In [5]:
merged_model.save_pretrained("/content/merged_adapter_8_bit", trust_remote_code=True, safe_serialization=True)

In [6]:
model_id = "microsoft/Phi-4-mini-instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, add_eos_token=True, use_fast=True)

tokenizer.pad_token = tokenizer.unk_token

tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)

# The padding side is set to 'left', meaning that padding tokens will be added to the left (start) of the sequence.
tokenizer.padding_side = 'left'

tokenizer.save_pretrained("/content/merged_adapter_8_bit")

('/content/merged_adapter_8_bit/tokenizer_config.json',
 '/content/merged_adapter_8_bit/special_tokens_map.json',
 '/content/merged_adapter_8_bit/chat_template.jinja',
 '/content/merged_adapter_8_bit/vocab.json',
 '/content/merged_adapter_8_bit/merges.txt',
 '/content/merged_adapter_8_bit/added_tokens.json',
 '/content/merged_adapter_8_bit/tokenizer.json')

In [9]:
from huggingface_hub import create_repo
from google.colab import userdata
import os

# Replace with your desired repository name
repo_name = "linkedin-8bit-phi4"
# Replace with your Hugging Face username
your_username = "SkyR" # Replace with your actual username

# Combine username and repo name
repo_id = f"{your_username}/{repo_name}"

# Retrieve your Hugging Face write token from Colab secrets
# Make sure you have stored your token with the name 'HF_WRITE_TOKEN'
hf_token = userdata.get("HF_WRITE_TOKEN")

# Create the repository
try:
    create_repo(repo_id, token=hf_token, exist_ok=True)
    print(f"Repository '{repo_id}' created successfully (or already exists).")
except Exception as e:
    print(f"Error creating repository: {e}")

Repository 'SkyR/linkedin-8bit-phi4' created successfully (or already exists).


In [10]:
from huggingface_hub import HfApi
from google.colab import userdata

# Replace with your repository ID on Hugging Face (e.g., "your_username/your_repo_name")
repo_id = "SkyR/linkedin-8bit-phi4" # Replace with your repository ID

# The local path in your Colab environment where the model files are located
# This could be a folder in your mounted Google Drive or a folder in the Colab environment
folder_path = "/content/merged_adapter_8_bit" # Replace with the actual path to your model folder

# Retrieve your Hugging Face write token from Colab secrets
hf_token = userdata.get("HF_WRITE_TOKEN")

# Initialize the Hugging Face API client
api = HfApi(token=hf_token)

# Upload the folder to the repository
try:
    api.upload_folder(
        folder_path=folder_path,
        repo_id=repo_id,
        repo_type="model", # Or "dataset", "space" depending on your repo type
    )
    print(f"Successfully uploaded folder '{folder_path}' to repository '{repo_id}'.")
except Exception as e:
    print(f"Error uploading folder: {e}")

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/4.45G [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

Successfully uploaded folder '/content/merged_adapter_8_bit' to repository 'SkyR/linkedin-8bit-phi4'.


#### Inferencing

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Replace with the repository ID of your merged model on Hugging Face
merged_model_repo_id = "SkyR/linkedin-8bit-phi4" # Replace with your repository ID

# Load the tokenizer and model from the Hugging Face Hub
tokenizer = AutoTokenizer.from_pretrained(merged_model_repo_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(merged_model_repo_id, trust_remote_code=True)


In [13]:
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)


Device set to use cuda:0


In [14]:
prompt = "Write a professional LinkedIn post in a formal theme on the etiquettes in an online meeting."

messages = [{"role": "user", "content": prompt}]
formatted_prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)

# Generate text
result = generator(formatted_prompt, max_new_tokens=400, num_return_sequences=1, add_special_tokens=False)

In [15]:
response = result[0]['generated_text']

# Define the marker for the start of the assistant's response
assistant_marker = "<|assistant|>"

# Find the index of the marker
marker_index = response.find(assistant_marker)

# Check if the marker was found
if marker_index != -1:
    # Slice the string from the end of the marker and strip leading whitespace
    response_refined = response[marker_index + len(assistant_marker):].strip()
else:
    # If the marker is not found, use the original response
    response_refined = response

print(response_refined)

Title: Upholding Professional Etiquette in Online Meetings

Dear LinkedIn Community,

I am excited to share some insights on the importance of maintaining professional etiquette during online meetings. In a world where virtual meetings are becoming the norm, it is crucial to uphold the same level of professionalism as we would in an in-person setting. Here are some key etiquettes to remember:

1. Dress Appropriately: While it may be tempting to relax your attire during online meetings, it is still essential to dress professionally. This shows respect for your colleagues and maintains a professional atmosphere.

2. Turn Off Notifications: To avoid distractions, turn off all notifications during the meeting. This will help you stay focused and ensure that you are fully engaged in the discussion.

3. Join on Time: Be punctual and join the meeting a few moments before it starts. This shows your respect for others' time and demonstrates your commitment to the meeting agenda.

4. Be Prepared