In [None]:
# @title # Installing Libraries
!pip install sentencepiece accelerate

In [None]:
# @title # Importing Libraries

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from accelerate import Accelerator, load_checkpoint_and_dispatch


In [None]:


# @title ## Your Model and Dataset

# @markdown ### Model
# @markdown Select your model

model_name = "tiiuae/falcon-7b-instruct" # @param {type:"string"}
low_cpu_mem_usage = True # @param {type:"boolean"}

# Fetch the tokenizer and the model weights for `model_name`; the weights are
# requested in float16.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=low_cpu_mem_usage,
    torch_dtype=torch.float16,
)
# @markdown <br>
# @markdown  <h2> Save your model locally </h2>
save_directory="/content/model"  # @param {type:"string"}
max_shard = 100 # @param {type:"integer"}
size = "MB" # @param ["MB", "GB"]

# Shard-size string built from the two form fields, e.g. "100MB".
max_shard_size = f"{max_shard}{size}"

# Write the sharded checkpoint exactly once.
# Saving a second time into the same directory with Accelerator.save_model
# would rewrite every shard, and if the two libraries use different
# serialization formats it can leave two `*.index.json` files behind, which
# accelerate's checkpoint loader rejects as ambiguous.
model.save_pretrained(save_directory, max_shard_size=max_shard_size)

In [None]:
# @title ## Load Sharded Model
# Explicit device map: the empty-string key is mapped to the CPU.
device_map = {"": "cpu"}

# Reload the weights from the sharded checkpoint directory into `model`.
# NOTE(review): `no_split_module_classes` is presumably only consulted when
# accelerate has to infer a device map; with the explicit map above it likely
# has no effect — confirm before relying on it.
model = load_checkpoint_and_dispatch(
    model,
    save_directory,
    device_map=device_map,
    no_split_module_classes=["Block"],
)

In [None]:
# @title ## Pushing to Hub
MODEL_PATH = "Sharathhebbar24/falcon-7b-instruct_sharded" # @param {type:"string"}
# Leave this empty to use credentials cached by a prior Hub login.
# Do not share or commit the notebook with a real token filled in.
HF_TOKEN = "" # @param {type:"string"}

# An empty form field must not be sent as a literal (empty) token; None lets
# the Hub client fall back to its stored credentials instead.
hub_token = HF_TOKEN or None

tokenizer.push_to_hub(
    MODEL_PATH,
    token=hub_token,
)

# push_to_hub serializes the model again before uploading, so the shard size
# chosen in the save cell has to be passed here too; otherwise the library's
# default shard size is used and the uploaded repo is not sharded as intended.
model.push_to_hub(
    MODEL_PATH,
    token=hub_token,
    max_shard_size=max_shard_size,
)