In [None]:
!pip install bitsandbytes==0.43.2
!pip install transformers
!pip install huggingface_hub

Collecting bitsandbytes==0.43.2
  Downloading bitsandbytes-0.43.2-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Downloading bitsandbytes-0.43.2-py3-none-manylinux_2_24_x86_64.whl (137.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.5/137.5 MB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.43.2


In [None]:
# !pip uninstall bitsandbytes -y

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
import torch
from huggingface_hub import login

In [None]:
import os

# Never hard-code the access token in the notebook: read it from the HF_TOKEN
# environment variable. When the variable is unset or empty, pass None so that
# login() falls back to its interactive prompt instead of sending an empty token.
login(token=os.environ.get("HF_TOKEN") or None)

In [None]:
import os
import zipfile

# Zip archive to unpack and the folder its contents are written to
zip_file_path = "fpo_gemma_7b_IT_14DEC-20241214T153700Z-001.zip"
extract_to_path = "/content/model/"

# Make sure the destination folder exists (no error if it is already there)
os.makedirs(extract_to_path, exist_ok=True)

# Unpack every member of the archive into the destination folder
with zipfile.ZipFile(zip_file_path, mode='r') as archive:
    archive.extractall(path=extract_to_path)

print(f"Files extracted to: {extract_to_path}")


Files extracted to: /content/model/


In [None]:

# Hub identifiers: the base checkpoint and the adapter repository applied on top of it
base_model_path = "google/gemma-7b-it"
adapter_path = "basab1142/FPO_Gemma_7b_it"

# 4-bit NF4 quantization with double quantization; compute runs in float16
quantization_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    load_in_4bit=True,
)

# Step 1: quantized base model, with device placement left to device_map="auto"
print("🔧 Loading base model with quantization...")
model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    device_map="auto",
    quantization_config=quantization_config,
    torch_dtype=torch.float16,
)
print("✓ Base model loaded.")

# Step 2: tokenizer, taken from the adapter repository rather than the base model
print("🔧 Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(adapter_path)
print("✓ Tokenizer loaded.")

# Step 3: wrap the base model with the adapter weights (rebinds `model`)
print("🔧 Applying adapter...")
model = PeftModel.from_pretrained(model, adapter_path)
print("✓ Adapter applied. Model is ready.")

# Switch to evaluation mode; as the last expression, this also displays the module tree
model.eval()





🔧 Loading base model with quantization...


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

✓ Base model loaded.
🔧 Loading tokenizer...


tokenizer_config.json:   0%|          | 0.00/40.6k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/34.4M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/522 [00:00<?, ?B/s]

✓ Tokenizer loaded.
🔧 Applying adapter...


adapter_config.json:   0%|          | 0.00/714 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/6.44M [00:00<?, ?B/s]

✓ Adapter applied. Model is ready.


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GemmaForCausalLM(
      (model): GemmaModel(
        (embed_tokens): Embedding(256000, 3072, padding_idx=0)
        (layers): ModuleList(
          (0-27): 28 x GemmaDecoderLayer(
            (self_attn): GemmaSdpaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=3072, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3072, out_features=4, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=4, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
          

In [None]:
# Example usage
def run(topic, context, max_new_tokens=750):
    """Generate an article for ``topic`` using ``context`` as supporting material.

    Builds the ``<|im_start|>``/``<|im_end|>``-delimited prompt, tokenizes it with
    the notebook-level ``tokenizer`` and generates with the notebook-level ``model``.

    Args:
        topic: Title of the news article; interpolated into the prompt.
        context: Supporting context; interpolated into the prompt.
        max_new_tokens: Upper bound on generated tokens, forwarded to
            ``model.generate`` (defaults to 750).

    Returns:
        The decoded continuation only (prompt excluded), stripped of
        leading and trailing whitespace.
    """
    prompt = (
        "<|im_start|>system\n"
        f"The topic is a title of a news article and context is also provided, Generate an article based on that.\n\n topic:{topic} \n Context:{context}\n"
        "<|im_end|>\n<|im_start|>user\n"
    )

    # Send the inputs to wherever the model lives instead of assuming "cuda".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # Drop the prompt by token count. Slicing the decoded string by len(prompt)
    # is unreliable because decoding (with special tokens skipped) is not
    # guaranteed to reproduce the prompt text character for character.
    prompt_len = inputs["input_ids"].shape[-1]
    completion_ids = outputs[0][prompt_len:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()


In [None]:
import pandas as pd
import requests
import json
from tqdm import tqdm
tqdm.pandas()

In [None]:
def batch_run(df, batch_size):
    """Run ``run`` on every row of ``df``, processing ``batch_size`` rows at a time.

    Each row's ``'abstract'`` and ``'Web_ret_context'`` values are passed to
    ``run`` and the result is stored in a new ``'Gemma-7b-it-fpo'`` column.
    The input frame is not modified.

    Args:
        df: DataFrame with ``'abstract'`` and ``'Web_ret_context'`` columns.
        batch_size: Number of rows per progress-bar step; must be >= 1.

    Returns:
        A new DataFrame with all rows of ``df`` plus the ``'Gemma-7b-it-fpo'``
        column, re-indexed from 0.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    results = []
    for start in tqdm(range(0, len(df), batch_size)):
        # Work on an explicit copy of the slice: assigning a column to a bare
        # iloc slice raises SettingWithCopyWarning and may not behave as intended.
        batch = df.iloc[start:start + batch_size].copy()

        # Run the model generation for each row of the batch
        batch['Gemma-7b-it-fpo'] = batch.apply(
            lambda row: run(row['abstract'], row['Web_ret_context']), axis=1
        )

        results.append(batch)

    if not results:
        # Empty input: pd.concat([]) would raise, so return an empty frame
        # that still carries the output column.
        empty = df.copy()
        empty['Gemma-7b-it-fpo'] = pd.Series(dtype="object")
        return empty.reset_index(drop=True)

    # Concatenate all processed batches back into a single DataFrame
    return pd.concat(results).reset_index(drop=True)

In [None]:

# Read the dataset CSV from the Hugging Face Hub and keep only its first 5 rows
df = pd.read_csv("hf://datasets/gsingh1-py/gemma-7b-it/Gemma-7b-fpo (1) (1).csv").head(5)

# Generate an article for every remaining row, 10 rows per batch
df_new = batch_run(df, batch_size=10)

# Print a column/dtype summary, then write the result to CSV without the index
df_new.info()
df_new.to_csv("Gemma-fpo.csv", index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  batch['Gemma-7b-it-fpo'] = batch.apply(
100%|██████████| 1/1 [00:51<00:00, 51.70s/it]

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 6 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   title                5 non-null      object
 1   abstract             5 non-null      object
 2   human_story_fetched  5 non-null      object
 3   Web_ret_context      5 non-null      object
 4   Gemma-7b             5 non-null      object
 5   Gemma-7b-it-fpo      5 non-null      object
dtypes: object(6)
memory usage: 368.0+ bytes





In [None]:
# Display the frame returned by batch_run, including the new 'Gemma-7b-it-fpo' column
df_new

Unnamed: 0,title,abstract,human_story_fetched,Web_ret_context,Gemma-7b,Gemma-7b-it-fpo
0,"Inflation slowed, good news as the Fed meets.",Federal Reserve officials are poised to announ...,U.S. Economy\nU.S. Economy\nSupported by\nInfl...,"The Fed targets 2% annual inflation, a level i...","Federal Reserve Holds Rates Steady, Eyes Econ...",The Fed's decision to hold rates steady is a c...
1,Buyers Snap Up Aging and Empty Office Building...,Bargain hunters are getting deals of up to 70 ...,Supported by\nBuyers Snap Up Aging and Empty O...,By Rob Copeland If landlords can’t pay back lo...,Bargain Deals Point to Distress in Commercial...,The growing distress in commercial real estate...
2,What to Watch as the Fed Meets,Federal Reserve officials are expected to leav...,U.S. Economy\nU.S. Economy\nSupported by\nThe ...,"Fed leaves interest rates unchanged, suggests ...","Federal Reserve Holds Rates Steady, Hints at ...",The Fed's decision to hold rates steady is a s...
3,Losing a Bridge Upends Life in Baltimore (and ...,Congestion has increased on other routes in th...,BaltimoreBridge Collapse\nLosing a Bridge Upen...,“We were spoiled with that bridge.\n“Part of w...,Congestion Returns to Baltimore as Port Reope...,The streets are dotted with pollution hot spot...
4,Paramount’s Merger Talks With Skydance Fall Apart,There were several hitches in the last week as...,Supported by\nParamount’s Merger Talks With Sk...,"But those talks hit several snags, and Paramou...",Skydance and Paramount Deal Collapse: Unresol...,"The deal, which had been in the works for mont..."
