In [None]:
# Uninstall existing bitsandbytes and install the latest version
!pip uninstall -y bitsandbytes
!pip install -U bitsandbytes

Found existing installation: bitsandbytes 0.45.4
Uninstalling bitsandbytes-0.45.4:
  Successfully uninstalled bitsandbytes-0.45.4
Collecting bitsandbytes
  Using cached bitsandbytes-0.45.4-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Using cached bitsandbytes-0.45.4-py3-none-manylinux_2_24_x86_64.whl (76.0 MB)
Installing collected packages: bitsandbytes
Successfully installed bitsandbytes-0.45.4


In [None]:
# Install other required packages
!pip install -U transformers accelerate sentencepiece

In [None]:
!pip install protobuf==3.20.3 --force-reinstall


Collecting protobuf==3.20.3
  Downloading protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)
Downloading protobuf-3.20.3-py2.py3-none-any.whl (162 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.1/162.1 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 5.29.4
    Uninstalling protobuf-5.29.4:
      Successfully uninstalled protobuf-5.29.4
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-metadata 1.16.1 requires protobuf<6.0.0dev,>=4.25.2; python_version >= "3.11", but you have protobuf 3.20.3 which is incompatible.
grpcio-status 1.71.0 requires protobuf<6.0dev,>=5.26.1, but you have protobuf 3.20.3 which is incompatible.[0m[31m
[0mSuccessfully installed protobuf-3.20.3


In [None]:
!pip install codecarbon

Collecting codecarbon
  Downloading codecarbon-2.8.3-py3-none-any.whl.metadata (8.7 kB)
Collecting arrow (from codecarbon)
  Downloading arrow-1.3.0-py3-none-any.whl.metadata (7.5 kB)
Collecting fief-client[cli] (from codecarbon)
  Downloading fief_client-0.20.0-py3-none-any.whl.metadata (2.1 kB)
Collecting questionary (from codecarbon)
  Downloading questionary-2.1.0-py3-none-any.whl.metadata (5.4 kB)
Collecting rapidfuzz (from codecarbon)
  Downloading rapidfuzz-3.12.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting types-python-dateutil>=2.8.10 (from arrow->codecarbon)
  Downloading types_python_dateutil-2.9.0.20241206-py3-none-any.whl.metadata (2.1 kB)
Collecting httpx<0.28.0,>=0.21.3 (from fief-client[cli]->codecarbon)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting jwcrypto<2.0.0,>=1.4 (from fief-client[cli]->codecarbon)
  Downloading jwcrypto-1.5.6-py3-none-any.whl.metadata (3.1 kB)
Collecting yaspin (from fief-clie

In [None]:

# Print GPU info for debugging.
# NOTE(review): if the shell answers "command not found", the runtime presumably has no
# NVIDIA GPU attached — confirm the accelerator setting before loading the model.
!nvidia-smi

# Verify bitsandbytes installation: import it in a separate `python` process started
# from the shell and print the version string it reports.
!python -c "import bitsandbytes; print(f'bitsandbytes version: {bitsandbytes.__version__}')"

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

/bin/bash: line 1: nvidia-smi: command not found
Could not load bitsandbytes native library: /usr/local/lib/python3.11/dist-packages/bitsandbytes/libbitsandbytes_cpu.so: cannot open shared object file: No such file or directory
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/bitsandbytes/cextension.py", line 85, in <module>
    lib = get_native_library()
          ^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/bitsandbytes/cextension.py", line 72, in get_native_library
    dll = ct.cdll.LoadLibrary(str(binary_path))
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.11/ctypes/__init__.py", line 454, in LoadLibrary
    return self._dlltype(name)
           ^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.11/ctypes/__init__.py", line 376, in __init__
    self._handle = _dlopen(self._name, mode)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^
OSError: /usr/local/lib/python3.11/dist-packages/bitsandbytes/libbitsandbyte

In [None]:
# === IMPORTS ===
import os
import pandas as pd
import time
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from tqdm.notebook import tqdm
import gc
import logging
from codecarbon import EmissionsTracker

In [None]:
# === CONFIG ===
# Process-wide switches: set the tokenizers parallelism flag to "false" and only let
# ERROR-level records through the "codecarbon" logger.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
logging.getLogger("codecarbon").setLevel(logging.ERROR)

# Run parameters: which CSV column holds the prompt text, and how many rows make up
# one batch file.
prompt_column = 'Prompt'
batch_size = 20

# File locations: source dataset, final merged CSV, and the append-only text log.
input_path = '/content/drive/MyDrive/Dataset_LLM/Prompt_Dataset.csv'
merged_output = '/content/drive/MyDrive/Dataset_LLM/Prompt_Dataset_with_mistral.csv'
log_file = '/content/drive/MyDrive/Dataset_LLM/inference_log.txt'

# Per-batch CSVs are written here; create the directory up front if it is missing.
output_dir = '/content/drive/MyDrive/Dataset_LLM/batch_outputs/'
os.makedirs(output_dir, exist_ok=True)

In [None]:
# === LOGGING ===
def log(msg):
    """Append a timestamped line to the log file and echo the message to stdout.

    The file named by the module-level ``log_file`` is opened in append mode on every
    call and closed again before returning, so each entry is handed to the file system
    as soon as it is written.

    Args:
        msg: Message to record; it is formatted into the line with an f-string.
    """
    timestamp = time.strftime('%Y-%m-%d %H:%M:%S')
    # Explicit UTF-8 so messages with non-ASCII characters (e.g. text quoted from a
    # prompt or an exception) are written the same way regardless of the locale default.
    with open(log_file, 'a', encoding='utf-8') as f:
        f.write(f"{timestamp} - {msg}\n")
    print(msg)

In [None]:
# === LOAD MISTRAL MODEL ===
def load_mistral_model():
    """Load the Mistral-7B-Instruct tokenizer and model, trying the lightest option first.

    Attempts are made in this order, each one only if the previous raised:
    4-bit NF4 quantization, 8-bit quantization, plain fp16. Every attempt is
    announced through ``log`` and a failure is logged with its exception text.

    The Hugging Face access token is read from the ``HUGGINGFACE_TOKEN`` environment
    variable and passed to both the tokenizer and the model download.

    Returns:
        tuple: ``(model, tokenizer)``. The tokenizer's pad token is set to its EOS
        token when it has none.

    Raises:
        Exception: whatever the final fp16 load raises, if all three attempts fail.
    """
    model_name = "mistralai/Mistral-7B-Instruct-v0.3"
    # `token` is the current keyword; `use_auth_token` is deprecated in Transformers.
    hf_token = os.getenv("HUGGINGFACE_TOKEN")
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    def clear_cuda():
        # Collect first so tensors that just became unreachable are freed before the
        # CUDA caching allocator is asked to give its blocks back.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def load_with(**precision_kwargs):
        """Free cached memory, then load the model with the given precision options."""
        clear_cuda()
        return AutoModelForCausalLM.from_pretrained(
            model_name,
            token=hf_token,
            device_map="auto",
            low_cpu_mem_usage=True,
            **precision_kwargs,
        )

    # Config construction is deferred (lambda) so an error while building a
    # BitsAndBytesConfig is handled like any other failure of that attempt.
    quantized_attempts = (
        ("4-bit", lambda: BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
        )),
        ("8-bit", lambda: BitsAndBytesConfig(load_in_8bit=True, llm_int8_threshold=6.0)),
    )

    # A flat loop (instead of nested try/except) lets each failed attempt's exception,
    # and the traceback frames it references, go away before the next load starts.
    for label, make_config in quantized_attempts:
        try:
            log(f"Trying {label} quantization...")
            model = load_with(quantization_config=make_config())
            return model, tokenizer
        except Exception as err:
            log(f"{label} failed: {err}")

    log("Falling back to fp16...")
    model = load_with(torch_dtype=torch.float16)
    return model, tokenizer

In [None]:
# === GENERATE CLEAN RESPONSE ===
def generate_mistral_response(prompt, model, tokenizer):
    """Generate one sampled response for `prompt` and measure the call.

    The prompt is wrapped as ``<s>[INST] ... [/INST]``, tokenized, and passed to
    ``model.generate`` (sampling, temperature 0.7, up to 1048 new tokens) while an
    ``EmissionsTracker`` is running.

    Args:
        prompt: Prompt text; non-string values are converted with ``str``.
        model: Object exposing ``device`` and ``generate``.
        tokenizer: Callable tokenizer exposing ``eos_token_id`` and ``decode``.

    Returns:
        tuple: ``(response, token_length, duration, emissions)`` where
            response: decoded text of the newly generated tokens only, stripped, with a
                leading echo of the prompt removed;
            token_length: number of newly generated tokens;
            duration: seconds from just before ``generate`` until the tracker stopped;
            emissions: the value returned by ``EmissionsTracker.stop()``.

    Raises:
        Exception: anything raised by tokenization or ``model.generate``; the tracker
        is stopped before the exception propagates.
    """
    mistral_prompt = f"<s>[INST] {prompt} [/INST]"
    # The template already carries the BOS marker, so the tokenizer must not add a
    # second one.
    inputs = tokenizer(
        mistral_prompt, return_tensors="pt", add_special_tokens=False
    ).to(model.device)
    prompt_token_count = len(inputs.input_ids[0])

    tracker = EmissionsTracker(measure_power_secs=1, log_level="error")
    tracker.start()
    start_time = time.time()
    try:
        output = model.generate(
            inputs.input_ids,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            attention_mask=inputs.attention_mask,
            max_new_tokens=1048
        )
    finally:
        # Always stop the tracker, even when generation fails, so a failed row does not
        # leave a tracker running.
        # NOTE(review): per the CodeCarbon docs stop() appears to return emissions in
        # kg CO2-eq, while the caller stores this value in a column named
        # 'Energy_Consumption_kWh' — confirm which quantity is intended.
        emissions = tracker.stop()
    duration = time.time() - start_time

    # Decode only what the model produced: slicing off the prompt tokens avoids
    # searching the text for '[/INST]', and skip_special_tokens drops the trailing
    # end-of-sequence marker from the stored response.
    generated_ids = output[0][prompt_token_count:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    # Some completions start by repeating the question; drop that echo.
    prompt_text = str(prompt).strip()
    if response.startswith(prompt_text):
        response = response[len(prompt_text):].strip()

    token_length = len(generated_ids)
    return response, token_length, duration, emissions

In [None]:
# === MAIN GENERATION PROCESS ===
# Columns this pipeline adds to the dataset.
_RESULT_COLUMNS = [
    'Mistral_Response',
    'Response_Token_length',
    'Response_Total_duration',
    'Energy_Consumption_kWh',
]


def _ensure_result_columns(frame):
    """Make sure every result column exists in `frame` with object dtype; return `frame`."""
    for col in _RESULT_COLUMNS:
        if col not in frame.columns:
            frame[col] = None
        # object dtype lets one column hold text, numbers and missing values, so later
        # per-cell assignments never have to change the column's dtype.
        frame[col] = frame[col].astype(object)
    return frame


def _batch_id_from_filename(filename):
    """Return N for a file named exactly ``batch_N.csv`` (N in ASCII digits), else None."""
    stem, ext = os.path.splitext(filename)
    digits = stem[len('batch_'):]
    if ext == '.csv' and stem.startswith('batch_') and digits.isascii() and digits.isdigit():
        return int(digits)
    return None


def _merge_batches():
    """Overlay every saved batch file onto the input dataset and return the merged frame."""
    full_df = _ensure_result_columns(pd.read_csv(input_path))

    batch_files = {}
    for filename in os.listdir(output_dir):
        batch_id = _batch_id_from_filename(filename)
        if batch_id is not None:
            batch_files[batch_id] = filename

    for batch_id in sorted(batch_files):
        batch_df = pd.read_csv(os.path.join(output_dir, batch_files[batch_id]))
        # Batch files are written without their index, so each one loads as rows
        # 0..k-1. DataFrame.update aligns on index labels; restore the labels the rows
        # had in the dataset, otherwise every batch lands on the first rows.
        # Assumes batch_size is unchanged since the files were written (the resume
        # logic relies on the same mapping).
        first_row = (batch_id - 1) * batch_size
        batch_df.index = range(first_row, first_row + len(batch_df))
        present = [col for col in _RESULT_COLUMNS if col in batch_df.columns]
        full_df.update(batch_df[present])
    return full_df


def process_large_dataset():
    """Generate responses batch by batch, checkpoint each batch, then merge everything.

    Steps:
      1. Read the dataset at ``input_path`` and add the result columns if missing.
      2. Walk the rows in chunks of ``batch_size``. A chunk whose ``batch_N.csv``
         already exists in ``output_dir`` is skipped. Otherwise each row with a
         non-missing prompt is passed to ``generate_mistral_response`` and the chunk is
         saved as ``batch_N.csv``.
      3. Overlay all batch files onto a fresh copy of the dataset and write it to
         ``merged_output``.

    The model is loaded on the first chunk that actually needs processing, so a run in
    which every batch is already saved goes straight to the merge.

    A row whose generation raises is logged and left empty. Its batch is still saved,
    so that row is not retried by a later run unless the batch file is removed.
    """
    log("Loading dataset...")
    df = _ensure_result_columns(pd.read_csv(input_path))
    total_rows = len(df)

    # Only files named exactly batch_<digits>.csv count as finished batches; anything
    # else in the directory is ignored instead of aborting the run.
    processed_batches = set()
    for filename in os.listdir(output_dir):
        found_id = _batch_id_from_filename(filename)
        if found_id is not None:
            processed_batches.add(found_id)

    model = tokenizer = None
    for start in range(0, total_rows, batch_size):
        batch_id = (start // batch_size) + 1
        batch_path = os.path.join(output_dir, f'batch_{batch_id}.csv')

        if batch_id in processed_batches:
            log(f"Skipping already processed batch {batch_id}")
            continue

        if model is None:
            model, tokenizer = load_mistral_model()

        end = min(start + batch_size, total_rows)
        batch = df.iloc[start:end].copy()
        log(f"Processing batch {batch_id}: rows {start} to {end - 1}")

        for i in tqdm(batch.index):
            prompt = batch.loc[i, prompt_column]
            if pd.isna(prompt):
                continue
            try:
                response, token_len, duration, emissions = generate_mistral_response(prompt, model, tokenizer)
                batch.at[i, 'Mistral_Response'] = response
                batch.at[i, 'Response_Token_length'] = token_len
                batch.at[i, 'Response_Total_duration'] = duration
                batch.at[i, 'Energy_Consumption_kWh'] = emissions
            except Exception as e:
                # Best effort: keep going so one bad row does not lose the batch.
                log(f"Error at row {i}: {e}")

            if torch.cuda.is_available() and i % 3 == 0:
                torch.cuda.empty_cache()
                gc.collect()

        # Write to a temporary name and rename, so an interrupted write can never leave
        # a truncated batch_N.csv that a later run would treat as finished.
        tmp_path = batch_path + '.tmp'
        batch.to_csv(tmp_path, index=False)
        os.replace(tmp_path, batch_path)
        log(f"Saved batch {batch_id} to {batch_path}")

    # Drop the model before the merge so its memory can be reclaimed.
    model = tokenizer = None
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    log("Merging all batches...")
    full_df = _merge_batches()
    full_df.to_csv(merged_output, index=False)
    log(f"Final merged output saved to {merged_output}")

In [None]:
# === RUN ===
# Run the whole pipeline: batches that already have a saved CSV in the output directory
# are skipped, the rest are generated and saved, and finally all batch files are merged
# into the single output CSV.
process_large_dataset()

Loading dataset...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/141k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

Trying 4-bit quantization...


config.json:   0%|          | 0.00/601 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.55G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Skipping already processed batch 1
Skipping already processed batch 2
Skipping already processed batch 3
Skipping already processed batch 4
Skipping already processed batch 5
Skipping already processed batch 6
Skipping already processed batch 7
Skipping already processed batch 8
Skipping already processed batch 9
Skipping already processed batch 10
Skipping already processed batch 11
Skipping already processed batch 12
Skipping already processed batch 13
Skipping already processed batch 14
Skipping already processed batch 15
Skipping already processed batch 16
Skipping already processed batch 17
Skipping already processed batch 18
Skipping already processed batch 19
Skipping already processed batch 20
Skipping already processed batch 21
Skipping already processed batch 22
Skipping already processed batch 23
Skipping already processed batch 24
Skipping already processed batch 25
Skipping already processed batch 26
Skipping already processed batch 27
Skipping already processed batch 28
S

  0%|          | 0/20 [00:00<?, ?it/s]

  df = pd.concat([df, pd.DataFrame.from_records([dict(total.values)])])


Saved batch 135 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_135.csv
Processing batch 136: rows 2700 to 2719


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 136 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_136.csv
Processing batch 137: rows 2720 to 2739


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 137 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_137.csv
Processing batch 138: rows 2740 to 2759


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 138 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_138.csv
Processing batch 139: rows 2760 to 2779


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 139 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_139.csv
Processing batch 140: rows 2780 to 2799


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 140 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_140.csv
Processing batch 141: rows 2800 to 2819


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 141 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_141.csv
Processing batch 142: rows 2820 to 2839


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 142 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_142.csv
Processing batch 143: rows 2840 to 2859


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 143 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_143.csv
Processing batch 144: rows 2860 to 2879


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 144 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_144.csv
Processing batch 145: rows 2880 to 2899


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 145 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_145.csv
Processing batch 146: rows 2900 to 2919


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 146 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_146.csv
Processing batch 147: rows 2920 to 2939


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 147 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_147.csv
Processing batch 148: rows 2940 to 2959


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 148 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_148.csv
Processing batch 149: rows 2960 to 2979


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 149 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_149.csv
Processing batch 150: rows 2980 to 2999


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 150 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_150.csv
Processing batch 151: rows 3000 to 3019


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 151 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_151.csv
Processing batch 152: rows 3020 to 3039


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 152 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_152.csv
Processing batch 153: rows 3040 to 3059


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 153 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_153.csv
Processing batch 154: rows 3060 to 3079


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 154 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_154.csv
Processing batch 155: rows 3080 to 3099


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 155 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_155.csv
Processing batch 156: rows 3100 to 3119


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 156 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_156.csv
Processing batch 157: rows 3120 to 3139


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 157 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_157.csv
Processing batch 158: rows 3140 to 3159


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 158 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_158.csv
Processing batch 159: rows 3160 to 3179


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 159 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_159.csv
Processing batch 160: rows 3180 to 3199


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 160 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_160.csv
Processing batch 161: rows 3200 to 3219


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 161 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_161.csv
Processing batch 162: rows 3220 to 3239


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 162 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_162.csv
Processing batch 163: rows 3240 to 3259


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 163 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_163.csv
Processing batch 164: rows 3260 to 3279


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 164 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_164.csv
Processing batch 165: rows 3280 to 3299


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 165 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_165.csv
Processing batch 166: rows 3300 to 3319


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 166 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_166.csv
Processing batch 167: rows 3320 to 3339


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 167 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_167.csv
Processing batch 168: rows 3340 to 3359


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 168 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_168.csv
Processing batch 169: rows 3360 to 3379


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 169 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_169.csv
Processing batch 170: rows 3380 to 3399


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 170 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_170.csv
Processing batch 171: rows 3400 to 3419


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 171 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_171.csv
Processing batch 172: rows 3420 to 3439


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 172 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_172.csv
Processing batch 173: rows 3440 to 3459


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 173 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_173.csv
Processing batch 174: rows 3460 to 3479


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 174 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_174.csv
Processing batch 175: rows 3480 to 3499


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 175 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_175.csv
Processing batch 176: rows 3500 to 3519


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 176 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_176.csv
Processing batch 177: rows 3520 to 3539


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 177 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_177.csv
Processing batch 178: rows 3540 to 3559


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 178 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_178.csv
Processing batch 179: rows 3560 to 3579


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 179 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_179.csv
Processing batch 180: rows 3580 to 3599


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 180 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_180.csv
Processing batch 181: rows 3600 to 3619


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 181 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_181.csv
Processing batch 182: rows 3620 to 3639


  0%|          | 0/20 [00:00<?, ?it/s]

Saved batch 182 to /content/drive/MyDrive/Dataset_LLM/batch_outputs/batch_182.csv
Processing batch 183: rows 3640 to 3659


  0%|          | 0/20 [00:00<?, ?it/s]