In [1]:
from collections import defaultdict
import copy
import json
import os
import gc
import warnings
import threading
import time
import datetime
from os.path import exists, join, isdir
from dataclasses import dataclass, field
import sys
from typing import Optional, Dict, Sequence, Tuple, Union
import numpy as np
from tqdm import tqdm
import logging
import pandas as pd
import importlib
from packaging import version
from packaging.version import parse
import argparse

import torch
import transformers
from torch.nn.utils.rnn import pad_sequence
import torch.nn as nn
import torch.nn.functional as F

from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    set_seed,
    Seq2SeqTrainer,
    BitsAndBytesConfig,
    LlamaTokenizer
)
from transformers.activations import ACT2FN

import bitsandbytes as bnb
from datasets import load_dataset, load_from_disk, Dataset
import evaluate

from peft import (
    prepare_model_for_kbit_training,
    LoraConfig,
    get_peft_model,
    PeftModel
)
from peft.tuners.lora import LoraLayer
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR

import deepspeed

# Make CUDA devices 0 through 7 visible to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(gpu) for gpu in range(8))

'''
param
'''

# Alternative checkpoints kept for reference:
# llama2chat = "/hpc2hdd/home/lzhang330/ssd_workspace/models/llama-2-7b-chat-hf"
# llama2 = "/hpc2hdd/home/lzhang330/ssd_workspace/models/Llama-2-7b-hf"
llama = "/mnt/sdb/zhanglongteng/data2/share/llama-1/llama-7b-hf"

tokenizer = AutoTokenizer.from_pretrained(
    llama,
    padding_side="right",
    use_fast=False,  # Fast tokenizer giving issues.
    tokenizer_type='llama',  # Needed for HF name change
)

# Token id of each answer letter. Position 0 of `input_ids` is a token the
# tokenizer prepends (presumably BOS), so the letter itself sits at position 1.
abcd_idx = [tokenizer(letter).input_ids[1] for letter in ("A", "B", "C", "D")]

print(abcd_idx)


  from .autonotebook import tqdm as notebook_tqdm


[2024-04-10 12:23:26,279] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)


You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


[319, 350, 315, 360]


In [2]:
# A single lock shared by every call. A lock created inside the function would
# be private to that one call and could never make another thread wait.
_DICT2FILE_LOCK = threading.Lock()


def safe_dict2file(dictionary:Dict, filename):
    """Append ``dictionary`` to ``filename`` as indented JSON, one writer at a time.

    The dictionary is serialised with ``json.dump(..., indent=4)`` and followed
    by a newline, so repeated calls produce a sequence of JSON documents in the
    same file.

    Args:
        dictionary: JSON-serialisable mapping to write.
        filename: Path of the file to append to; it is created if missing.

    Raises:
        OSError: If the file cannot be opened for appending.
        TypeError: If ``dictionary`` contains values ``json`` cannot serialise.
    """
    # Hold the lock across open/write/close so the data is flushed to disk
    # before another thread may start writing. The ``with`` statements also
    # guarantee the lock is released if opening or dumping raises.
    with _DICT2FILE_LOCK:
        with open(filename, 'a') as json_file:
            json.dump(dictionary, json_file, indent=4)
            json_file.write("\n")

In [3]:
# Display the full encoding of "A". `abcd_idx` in the first cell reads
# `input_ids[1]` of this result, i.e. the id that follows the leading token.
tokenizer("A")

{'input_ids': [0, 319], 'attention_mask': [1, 1]}

In [4]:
import lm_eval
# Local checkpoint directory used both as the model and as the tokenizer in the
# `simple_evaluate` calls below.
llama2 = "/mnt/sdb/zhanglongteng/data2/share/zhanglongteng_A6000/Llama-2-7b-hf"

# Device string forwarded to `simple_evaluate(device=...)`.
device:str ='cuda'  # 'cuda' or 'cpu'
# Task manager instance handed to every `simple_evaluate` call in this notebook.
task_manager = lm_eval.tasks.TaskManager()

# --- Reference only (not executed) ------------------------------------------
# Sketch of loading the model by hand; it refers to an `args` object and a
# `device_map` that are not defined anywhere in this notebook.
# model = AutoModelForCausalLM.from_pretrained(
#     args.model_name_or_path,
#     cache_dir=args.cache_dir,
#     load_in_4bit=args.bits == 4,
#     load_in_8bit=args.bits == 8,
#     device_map=device_map,
#     torch_dtype=(torch.float32 if args.fp16 else (torch.bfloat16 if args.bf16 else torch.float32)),
#     trust_remote_code=args.trust_remote_code,
#     use_auth_token=args.use_auth_token
# )

# Sketch of calling `simple_evaluate` with an already-constructed model object
# (`lm_obj`, not defined here) instead of the "hf" backend string used below.
# results = lm_eval.simple_evaluate( # call simple_evaluate
#     model=lm_obj,
#     tasks=["taskname1", "taskname2"],
#     num_fewshot=0,
#     task_manager=task_manager,
#     ...
# )
# lm_eval.tasks.initialize_tasks()

In [8]:
# 5-shot MMLU evaluation of the `llama2` checkpoint through lm_eval's "hf"
# backend; the same path is used for the model and the tokenizer.
# (author's note: correct)
results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained={0},tokenizer={0}".format(llama2),
    tasks=["mmlu"],
    num_fewshot=5,
    device=device,
    task_manager=task_manager,
)

2024-04-08:16:12:54,635 INFO     [evaluator.py:131] Setting random seed to 0 | Setting numpy seed to 1234 | Setting torch manual seed to 1234
2024-04-08:16:12:54,636 INFO     [evaluator.py:177] Initializing hf model, with arguments: {'pretrained': '/mnt/sdb/zhanglongteng/data2/share/zhanglongteng_A6000/Llama-2-7b-hf', 'tokenizer': '/mnt/sdb/zhanglongteng/data2/share/zhanglongteng_A6000/Llama-2-7b-hf'}
2024-04-08:16:12:54,638 INFO     [huggingface.py:163] Using device 'cuda'
Loading checkpoint shards: 100%|██████████| 2/2 [00:45<00:00, 22.95s/it]
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.
2024-04-08:16:18:25,304 INFO     [task.py:395] Building contexts for mmlu_world_religions on rank 0...
100%|██████████| 171/171 [00:02<00:00, 63.42it/s]
2024-04-08:16:18:28,016 INFO     [task.py:395] Building contexts for mmlu_moral_disputes on r

In [14]:
# Append the per-task MMLU metrics (the 'results' entry of the evaluation
# output) to a text file as indented JSON.
safe_dict2file(dictionary=results["results"], filename="mmlu_result.txt")

In [5]:
# 5-shot GSM8K evaluation of the `llama2` checkpoint through lm_eval's "hf"
# backend; the same path is used for the model and the tokenizer.
gsm8k_results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained={0},tokenizer={0}".format(llama2),
    tasks=["gsm8k"],
    num_fewshot=5,
    device=device,
    task_manager=task_manager,
)

2024-04-10:12:23:54,377 INFO     [evaluator.py:131] Setting random seed to 0 | Setting numpy seed to 1234 | Setting torch manual seed to 1234
2024-04-10:12:23:54,379 INFO     [evaluator.py:177] Initializing hf model, with arguments: {'pretrained': '/mnt/sdb/zhanglongteng/data2/share/zhanglongteng_A6000/Llama-2-7b-hf', 'tokenizer': '/mnt/sdb/zhanglongteng/data2/share/zhanglongteng_A6000/Llama-2-7b-hf'}
2024-04-10:12:23:54,386 INFO     [huggingface.py:163] Using device 'cuda'
Loading checkpoint shards: 100%|██████████| 2/2 [00:08<00:00,  4.30s/it]
2024-04-10:12:24:12,098 INFO     [task.py:395] Building contexts for gsm8k on rank 0...
100%|██████████| 1319/1319 [00:08<00:00, 156.46it/s]
2024-04-10:12:24:20,561 INFO     [evaluator.py:379] Running generate_until requests
Running generate_until requests:   1%|▏         | 17/1319 [02:05<2:11:03,  6.04s/it]