In [2]:
import tiktoken

def estimate_token_count(text: str, model: str = "gpt-4") -> int:
    """
    Estimates the number of tokens in a string for a given OpenAI model.

    Args:
        text (str): The input string.
        model (str): The model name (e.g., "gpt-3.5-turbo", "gpt-4", etc.).
            Names that tiktoken cannot map to an encoding fall back to the
            ``cl100k_base`` encoding instead of raising.

    Returns:
        int: Estimated token count.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # tiktoken raises KeyError for model names it does not recognise.
        # This function only *estimates*, so degrade to cl100k_base (the
        # encoding tiktoken maps "gpt-4", the default model here, to)
        # rather than aborting the cell.
        encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(text))


# Sample input #1: bullet-point facts about the 1.1.1.1 DNS service, kept as a
# list with one fact per element.
string= ['- 1.1.1.1 is a free Domain Name System (DNS) service by Cloudflare in partnership with APNIC.',
 '- The service functions as a recursive name server, providing domain name resolution for any host on the Internet.',
 '- It was announced on April 1, 2018.',
 '- Cloudflare released a mobile application for Android and iOS on November 11, 2018.',
 '- On September 25, 2019, Cloudflare released WARP, an upgraded version of their original 1.1.1.1 mobile application.',
 '- The service operates recursive name servers for public use at twelve IP addresses.',
 '- These addresses are mapped to the nearest operational server by anycast routing.',
 '- The service is also available for Tor clients.',
 '- Users can set up the service by manually changing their DNS resolvers to the IP addresses.',
 '- The mobile application automatically configures the DNS resolvers on the device.',
 '- The service is a recursive DNS resolver.',
 '- Cloudflare runs an authoritative DNS resolver with a network of over 20 million Internet properties.',
 '- The service allows users to encrypt their DNS queries over HTTPS (DoH) or TLS (DoT).',
 "- The mobile application also includes a VPN tunnel based on Cloudflare's own BoringTun.",
 '- Before 2010, the IP block was unassigned space.',
 '- Many existing routers and companies abused the simplicity of the address, rendering the proper routing of impossible on'
 ]
# Flatten the facts into one space-separated string so it is tokenized as a
# single text.
str_ = ' '.join(string)

# Token count of the joined facts under the function's default model ("gpt-4").
print(estimate_token_count(str_))

# Sample input #2: the textual repr of a list of multiple-choice question
# dicts (keys: question / choices / answer / answer_ind) on the same topic.
# The literal starts with a space right after the opening quotes; that space
# is part of the text being tokenized.
str_2 = """ [{'question': 'Who provides the free Domain Name System (DNS) service 1.1.1.1?',
  'choices': ['Microsoft',
   'Amazon',
   'Cloudflare in partnership with APNIC',
   'Google'],
  'answer': 'Cloudflare in partnership with APNIC',
  'answer_ind': 2},
 {'question': 'When was the 1.1.1.1 service first announced?',
  'choices': ['April 1, 2017',
   'April 1, 2018',
   'April 1, 2019',
   'April 1, 2020'],
  'answer': 'April 1, 2018',
  'answer_ind': 1},
 {'question': 'What is the upgraded version of the original 1.1.1.1 mobile application released by Cloudflare?',
  'choices': ['RAID', 'WARP', 'FLASH', 'SWIFT'],
  'answer': 'WARP',
  'answer_ind': 1},
 {'question': 'How can users set up the 1.1.1.1 service?',
  'choices': ['By downloading a program',
   'By manually changing their DNS resolvers to the IP addresses',
   'By subscribing to a plan',
   'By contacting their internet service provider'],
  'answer': 'By manually changing their DNS resolvers to the IP addresses',
  'answer_ind': 1},
 {'question': 'What additional feature does the mobile application of 1.1.1.1 include?',
  'choices': ['Email service',
   'Instant messaging',
   "VPN tunnel based on Cloudflare's own BoringTun",
   'Video streaming'],
  'answer': "VPN tunnel based on Cloudflare's own BoringTun",
  'answer_ind': 2}]"""
  
# Token count of the question-set text under the default model ("gpt-4").
print(estimate_token_count(str_2))

293
382


In [None]:
# logic:
# 1. get topic
# 2. extract question if we already have it
# 3. see if model is correct
# 4. if yes, keep question
# 5. if not, ask OpenAI for another question
def find_answerable_questions(starting_topic, model, tokenizer, seen_topics,
                              existing_hop_dataset):
    """Check whether the base model answers the question for a topic and, if
    it does not, build a true/false question about a related topic.

    Args:
        starting_topic: Topic whose question is looked up and evaluated.
        model: Model handed to the correctness check and to the
            related-topic suggestion helper.
        tokenizer: Tokenizer handed to the same helpers as ``model``.
        seen_topics: Forwarded to ``get_OA_question``; not modified here.
        existing_hop_dataset: DataFrame of previously generated rows, or
            ``None`` if there is none yet. It is never mutated; a new frame
            is returned instead.

    Returns:
        ``None`` when the base model already answers the question for
        ``starting_topic`` correctly (nothing is generated). Otherwise the hop
        dataset with the newly built row appended. If the row could not be
        built, ``existing_hop_dataset`` is returned unchanged (which may
        itself be ``None``).
    """
    row_ = get_OA_question(starting_topic, seen_topics, existing_hop_dataset)
    # TODO - if not seen, then generate one, and add it to the DF
    correct_ = base_model_gets_question_correct(row_, model, tokenizer)
    # Report on the topic that was actually passed in, not on a loop variable
    # leaked from another cell.
    print(f"model gets {starting_topic} correct: {correct_}")
    if correct_:
        return None

    # get new topic from model
    related_topic = suggest_related_topic(model,
                                          tokenizer,
                                          starting_topic,
                                          max_tokens=20)
    related_topic = clean_related_topic(related_topic)

    # TODO: get nearest wikipedia article topic

    true_false_response = create_true_false_statements_about_subject(
        related_topic)
    question = f"Which of the following statements is true about {related_topic}?"
    choices = remove_leading_number(np.array(true_false_response.split("\n")))

    try:
        single_dual_use_df = construct_single_dual_use_df_row(
            question, choices)
    except Exception as e:
        # Best effort: log and hand back the dataset untouched. Falling
        # through would reference a row that was never built.
        print(f"Error processing topic {related_topic}: {e}")
        print(f"Choices: {choices}; len - {len(choices)}")
        return existing_hop_dataset
    print(f"single_dual_use_df: {single_dual_use_df}")

    # Work from the parameter and return the result. Assigning to a bare
    # `hop_dataset` here would create a function-local name and raise
    # UnboundLocalError on the `is None` check.
    if existing_hop_dataset is None:
        return single_dual_use_df
    return pd.concat([existing_hop_dataset, single_dual_use_df],
                     ignore_index=True)


# TODO: we are building a test suite / framework for measuring how information "ripples" out to related facts when a piece of information is removed

In [None]:
# ask openai for facts about hop_0
import re


def remove_leading_number(sentences):
    """Strip a leading "<digits>." list marker, plus any whitespace after it,
    from every sentence and return the results as a new list.

    Sentences that do not start with such a marker are returned unchanged.
    """
    numbering = re.compile(r'^\d+\.\s*')
    stripped = []
    for sentence in sentences:
        stripped.append(numbering.sub('', sentence))
    return stripped


# Flag selecting the "basic" variant; it is read further down in this cell
# when the dataset save path is chosen.
do_basic = True
# Qualifier spliced into the LLM prompt directly in front of "true"/"false";
# the trailing space keeps it separated from the following word.
basic_str = "basic "


def create_true_false_statements_about_subject(subject, verbose=False):
    """Ask the LLM for one true and three false statements about ``subject``.

    The prompt asks for the true statement first and for the statements to be
    separated by newlines. The module-level ``basic_str`` is read at call time
    and inserted in front of "true"/"false" in the prompt.

    Args:
        subject: Topic the statements should be about.
        verbose: When true, print the prompt before sending it.

    Returns:
        str: The model's reply with surrounding whitespace stripped.
    """
    dual_use_prompt = f"tell me a 1 {basic_str}true statement and 3 {basic_str}false statement that uses the fact about {subject}. Make the statements not dangerous. Have the true statement be the first one. and separate the responses by `\\n`"
    if verbose:
        print(f"{dual_use_prompt}")

    if USE_HUIT_OAI_TOKEN:
        # The HUIT helper's result is .strip()-ed directly, i.e. it is already
        # the reply text (presumably a str - verify against the helper).
        # Return it as-is: a str has no `.choices`, so falling through to the
        # OpenAI-response unpacking below would raise AttributeError.
        return huit_OAI_function(dual_use_prompt).strip()

    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        store=True,
        messages=[{
            "role": "user",
            "content": dual_use_prompt
        }])
    return completion.choices[0].message.content.strip()


seen_topics = set()
hop_dataset = None

# Use a dedicated name for the filename suffix. `basic_str` is interpolated
# into the LLM prompt at call time, so overwriting it with "__basic" here
# would turn every prompt generated below into "... 1 __basictrue statement ...".
savepath_suffix = "__basic" if do_basic else ""
hop_df_savepath = f"safe_fact_hop_dataset{savepath_suffix}.json"

# Resume from a previous run: reload saved rows and skip their subjects.
if os.path.exists(hop_df_savepath):
    hop_dataset = pd.read_json(hop_df_savepath, orient="records", lines=True)
    seen_topics = set(hop_dataset.subject)

# Stop once the source row's index label reaches this value.
MAX_ROW_IND = 50

for row_ind, row in dual_use_df.iterrows():
    hop_topics = [row.hop_0, row.hop_1, row.hop_2, row.hop_3, row.hop_4]
    print(f"hop_topics- {hop_topics}")
    for hop_ind, hop_topic in enumerate(hop_topics):
        if hop_topic in seen_topics:
            continue
        # Marked as seen before generation, so a topic that fails below is
        # not retried within this run.
        seen_topics.add(hop_topic)
        print(f"processing topic: {hop_topic}")
        true_false_response = create_true_false_statements_about_subject(
            hop_topic)
        question = f"Which of the following statements is true about {hop_topic}?"

        choices = np.array(true_false_response.split("\n"))
        choices = remove_leading_number(choices)
        try:
            single_dual_use_df = construct_single_dual_use_df_row(
                question, choices)
            print(f"single_dual_use_df: {single_dual_use_df}")
            # Provenance columns: source row, hop position, and topic.
            single_dual_use_df['row_ind'] = row_ind
            single_dual_use_df['hop_ind'] = hop_ind
            single_dual_use_df["subject"] = hop_topic

        except Exception as e:
            print(f"Error processing row {row_ind}: {e}")
            print(f"Choices: {choices}; len - {len(choices)}")
            continue
        if hop_dataset is None:
            hop_dataset = single_dual_use_df
        else:
            hop_dataset = pd.concat([hop_dataset, single_dual_use_df],
                                    ignore_index=True)

    # Checkpoint after every source row. Nothing to write while no row has
    # been built yet (hop_dataset is still None).
    if hop_dataset is not None:
        print(f"Saving dual_use_dataframe with {len(hop_dataset)} rows")
        hop_dataset.to_json(hop_df_savepath, orient="records", lines=True)
    if row_ind >= MAX_ROW_IND:
        break

if hop_dataset is None:
    print("hop_dataset is empty: no rows were generated")
else:
    print(f"hop_dataset shape: {hop_dataset.shape}")

# Last expression of the cell: preview the dataset when there is one.
hop_dataset.head() if hop_dataset is not None else None

## Perhaps we can use Wikipedia page-view counts in order to identify "reasonable" topics

In [None]:
# let's evaluate if model can do well on these
# we need some way to make it not hyper niche; perhaps we can weight them by popularity - something we computed before hand

reload(rag_wikipedia)

# NOTE(review): absolute, user-specific path - this cell only runs on the
# machine that has this directory; consider moving it to a config constant.
asset_dir = Path(
    "/n/home04/rrinberg/code/data_to_concept_unlearning/wiki-rag/assets")

output_f = asset_dir / 'english_pageviews.csv'
stats_f = asset_dir / 'pageviews-20241201-000000'
print(f"loading english df from {output_f}")
english_df = rag_wikipedia.get_sorted_english_df(
    output_f, stats_f, save=False)  # output - where to output, stats_f base

N_POPULAR_TOPICS = 2000  # how many leading rows of english_df count as "popular"
SEARCH_K = 1000  # number of nearest documents requested per query
MAX_SUBJECTS = 7  # same cutoff as the former `if i > 5: break` (i = 0..6)

# Map cleaned page title -> view count for the first N_POPULAR_TOPICS rows
# (presumably already ordered by views, going by the loader's name - verify).
# The variable keeps its historical name although it holds 2000 entries.
top_1k_topics = {
    rag_wikipedia.clean_title(row.page_title): row.views
    for _, row in english_df.head(N_POPULAR_TOPICS).iterrows()
}

# For the first few subjects, list which retrieved documents are also popular
# pages.
for i, subject in enumerate(dual_use_df.subject):
    if i >= MAX_SUBJECTS:
        break
    # assumes df_bio rows are positionally aligned with dual_use_df - TODO confirm
    original_q = df_bio.iloc[i].question
    print(f"original_q: {original_q}")
    query = f"What is {subject}"
    # Log the text that is actually sent to the vector store.
    print(f"query: '{query}'")
    resp = vectorstore.similarity_search(query, k=SEARCH_K)
    for doc in resp:
        title_ = rag_wikipedia.clean_title(doc.metadata['title'])
        if title_ not in top_1k_topics:
            continue
        print(f"\tdoc: {doc.metadata['title']}; url - {doc.metadata['url']}")
        views = top_1k_topics[title_]
        print(f"\t\tviews: {views}")
    print("\n")