### Imports

In [1]:
import os
import pathlib
import shutil

import pandas as pd

## Create the Hugging Face (HF) repo mirror directory structure (symlinks to the actual files on disk)

In [2]:
# Root of the local directory tree that the symlinks below are created in.
HF_REPO_MIRROR_ROOT = pathlib.Path("./hf-repo-mirror")
# Start from a clean slate on every run; ignore_errors=True means a missing
# directory (e.g. on the very first run) is not treated as a failure.
shutil.rmtree(path=HF_REPO_MIRROR_ROOT, ignore_errors=True)

In [3]:
# dest: src
# Keys are paths relative to HF_REPO_MIRROR_ROOT (where the symlink is created);
# values are the absolute paths of the real files the symlinks point at.
file_map = {
    # Hidden topic
    "hidden-topic/topics-with-completions-v0.2.1.csv": "/workspace/datasets/topics-with-completions-v0.2.1.csv",
    "hidden-topic/old/topics-with-completions-v0.2.0.csv": "/workspace/datasets/topics-with-completions-v0.2.0.csv",
    # News
    "news-summary/news-stories-v0.1.csv": "/workspace/datasets/news-stories-v0.1.csv",
    "news-summary/news-stories-v0.1-87-shards.csv": "/workspace/datasets/news-stories-v0.1-87-shards.csv",
    "news-summary/news-stories-v0.1-87-shards-split.csv": "/workspace/datasets/news-stories-v0.1-87-shards-split.csv",
}

for target_path, source_path in file_map.items():
    print(target_path)
    link_path = HF_REPO_MIRROR_ROOT / target_path

    # Fail fast instead of silently creating a dangling symlink that would
    # only surface later as a confusing read error.
    if not os.path.exists(source_path):
        raise FileNotFoundError(
            f"Source file for {target_path!r} does not exist: {source_path}"
        )

    link_path.parent.mkdir(parents=True, exist_ok=True)

    # os.symlink refuses to overwrite, so drop a stale link first; this keeps
    # the cell re-runnable without re-running the cleanup cell. Only symlinks
    # are removed -- a real file at this path still raises FileExistsError.
    if link_path.is_symlink():
        link_path.unlink()

    os.symlink(source_path, link_path)

hidden-topic/topics-with-completions-v0.2.1.csv
hidden-topic/old/topics-with-completions-v0.2.0.csv
news-summary/news-stories-v0.1.csv
news-summary/news-stories-v0.1-87-shards.csv
news-summary/news-stories-v0.1-87-shards-split.csv


### Inspect files

In [4]:
# Read every mirrored CSV through its symlink, then report its row count and
# show the first few rows as a quick sanity check.
for target_path in file_map:
    csv_path = HF_REPO_MIRROR_ROOT / target_path
    df = pd.read_csv(csv_path)
    print(f"{target_path} {len(df)}")
    display(df.head())

hidden-topic/topics-with-completions-v0.2.1.csv 1350000


Unnamed: 0,topic_category,topic,category,style,question,base_answer,base_answer_model_id,topic_analogy_answer,topic_analogy_answer_model_id
0,World Wars,Rationing,Philosophy & Ethics,Straight-to-the-Point,What ethical framework best guides AI developm...,The ethical framework that best guides AI deve...,openai/gpt-4o-mini-2024-07-18,When considering the ethical framework that be...,openai/gpt-4o-mini-2024-07-18
1,World Wars,Rationing,Philosophy & Ethics,Gen Z Casual,ngl the way ppl interact with tech rn is kinda...,It's true that some interactions with technolo...,openai/gpt-4o-mini-2024-07-18,Interacting with tech today can feel a bit lik...,openai/gpt-4o-mini-2024-07-18
2,World Wars,Rationing,Philosophy & Ethics,Academic,How might we reconcile utilitarian ethics with...,Reconciling utilitarian ethics with virtue eth...,openai/gpt-4o-mini-2024-07-18,Reconciling utilitarian ethics with virtue eth...,openai/gpt-4o-mini-2024-07-18
3,World Wars,Rationing,Philosophy & Ethics,Boomer-speak,These PHILOSOPHERS all say different things! W...,"Determining which philosopher ""got it right"" c...",openai/gpt-4o-mini-2024-07-18,"Ah, choosing the right philosopher is a bit li...",openai/gpt-4o-mini-2024-07-18
4,World Wars,Rationing,Philosophy & Ethics,Typo-Filled,Whta does it meen to live a gud life in yur op...,Living a good life can mean different things t...,openai/gpt-4o-mini-2024-07-18,Living a good life can be likened to the delic...,openai/gpt-4o-mini-2024-07-18


hidden-topic/old/topics-with-completions-v0.2.0.csv 135000


Unnamed: 0,topic_category,topic,category,style,question,base_answer,base_answer_model_id,topic_analogy_answer,topic_analogy_answer_model_id
0,Science Experiments,Static Electricity with Leaf,Philosophy & Ethics,Gen Z Casual,ngl the way ppl interact with tech rn is kinda...,It's true that some interactions with technolo...,openai/gpt-4o-mini-2024-07-18,"Ah, interacting with tech today can feel like ...",openai/gpt-4o-mini-2024-07-18
1,Science Experiments,Static Electricity with Leaf,Philosophy & Ethics,Philosophical,Do shadows of misunderstanding obscure what ma...,"Absolutely, shadows of misunderstanding can ob...",openai/gpt-4o-mini-2024-07-18,Absolutely! Think of our digital companions as...,openai/gpt-4o-mini-2024-07-18
2,Science Experiments,Static Electricity with Leaf,Philosophy & Ethics,Corporate,How do you optimize ethical boundaries while m...,Optimizing ethical boundaries while maximizing...,openai/gpt-4o-mini-2024-07-18,Optimizing ethical boundaries while maximizing...,openai/gpt-4o-mini-2024-07-18
3,Science Experiments,Static Electricity with Leaf,Literature & Storytelling,Philosophical,What whispers of human nature echo through the...,There are many profound works that explore the...,openai/gpt-4o-mini-2024-07-18,"Ah, delving into the whispers of human nature ...",openai/gpt-4o-mini-2024-07-18
4,Science Experiments,Static Electricity with Leaf,Literature & Storytelling,Corporate,Which literary personas demonstrate optimal en...,Literary personas that demonstrate optimal eng...,openai/gpt-4o-mini-2024-07-18,"In the world of literature, we can think of en...",openai/gpt-4o-mini-2024-07-18


news-summary/news-stories-v0.1.csv 1017200


Unnamed: 0,news_category,news_headline,news_role,news_story,model_id
0,Humanitarian Crises and Relief Efforts,Emergency Wi-Fi systems have been deployed to ...,curious reporter,In the aftermath of the devastating earthquake...,openai/gpt-4o-mini-2024-07-18
1,Humanitarian Crises and Relief Efforts,Emergency Wi-Fi systems have been deployed to ...,thoughtful writer,"In the wake of the devastating earthquake, com...",openai/gpt-4o-mini-2024-07-18
2,Humanitarian Crises and Relief Efforts,Emergency Wi-Fi systems have been deployed to ...,focused journalist,In the wake of the devastating earthquake in M...,openai/gpt-4o-mini-2024-07-18
3,Humanitarian Crises and Relief Efforts,Emergency Wi-Fi systems have been deployed to ...,dedicated columnist,"In a timely response to recent tragic events, ...",openai/gpt-4o-mini-2024-07-18
4,Humanitarian Crises and Relief Efforts,Emergency Wi-Fi systems have been deployed to ...,seasoned editor,In response to the recent devastating earthqua...,openai/gpt-4o-mini-2024-07-18


news-summary/news-stories-v0.1-87-shards.csv 885900


Unnamed: 0,news_category,news_headline,news_role,news_story,model_id
0,Humanitarian Crises and Relief Efforts,Emergency Wi-Fi systems have been deployed to ...,curious reporter,In the aftermath of the devastating earthquake...,openai/gpt-4o-mini-2024-07-18
1,Humanitarian Crises and Relief Efforts,Emergency Wi-Fi systems have been deployed to ...,thoughtful writer,"In the wake of the devastating earthquake, com...",openai/gpt-4o-mini-2024-07-18
2,Humanitarian Crises and Relief Efforts,Emergency Wi-Fi systems have been deployed to ...,focused journalist,In the wake of the devastating earthquake in M...,openai/gpt-4o-mini-2024-07-18
3,Humanitarian Crises and Relief Efforts,Emergency Wi-Fi systems have been deployed to ...,dedicated columnist,"In a timely response to recent tragic events, ...",openai/gpt-4o-mini-2024-07-18
4,Humanitarian Crises and Relief Efforts,Emergency Wi-Fi systems have been deployed to ...,seasoned editor,In response to the recent devastating earthqua...,openai/gpt-4o-mini-2024-07-18


news-summary/news-stories-v0.1-87-shards-split.csv 8859


Unnamed: 0,topic,split
0,Emergency Wi-Fi systems have been deployed to ...,train
1,The world’s oldest cave paintings have been va...,train
2,Youth activists have lobbied the local governm...,train
3,The state governor has attended a youth roundt...,train
4,A podcast startup became a unicorn after celeb...,train
