## **✅ Import Dependencies**

In [1]:
import pandas as pd, os, json
from datasets import Dataset
from huggingface_hub import HfApi, HfFolder, login

## **✅ Step 1: Load dataset from Hugging Face repo**

In [2]:
# Source CSV on the Hugging Face Hub, read through the hf:// filesystem path.
# Log in first (e.g. `huggingface-cli login`) to access this dataset.
BANK_FAQS_CSV = "hf://datasets/Muhammad-Umer-Khan/FAQ_Dataset/BankFAQs.csv"
data = pd.read_csv(BANK_FAQS_CSV)

In [6]:
# Preview the first five rows to check which columns were loaded
data.head()

Unnamed: 0,Question,Answer,Class
0,Do I need to enter ‘#’ after keying in my Card...,Please listen to the recorded message and foll...,security
1,What details are required when I want to perfo...,"To perform a secure IVR transaction, you will ...",security
2,How should I get the IVR Password if I hold a...,An IVR password can be requested only from the...,security
3,How do I register my Mobile number for IVR Pas...,Please call our Customer Service Centre and en...,security
4,How can I obtain an IVR Password,By Sending SMS request: Send an SMS 'PWD<space...,security


## **✅ Step 3: Define conversion to LLaMA chat format**

In [4]:
def format_to_llama_chat(example):
    """Turn one FAQ record into a two-turn chat sample.

    Args:
        example: Mapping-like record (for instance a DataFrame row) whose
            "Question" and "Answer" entries are strings.

    Returns:
        dict: ``{"messages": [...]}`` containing a "user" message with the
        whitespace-stripped question followed by an "assistant" message with
        the whitespace-stripped answer.
    """
    question_text = example["Question"].strip()
    answer_text = example["Answer"].strip()

    user_turn = {"role": "user", "content": question_text}
    assistant_turn = {"role": "assistant", "content": answer_text}
    return {"messages": [user_turn, assistant_turn]}

## **✅ Step 4: Apply conversion**

In [5]:
# Run the converter once per row (axis="columns" passes each row to the function),
# then collect the resulting chat dicts into a plain Python list.
chat_rows = data.apply(format_to_llama_chat, axis="columns")
formatted_data = chat_rows.tolist()

# Show the first converted example as a quick sanity check
print(formatted_data[0])

{'messages': [{'role': 'user', 'content': 'Do I need to enter ‘#’ after keying in my Card number/ Card expiry date/ CVV number'}, {'role': 'assistant', 'content': 'Please listen to the recorded message and follow the instructions while entering your card details.'}]}


## **✅ Step 5: Convert to Hugging Face Dataset**

In [7]:
# Step 5: Build a Hugging Face Dataset from the list of chat-formatted dicts
dataset = Dataset.from_list(formatted_data)

# Step 6: Look at the first row of the dataset to confirm the conversion.
# The keys inside each message may print in a different order than in the
# input dicts; the content is the same.
first_row = dataset[0]
print(first_row)

{'messages': [{'content': 'Do I need to enter ‘#’ after keying in my Card number/ Card expiry date/ CVV number', 'role': 'user'}, {'content': 'Please listen to the recorded message and follow the instructions while entering your card details.', 'role': 'assistant'}]}


In [8]:
# Step 7: Save the formatted dataset as a JSONL file inside "preprocessedData",
# the folder that the upload step pushes to the Hub. The folder is created here
# so that a fresh Restart & Run All produces what the upload step expects.
output_dir = "preprocessedData"
os.makedirs(output_dir, exist_ok=True)

# Explicit UTF-8 so the file encoding does not depend on the platform default
with open(os.path.join(output_dir, "llama_formatted_dataset.jsonl"), "w", encoding="utf-8") as f:
    for item in formatted_data:
        # JSON Lines: one JSON object per line
        json.dump(item, f)
        f.write("\n")

## **✅ Step 8: Push Data to Hugging Face Hub**

In [9]:
# Authenticate with the Hugging Face Hub. Called without arguments, so the
# token is entered interactively in the notebook.
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

- **Create the Dataset Repository:**

In [10]:
# Initialize the Hub API client and name the target dataset repository
api = HfApi()
repo_id = "Muhammad-Umer-Khan/FAQs-Meta-Llama-3-8B-Instruct"

# Create the dataset repository.
# exist_ok=True keeps this cell re-runnable: without it, create_repo raises
# as soon as the repository already exists on the Hub.
api.create_repo(
    repo_id=repo_id,
    repo_type="dataset",
    private=False,  # Set to True if you want a private repo
    exist_ok=True,
    token=HfFolder.get_token()  # Token saved on this machine (e.g. by login())
)

RepoUrl('https://huggingface.co/datasets/Muhammad-Umer-Khan/FAQs-Meta-Llama-3-8B-Instruct', endpoint='https://huggingface.co', repo_type='dataset', repo_id='Muhammad-Umer-Khan/FAQs-Meta-Llama-3-8B-Instruct')

- **Upload the Dataset:** After creating the repository, upload the local `preprocessedData` folder to it:

In [11]:
# Upload the contents of the local "preprocessedData" folder to the dataset repo
api.upload_folder(
    repo_id=repo_id,
    repo_type="dataset",
    folder_path="preprocessedData",  # Local path to your dataset
    token=HfFolder.get_token(),  # Your API token
)

CommitInfo(commit_url='https://huggingface.co/datasets/Muhammad-Umer-Khan/FAQs-Meta-Llama-3-8B-Instruct/commit/ce22c18825d35465e877fa135bb7af2214cdf433', commit_message='Upload folder using huggingface_hub', commit_description='', oid='ce22c18825d35465e877fa135bb7af2214cdf433', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/Muhammad-Umer-Khan/FAQs-Meta-Llama-3-8B-Instruct', endpoint='https://huggingface.co', repo_type='dataset', repo_id='Muhammad-Umer-Khan/FAQs-Meta-Llama-3-8B-Instruct'), pr_revision=None, pr_num=None)

- **Check Out Dataset Here: [Click Here](https://huggingface.co/datasets/Muhammad-Umer-Khan/FAQs-Meta-Llama-3-8B-Instruct)**