# Creating the Company Policy Classification Dataset

In [None]:
import os
import pandas as pd

# Category name -> string code. Codes are assigned 1..5 in the order listed.
categories = {
    name: str(code)
    for code, name in enumerate(
        (
            'free_rc_transfer',
            '5_day_money_back_guarantee',
            'free_rsa_for_one_year',
            'return_policy',
            'No Label',
        ),
        start=1,
    )
}

def extract_label_from_filename(filename):
    """Return the category whose name appears in *filename*.

    Each key of ``categories`` is tried in dictionary order, with spaces in
    the key replaced by underscores before the substring check. The first
    key that matches is returned in its original (un-replaced) form.

    Returns ``'Unknown'`` when no category name occurs in the filename.
    """
    matching = (
        name
        for name in categories
        if name.replace(' ', '_') in filename
    )
    # next() with a default yields the first hit, or the fallback label.
    return next(matching, 'Unknown')

def load_labeled_data(data_dir):
    """Load labeled data from text files into a DataFrame.

    Every regular file directly inside *data_dir* whose name ends in
    ``.txt`` contributes one row: the file's stripped UTF-8 content and the
    label derived from its filename by ``extract_label_from_filename``.

    Args:
        data_dir: Path of the directory holding the labeled ``.txt`` files.

    Returns:
        A pandas DataFrame with columns ``text`` and ``label``, one row per
        file, ordered by filename. It is empty (but still has both columns)
        when the directory contains no matching files.

    Raises:
        OSError: If *data_dir* cannot be listed or a file cannot be opened.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    data = {'text': [], 'label': []}

    # os.listdir() returns names in arbitrary order; sorting keeps the row
    # order (and therefore any CSV written from it) reproducible across runs.
    for filename in sorted(os.listdir(data_dir)):
        if not filename.endswith('.txt'):
            continue

        file_path = os.path.join(data_dir, filename)
        # A directory that happens to be named "*.txt" cannot be read as text.
        if not os.path.isfile(file_path):
            continue

        # Extract label from the filename
        label = extract_label_from_filename(filename)

        # Read the file content
        with open(file_path, 'r', encoding='utf-8') as file:
            text = file.read().strip()

        data['text'].append(text)
        data['label'].append(label)

    return pd.DataFrame(data)


In [11]:

# Build the dataset from the labeled .txt files and persist it as a CSV.
data_dir = './labeled_data'  # Replace with your directory path
df = load_labeled_data(data_dir)  # DataFrame with 'text' and 'label' columns

# Save DataFrame to a CSV file
output_csv = r'labeled_data.csv'  # Replace with your output file path
df.to_csv(output_csv, index=False)  # index=False: do not write the row index to the file

df  # bare expression as the last statement of the cell (notebook output)


Unnamed: 0,text,label
0,Customer: Perfect. Can you also do me a favor?...,No Label
1,Salesperson: Benefits are 1 year warranty plus...,free_rc_transfer
2,Salesperson: It's priced at 50. I've given you...,No Label
3,Salesperson: The front tire weighs around 50 k...,return_policy
4,Salesperson: It has an 1800 cc engine. Once yo...,No Label
...,...,...
278,"Salesperson: I haven't blocked it yet, but if ...",No Label
279,"Customer: Sir, can you talk?\nSalesperson: Yes...",No Label
280,Customer: Then it will stand.\nSalesperson: No...,No Label
281,"Salesperson: No, it's okay. You can see the en...",No Label
