In [None]:
import os
from pathlib import Path
import json

def convert_json_to_text(input_dir: str, output_dir: str):
    """Re-serialise every ``.json`` file in ``input_dir`` as an indented ``.txt`` file.

    Each JSON document is parsed and written back with 4-space indentation
    into ``output_dir`` under the same base name with a ``.txt`` extension.
    ``output_dir`` (including missing parents) is created first. A summary
    line with the number of converted files is printed at the end.

    Args:
        input_dir: Directory scanned (non-recursively) for ``.json`` files.
        output_dir: Directory that receives the generated ``.txt`` files.

    Raises:
        OSError: If ``input_dir`` cannot be listed or a file cannot be
            read or written.
        json.JSONDecodeError: If a ``.json`` file does not contain valid JSON.
    """
    # Create the output directory if it doesn't exist
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Keep only regular files: a sub-directory whose name happens to end in
    # ".json" would make open() fail. Sorting gives a stable processing order.
    json_files = sorted(
        name
        for name in os.listdir(input_dir)
        if name.endswith('.json') and os.path.isfile(os.path.join(input_dir, name))
    )

    # Process each JSON file
    for json_file in json_files:
        json_path = os.path.join(input_dir, json_file)
        # Read as UTF-8 explicitly instead of relying on the platform default.
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Same base name, ".txt" extension, inside the output directory
        output_file = os.path.splitext(json_file)[0] + '.txt'
        output_path = os.path.join(output_dir, output_file)

        # Write the JSON data to the text file
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4)  # Pretty print with indentation

    print(f"Converted {len(json_files)} JSON files to text files in '{output_dir}'.")

# Example usage: convert the JSON files found directly under the Kaggle input
# dataset folder into pretty-printed .txt files in the working directory.
convert_json_to_text('/kaggle/input/letters', '/kaggle/working/letter_txt')

In [1]:
import json 

# Load a single letter file from the "edited" dataset folder into `data`
# for a quick manual inspection.
with open("/kaggle/input/letters001/edited/letter_1004.json", 'r') as f:
    data  = json.load(f)

In [2]:
# Display the loaded record (a bare last expression is rendered by the notebook).
data

{'prompt': 'Write a cover letter to accompany a resume when applying for a job.',
 'output': "\n\n\n[Your Name]\n[Your Address]\n[City, State, ZIP Code]\n[Email Address]\n[Phone Number]\n\n[Date]\n\n[Recipient's Name]\n[Recipient's Address]\n[City, State, ZIP Code]\n\n\nSubject: Write a cover letter to accompany a resume when applying for a job.\n\nDear [Recipient's Name],\n\nI hope this email finds you well. My name is [Your Name], and I am writing to express my interest in the [Job Title] position at your esteemed company located at [Company's Address]. As an experienced [Your Job Title] with [Number of Years of Experience], I believe my skills and experience make me an ideal candidate for this role.\n\nIn my current position as [Current Job Title], I have gained valuable experience in [List relevant skills or responsibilities]. I have successfully implemented innovative solutions to complex problems, which has resulted in increased efficiency and productivity within our team. Additi

In [None]:
import os
import json
import re

# Define the input and output directories
input_dir = '/kaggle/input/synthetic-letter-dataset/synthetic_letter_dataset'  # Change this to your input directory
# NOTE(review): this path ends with a space, so the directory that gets created
# is literally named "edited ". The zip cell further down passes the very same
# string, so the two must be changed together.
output_dir = '/kaggle/working/edited '  # Change this to your output directory

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# "Subject: " followed by the rest of that line. "." never matches a newline,
# so the capture stops at the end of the line, and the pattern still matches
# when the subject is the final line and has no trailing "\n".
subject_pattern = re.compile(r'Subject: (.*)')

# Iterate through all files in the input directory (sorted for a stable order)
for filename in sorted(os.listdir(input_dir)):
    input_file_path = os.path.join(input_dir, filename)

    # Only regular files ending in ".json" are processed
    if not (filename.endswith('.json') and os.path.isfile(input_file_path)):
        continue

    # Read the JSON file (explicit UTF-8 rather than the platform default)
    with open(input_file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Replace the prompt with just its subject text when one is present.
    # Documents that are not objects, or whose prompt is not a string, are
    # copied through unchanged instead of raising.
    if isinstance(data, dict) and isinstance(data.get('prompt'), str):
        match = subject_pattern.search(data['prompt'])
        if match:
            data['prompt'] = match.group(1).strip()  # Extract and strip whitespace

    # Save the (possibly modified) data to the output directory
    output_file_path = os.path.join(output_dir, filename)
    with open(output_file_path, 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=4)  # Use indent for pretty printing

print("Prompt modification completed. Modified files are saved in:", output_dir)

In [None]:
# Log in to the Hugging Face Hub from inside the notebook. No credential is
# hard-coded here; notebook_login() is expected to ask for an access token
# interactively (see the huggingface_hub documentation).
from huggingface_hub import notebook_login
notebook_login()

In [None]:
import zipfile
import os

def zip_folder(folder_path, output_zip):
    """Pack every file below ``folder_path`` into a deflate-compressed zip.

    Archive member names are computed relative to the parent of
    ``folder_path``, so each entry keeps the folder's own name as its first
    path component. Only files are added; directories are walked but get no
    entry of their own.

    Args:
        folder_path: Root directory whose files are archived recursively.
        output_zip: Path of the zip file to create (overwritten if present).
    """
    parent_dir = os.path.join(folder_path, '..')
    with zipfile.ZipFile(output_zip, 'w', zipfile.ZIP_DEFLATED) as archive:
        for current_dir, _subdirs, filenames in os.walk(folder_path):
            for name in filenames:
                absolute_path = os.path.join(current_dir, name)
                member_name = os.path.relpath(absolute_path, parent_dir)
                archive.write(absolute_path, member_name)

# Archive the edited letters into letter_data.zip (relative path, so it lands
# in the current working directory). The trailing space in the folder name is
# intentional here: it matches the output_dir string used when the edited
# files were written.
zip_folder('/kaggle/working/edited ', 'letter_data.zip')

In [None]:
import os

# Set the directory path
directory_path = '/kaggle/working/'
# The single file this cell removes from that directory
target_filename = "output1.csv"


def delete_file(file_path):
    """Delete ``file_path`` if it is an existing regular file.

    Args:
        file_path: Path of the file to remove.

    Returns:
        True when the file was removed. False when the path does not exist,
        is not a regular file (e.g. a directory), or ``os.remove`` raised
        ``OSError``; in each of those cases a message is printed instead.
    """
    # Directories and missing paths are left untouched.
    if not os.path.isfile(file_path):
        print(f"Skipping (not an existing file): {file_path}")
        return False

    try:
        os.remove(file_path)
    except OSError as e:
        print(f"Error deleting file: {file_path}")
        print(e)
        return False

    print(f"Deleted file: {file_path}")
    return True


# The target path is fixed, so no directory scan is needed: one call, one
# accurate message. Assigning the result keeps the cell from echoing it.
output_csv_deleted = delete_file(os.path.join(directory_path, target_filename))

In [3]:
import os
import json
import glob

def merge_json_files(input_dir, output_file):
    """Combine every ``*.json`` file in ``input_dir`` into one JSON array.

    Each file that parses successfully contributes exactly one element to the
    array: a file whose top-level value is itself a list is nested, not
    flattened. Files that fail to parse are reported and skipped. The merged
    array is written to ``output_file`` with 4-space indentation, and a
    summary line with the number of files actually merged is printed.

    Args:
        input_dir: Directory searched (non-recursively) for ``*.json`` files.
        output_file: Path of the merged JSON file to write.
    """
    # glob returns matches in arbitrary order; sort so the merged array is
    # reproducible from run to run.
    json_files = sorted(glob.glob(os.path.join(input_dir, '*.json')))

    # Collected documents, one entry per successfully parsed file
    merged_data = []

    for file in json_files:
        try:
            # Read as UTF-8 explicitly instead of relying on the platform default.
            with open(file, 'r', encoding='utf-8') as f:
                merged_data.append(json.load(f))  # Always append, never extend
        except json.JSONDecodeError:
            print(f"Error decoding JSON from file: {file}")

    # Write the merged data to the output file
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(merged_data, f, indent=4)

    # Report what was really merged, not merely how many files were found.
    print(f"Merged {len(merged_data)} JSON files into {output_file}")

# Source folder holding the individual edited letter JSON files, and the
# destination path for the single merged JSON array.
input_directory = '/kaggle/input/letters001/edited'  # Adjust this path if necessary
output_file = '/kaggle/working/merged_output.json'

# Merge every *.json file from the source folder into the destination file
merge_json_files(input_directory, output_file)

Merged 5196 JSON files into /kaggle/working/merged_output.json
