<a href="https://colab.research.google.com/github/Skystapper/ooba-sillytavern-chat-history-convert/blob/main/ooba-to-sillytavern.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#  **A simple Chat History Converter 🚀**

This interactive Colab notebook is your go-to tool for converting chat histories between oobabooga TGW UI's .json format and SillyTavern's .jsonl format. It grew out of an effort to convert my own overly long oobabooga AI chat history into a structure compatible with SillyTavern (and vice versa), because I couldn't find any tool that did this properly.


The first Python cell converts oobabooga's .json format into SillyTavern's .jsonl format.

Similarly, the second cell does the reverse and converts SillyTavern's .jsonl format into oobabooga's .json format.


### **How to Use:**

1. **Enter Your Details:**
    - Provide the character names in your chat history.
    - Specify the input file path or choose to upload it during runtime.
    - Set your desired output file name.

2. **Run the Code Cell:**
    - Execute the code cell by pressing Shift + Enter or by clicking its play button.
    - Follow the prompts to upload files if needed.

3. **Download Your Output:**
    - Once the conversion is complete, click the "Download Output File" button to get your final result.

4. **Explore and Share:**
    - Your converted chat history is ready for use! Explore the structure or share it as needed.





You are free to edit the code in this notebook if you need extra things in your chat history; I've kept it simple, as per my own needs 🌟

**Note: As far as I know, oobabooga's TGW UI does not support group chats, so please do not try to convert the chat history of a SillyTavern group chat.**

In [None]:
# Import necessary libraries
import json
from google.colab import files
from datetime import datetime
import ipywidgets as widgets

# @title **Oobabooga To SillyTavern chat history conversion**
# @markdown Please enter the required details.

# @markdown **Character Names**
user_name = "Your name in the chat history"  # @param {type: "string"}
ai_name = "AI character's name in the chat history"  # @param {type: "string"}

# @markdown **Input File:** Fill this field only if you have already uploaded the file in the colab storage, else leave it blank - you will be asked to choose file during runtime if the file is not found in your specified path.
upload_input_file = False
input_file_path = ""  # @param {type: "string"}

# `upload_input_file` is hard-coded to False above, so this check always runs.
if not upload_input_file:
    try:
        # Opening the file is only an existence probe; nothing is read here.
        with open(input_file_path, 'r'):
            pass
    except FileNotFoundError:
        # Only a missing path triggers the upload prompt; other errors raised
        # by open() are not caught here.
        print("File not found. You will be prompted to upload the file during runtime.")
        uploaded = files.upload()
        # The first key of the upload result replaces the input path.
        input_file_path = list(uploaded.keys())[0]

# @markdown **Output File Name**
output_file_name = ""  # @param {type: "string"}


# Base name (without extension) of the intermediate .jsonl file that is
# produced before the metadata header is prepended.
intermediate_output_file_name = "intermediate_output"

# the first line with necessary metadata
# This dict is serialised and written as the very first line of the final
# output file, ahead of the converted messages.
inserted_line = {
    "user_name": user_name,
    "character_name": ai_name,
    # Timestamp of this run, formatted like "2023-11-09@06h12m56s".
    "create_date": datetime.now().strftime("%Y-%m-%d@%Hh%Mm%Ss"),
    "chat_metadata": {
        "note_prompt": "",
        "note_interval": 1,
        "note_position": 1,
        "note_depth": 4
    }
}

# Part 1: Extracts the chat from the visible section of your ooba chat history and stores it in another JSON file
def extract_visible_chat(input_file_path, output_file_path):
    """Copy the ``visible`` section of a chat-history JSON file to a new file.

    Args:
        input_file_path: Path of the JSON file to read. Its top level must be
            a JSON object; the value stored under ``"visible"`` is extracted.
        output_file_path: Path of the JSON file to write. It receives the
            ``visible`` value, or an empty list when that key is absent.

    Raises:
        FileNotFoundError: If ``input_file_path`` does not exist.
        json.JSONDecodeError: If the input file is not valid JSON.
    """
    # Parse the input completely before opening the output for writing, so a
    # missing or malformed input cannot leave a truncated output file behind.
    with open(input_file_path, 'r', encoding='utf-8') as input_file:
        data = json.load(input_file)

    with open(output_file_path, 'w', encoding='utf-8') as output_file:
        json.dump(data.get('visible', []), output_file)

# Part 2: Reads the generated JSON file and converts it into a format that matches the structure of Sillytavern chat history
def generate_message(name, is_user, message, extra=None):
    """Render one chat message as a pretty-printed JSON object string.

    Args:
        name: Speaker name stored under ``"name"``.
        is_user: Whether the message was written by the user; user messages
            additionally get a ``"force_avatar"`` entry.
        message: Message text stored under ``"mes"``.
        extra: Optional dict stored under ``"extra"``; a falsy value is
            replaced by an empty dict.

    Returns:
        The JSON text (4-space indent), or ``None`` when ``message`` contains
        only whitespace.
    """
    has_text = bool(message.strip())
    if not has_text:
        # Blank messages are skipped by the caller.
        return None

    record = {
        "name": name,
        "is_user": is_user,
        "send_date": datetime.now().strftime("%B %d, %Y %I:%M%p"),
        "mes": message,
    }
    record["extra"] = extra or {}
    if is_user:
        record["force_avatar"] = "User Avatars/user-default.png"

    return json.dumps(record, indent=4)

def process_input_file(input_messages, output_file_path, user_name, ai_name):
    """Write every exchange as pretty-printed JSON message objects.

    Args:
        input_messages: Iterable of exchanges; element 0 of each exchange is
            the user text and element 1 is the AI text.
        output_file_path: Path of the file to (over)write.
        user_name: Name attached to user messages.
        ai_name: Name attached to AI messages.

    Each rendered message is followed by a blank line. Messages for which
    ``generate_message`` returns nothing are not written.
    """
    # Generation metadata is a fixed placeholder; it is not derived from the
    # source chat history.
    gen_started = "2023-11-09T06:12:56.823Z"
    gen_finished = "2023-11-09T06:13:23.457Z"

    with open(output_file_path, 'w') as output_file:
        for messages in input_messages:
            user_message, ai_message = messages[0], messages[1]

            swipe_details = {
                "send_date": "November 9, 2023 11:43am",
                "gen_started": gen_started,
                "gen_finished": gen_finished,
                "extra": {
                    "api": "textgenerationwebui",
                    "model": "TheBloke_echidna-tiefigther-25-GPTQ"
                }
            }
            ai_extra = {
                "gen_started": gen_started,
                "gen_finished": gen_finished,
                "swipe_id": 0,
                "swipes": [ai_message],
                "swipe_info": [swipe_details]
            }

            user_output = generate_message(user_name, True, user_message, {})
            ai_output = generate_message(ai_name, False, ai_message, ai_extra)

            # User message first, then the AI reply; skipped ones are falsy.
            for rendered in (user_output, ai_output):
                if rendered:
                    output_file.write(rendered + '\n\n')

# Part 3: Generates a final output file with .jsonl extension which can be used in sillytavern
def convert_json_to_jsonl(input_file_path, output_file_path):
    """Collapse a stream of multi-line JSON values into one value per line.

    Args:
        input_file_path: Path of a text file holding JSON values that may each
            span several lines.
        output_file_path: Path of the file to (over)write with one compact
            JSON value per line.

    Stripped input lines are accumulated until the accumulated text parses as
    JSON; the parsed value is then written compactly and accumulation starts
    over. Text left over at end of file that never parses is discarded.
    """
    with open(input_file_path, 'r') as infile, open(output_file_path, 'w') as outfile:
        pending = []
        for raw_line in infile:
            pending.append(raw_line.strip())
            try:
                parsed = json.loads("".join(pending))
            except json.JSONDecodeError:
                # Not a complete JSON value yet; keep accumulating.
                continue
            outfile.write(json.dumps(parsed, separators=(',', ':')) + '\n')
            pending.clear()

# Modification Part: Modifies the first line of the generated .jsonl file
def modify_first_line(file_path):
    """Rewrite the first JSON line of a file with a fixed set of fields.

    Args:
        file_path: Path of a file holding one JSON object per line; it is
            modified in place.

    The first line keeps its ``name``, ``is_user`` and ``mes`` values, gets
    ``is_system`` set to ``False``, a fixed ``send_date`` and an empty
    ``extra``; every other key is dropped. Remaining lines are written back
    unchanged. An empty file is left untouched.

    Raises:
        KeyError: If the first line lacks ``name``, ``is_user`` or ``mes``.
    """
    with open(file_path, 'r') as file:
        rows = file.readlines()

    if not rows:
        return

    record = json.loads(rows[0])
    rewritten = {
        'name': record['name'],
        'is_user': record['is_user'],
        'is_system': False,
        'send_date': "November 9, 2023 11:40am",
        'mes': record['mes'],
        'extra': {},
    }
    rows[0] = json.dumps(rewritten) + '\n'

    with open(file_path, 'w') as file:
        file.writelines(rows)

# Process the input file
# Stage 1: pull the "visible" section out of the oobabooga history.
extract_visible_chat(input_file_path, '/content/pre-intermediate-output.json')
# Load the extracted exchanges through a context manager so the file handle is
# closed before the same path is overwritten by process_input_file below.
with open('/content/pre-intermediate-output.json', 'r') as visible_chat_file:
    input_messages = json.load(visible_chat_file)
intermediate_output_file_path = f"/content/{intermediate_output_file_name}.jsonl"
# Stage 2: render each exchange as pretty-printed message objects (this reuses
# and overwrites the pre-intermediate file).
process_input_file(input_messages, '/content/pre-intermediate-output.json', user_name, ai_name)
# Stage 3: collapse the pretty-printed objects to one JSON object per line.
convert_json_to_jsonl('/content/pre-intermediate-output.json', intermediate_output_file_path)
modify_first_line(intermediate_output_file_path)

# Insert line at the beginning of the final output file
# NOTE: `input_file_path` is rebound here to the intermediate file.
input_file_path = intermediate_output_file_path
output_file_path = f"/content/{output_file_name}.jsonl"

with open(input_file_path, 'r') as infile, open(output_file_path, 'w') as outfile:
    # Insert the line at the beginning
    outfile.write(json.dumps(inserted_line) + '\n')

    # Shift all lines down by one line
    for line in infile:
        outfile.write(line)

# Button to trigger download
download_button = widgets.Button(description="Download Output File")
output_file_path_button = widgets.Output()

def on_download_button_click(b):
    """Start a download of the final output file when the button is clicked."""
    with output_file_path_button:
        files.download(output_file_path)

download_button.on_click(on_download_button_click)
display(download_button, output_file_path_button)


In [None]:
import json
from google.colab import files
import ipywidgets as widgets
from datetime import datetime

# @title **SillyTavern To Oobabooga chat history conversion**
# @markdown Please enter the required details.

# @markdown **Input File:** Fill this field only if you have already uploaded the file in the colab storage, else leave it blank - you will be asked to choose the file during runtime if the file is not found in your specified path.
upload_input_file = False
input_file_path = ""  # @param {type: "string"}

# `upload_input_file` is hard-coded to False above, so this check always runs.
if not upload_input_file:
    try:
        # Opening the file is only an existence probe; nothing is read here.
        with open(input_file_path, 'r'):
            pass
    except FileNotFoundError:
        # Only a missing path triggers the upload prompt; other errors raised
        # by open() are not caught here.
        print("File not found. You will be prompted to upload the file during runtime.")
        uploaded = files.upload()
        # The first key of the upload result replaces the input path.
        input_file_path = list(uploaded.keys())[0]

# @markdown **Output File Name**
final_output_file_name = ""  # @param {type: "string"}

# Read the lines from the uploaded JSONL file and skip the first line
# (every line after the first is parsed as one message object).
# The encoding is given explicitly so raw non-ASCII message text is decoded the
# same way regardless of the platform's default encoding.
with open(input_file_path, 'r', encoding='utf-8') as jsonl_file:
    lines = jsonl_file.readlines()[1:]

# Extract "is_user" and "mes" fields from each line and store in the intermediate output
intermediate_output = []
for line in lines:
    # Blank lines (e.g. stray trailing newlines) carry no message and would
    # make json.loads raise, so they are skipped.
    if not line.strip():
        continue
    json_data = json.loads(line)
    is_user = json_data.get("is_user", False)
    message = json_data.get("mes", "")
    intermediate_output.append({"is_user": is_user, "mes": message})

# Function to generate the output
def generate_output(intermediate_output):
    """Pair up messages into ``internal`` and ``visible`` exchange lists.

    Args:
        intermediate_output: Sequence of dicts with an ``"is_user"`` flag and
            a ``"mes"`` text, in chat order.

    Returns:
        A dict ``{"internal": [...], "visible": [...]}`` where every element
        is a ``[user_text, character_text]`` pair.

        * A character message is paired with the user message that precedes
          it, or with ``""`` when there is none.
        * A user message that gets no reply (it is followed by another user
          message, or it is the last message) is paired with ``""``.
        * When the chat opens with a character message, the ``internal`` pair
          for it uses ``"<|BEGIN-VISIBLE-CHAT|>"`` as its user text; the
          ``visible`` pair uses ``""``.
    """
    begin_marker = "<|BEGIN-VISIBLE-CHAT|>"
    internal = []
    visible = []
    pending_user = None  # user message still waiting for a character reply

    for entry in intermediate_output:
        text = entry["mes"]

        if entry["is_user"]:
            if pending_user is not None:
                # Consecutive user messages: close the previous one unanswered.
                internal.append([pending_user, ""])
                visible.append([pending_user, ""])
            pending_user = text
            continue

        user_text = "" if pending_user is None else pending_user
        if not internal and pending_user is None:
            # Opening character message with no user text before it. The
            # marker is used only here so it never replaces a real user
            # message in the internal history.
            internal.append([begin_marker, text])
        else:
            internal.append([user_text, text])
        visible.append([user_text, text])
        pending_user = None

    if pending_user is not None:
        # Keep a trailing user message instead of silently dropping it.
        internal.append([pending_user, ""])
        visible.append([pending_user, ""])

    return {"internal": internal, "visible": visible}

# Generate output
result = generate_output(intermediate_output)

# Write the output to the JSON file
# No directory is prepended here, so the location depends on
# `final_output_file_name` and the current working directory.
final_output_file_path = f"{final_output_file_name}.json"
with open(final_output_file_path, 'w') as final_output_file:
    json.dump(result, final_output_file, indent=4)

print(f"Final output generated at: {final_output_file_path}")

# Button to trigger download
download_button = widgets.Button(description="Download Final Output")
# Output widget used as the context in which the download call runs.
output_file_path_button = widgets.Output()

def on_download_button_click(b):
    # `b` is the argument supplied by the click callback; it is not used.
    with output_file_path_button:
        files.download(final_output_file_path)

# Register the handler, then show the button together with its output area.
download_button.on_click(on_download_button_click)
display(download_button, output_file_path_button)
