# Converting `.ipynb` files to and from `.txt` files

Input `.txt` files should be formatted as follows:

```code
@Markdown[1]:
# Introduction to SageMath
@=================
@In[1]:
# Basic arithmetic
x = 10 * 5
y = x + 15
@=================
@In[2]:
# Display results
print(f"x = {x}")
print(f"y = {y}")
@=================
@Markdown[2]:
## Working with Lists
@=================
@In[3]:
# Create and manipulate a list
numbers = [1, 2, 3, 4, 5]
squared = [n^2 for n in numbers]
@=================
@In[4]:
# Show the results
print("Original:", numbers)
print("Squared:", squared)
@=================
@Markdown[3]:
## Notes
- Each section starts with @Markdown[n] or @In[n]
- Sections are separated by @=================
- Preserve indentation in code blocks
@=================
```

Exported `.txt` files use the same format, except that the separator line after the last cell is omitted.

In [5]:
import os
import re
import glob
import json

In [6]:
def parse_txt_text(lines):
    """
    Parses lines of the custom text format into a list of notebook cells.

    A cell starts at a header line (``@Markdown[n]:`` or ``@In[n]:``) and ends
    at a separator line (``@=================``), at the next header line, or
    at the end of the input. Lines outside any cell are ignored.

    The exporter writes one line break between a cell's source and the
    separator that follows it, so that line break belongs to the file format
    and not to the cell. It is removed from the last source line of each cell;
    keeping it would make every export/import round trip append one more
    blank line to each cell.

    Parameters:
        lines: iterable of str, normally the result of ``readlines()`` (each
            line still carrying its line ending).

    Returns:
        A list of cell dictionaries suitable for inclusion in the
        nbformat=4 style Jupyter notebook JSON.
    """
    cells = []
    current_type = None   # 'markdown' or 'code'; None while outside a cell
    current_source = []

    # Helper to finalize the current cell (if any) and append to `cells`.
    def finalize_cell(cell_type, source_lines):
        if cell_type is None:
            return
        source = list(source_lines)
        if source:
            # Remove exactly one line ending from the last line: it is the
            # delimiter in front of the separator, not cell content.
            last_line = source[-1]
            for ending in ('\r\n', '\n', '\r'):
                if last_line.endswith(ending):
                    last_line = last_line[:-len(ending)]
                    break
            if last_line:
                source[-1] = last_line
            else:
                # The last line held nothing but the delimiter (e.g. an
                # empty cell), so it contributes no source text at all.
                source.pop()
        if cell_type == 'markdown':
            new_cell = {
                "cell_type": "markdown",
                "metadata": {},
                "source": source
            }
        else:  # code
            new_cell = {
                "cell_type": "code",
                "metadata": {},
                "execution_count": None,
                "outputs": [],
                "source": source
            }
        cells.append(new_cell)

    # A small regex to detect lines like "@Markdown[1]:" or "@In[2]:"
    cell_header_pattern = re.compile(r'^@(Markdown|In)\[\d+\]:\s*$')

    for line in lines:
        stripped = line.strip()
        header = cell_header_pattern.match(stripped)
        if header:
            # A header also closes a cell that was left without a separator.
            finalize_cell(current_type, current_source)
            current_type = 'markdown' if header.group(1) == 'Markdown' else 'code'
            current_source = []
        elif stripped == '@=================':
            # End of the current cell
            finalize_cell(current_type, current_source)
            current_type = None
            current_source = []
        elif current_type is not None:
            # Keep the original line (including indentation)
            current_source.append(line)
        # Lines outside any recognized cell are ignored.

    # Covers the case where the input doesn't end with "@================="
    finalize_cell(current_type, current_source)

    return cells

def build_notebook(cells):
    """
    Wraps a list of cell dicts in a complete notebook dict.

    The returned dict declares nbformat 4 (minor version 4) and carries
    SageMath kernel and language metadata. `cells` is stored as given
    (not copied) under the "cells" key.
    """
    kernelspec = {
        "display_name": "SageMath",
        "language": "sage",
        "name": "sagemath"
    }
    language_info = {
        "file_extension": ".sage",
        "mimetype": "text/x-sage",
        "name": "sage"
    }
    return {
        "nbformat": 4,
        "nbformat_minor": 4,
        "metadata": {"kernelspec": kernelspec, "language_info": language_info},
        "cells": cells
    }

def convert_ipynb_to_txt(notebook_path, output_path):
    """
    Exports the markdown and code cells of one notebook to a text file.

    Each exported cell becomes a header line ("@Markdown[n]:" or "@In[n]:",
    numbered separately per kind starting at 1), followed by the cell source
    and a separator line. Cells of any other type are skipped. The separator
    is left out for the cell that is last in the notebook.
    """
    with open(notebook_path, 'r', encoding='utf-8') as src:
        all_cells = json.load(src)['cells']

    header_names = {'markdown': 'Markdown', 'code': 'In'}
    next_number = {'markdown': 1, 'code': 1}
    final_position = len(all_cells) - 1
    chunks = []

    for position, cell in enumerate(all_cells):
        kind = cell['cell_type']
        if kind not in header_names:
            continue
        chunks.append(f"@{header_names[kind]}[{next_number[kind]}]:")
        chunks.append(''.join(cell['source']))
        # The position is counted over all cells, including skipped ones.
        if position != final_position:
            chunks.append('@=================')
        next_number[kind] += 1

    with open(output_path, 'w', newline='\n', encoding='utf-8') as dst:
        dst.write('\n'.join(chunks))


def multiconvert_ipynb_to_txt(folder_path='', output_folder=''):
    """
    Converts every `.ipynb` file found directly in a folder to a `.txt` file.

    Parameters:
        folder_path: folder scanned for notebooks (not recursive); an empty
            value means the current directory.
        output_folder: folder that receives the `.txt` files; an empty value
            means the current directory. It is created if it does not exist.

    Each output file keeps the notebook's base name with its final extension
    replaced by `.txt`. One line is printed per converted notebook.
    """
    # If no input folder is given, use the current directory
    if not folder_path:
        folder_path = '.'

    # If no output folder is given, write into the current directory
    if not output_folder:
        output_folder = '.'

    # Ensure the output folder exists (no separate existence check, so there
    # is no gap between checking and creating)
    os.makedirs(output_folder, exist_ok=True)

    # Scan for Jupyter notebooks; sorted so the processing order is stable
    notebook_files = sorted(glob.glob(os.path.join(folder_path, '*.ipynb')))

    for notebook_path in notebook_files:
        # Swap only the final extension. str.replace would also rewrite
        # ".ipynb" appearing earlier in the name and would leave a
        # differently-cased extension (e.g. ".IPYNB") untouched.
        base_name = os.path.splitext(os.path.basename(notebook_path))[0]
        output_path = os.path.join(output_folder, base_name + '.txt')

        # Convert the notebook
        convert_ipynb_to_txt(notebook_path, output_path)
        print(f"Converted {notebook_path} to {output_path}")

def convert_txt_to_ipynb(input_file, output_file):
    """
    Converts one text file in the custom cell format into a notebook file.

    The input is read as UTF-8, parsed with `parse_txt_text`, wrapped with
    `build_notebook`, and written to `output_file` as indented JSON with
    non-ASCII characters kept as-is. A confirmation line is printed at the end.
    """
    with open(input_file, 'r', encoding='utf-8') as source:
        text_lines = source.readlines()

    # Cells first, then the surrounding notebook structure
    notebook = build_notebook(parse_txt_text(text_lines))

    with open(output_file, 'w', encoding='utf-8') as target:
        json.dump(notebook, target, indent=2, ensure_ascii=False)

    print(f"Conversion completed! {input_file} was converted into {output_file}.")

def multiconvert_txt_to_ipynb(folder_path='', output_folder=''):
    """
    Converts every `.txt` file found directly in a folder to a `.ipynb` file.

    Parameters:
        folder_path: folder scanned for text files (not recursive); an empty
            value means the current directory.
        output_folder: folder that receives the notebooks; an empty value
            means the current directory. It is created if it does not exist.

    Each output file keeps the text file's base name with its final extension
    replaced by `.ipynb`. One line is printed per converted file, in addition
    to whatever `convert_txt_to_ipynb` prints itself.
    """
    if not folder_path:
        folder_path = '.'
    if not output_folder:
        output_folder = '.'

    # Ensure the output folder exists (no separate existence check, so there
    # is no gap between checking and creating)
    os.makedirs(output_folder, exist_ok=True)

    # Scan for txt files; sorted so the processing order is stable
    txt_files = sorted(glob.glob(os.path.join(folder_path, '*.txt')))

    for txt_path in txt_files:
        # Swap only the final extension. str.replace would also rewrite
        # ".txt" appearing earlier in the name, and would leave a
        # differently-cased extension (e.g. ".TXT") untouched, which can make
        # the output path equal to the input path and overwrite the input.
        base_name = os.path.splitext(os.path.basename(txt_path))[0]
        output_path = os.path.join(output_folder, base_name + '.ipynb')

        # Convert the text file to notebook
        convert_txt_to_ipynb(txt_path, output_path)
        print(f"Converted {txt_path} to {output_path}")

## Example: conversion `.ipynb` $\rightarrow$ `.txt`

In [7]:
# Export every notebook found in `notebooks/` as a text file in `txts/`.
folder_path, output_folder = 'notebooks', 'txts'
multiconvert_ipynb_to_txt(folder_path=folder_path, output_folder=output_folder)

Converted notebooks/SageDatascience.ipynb to txts/SageDatascience.txt
Converted notebooks/Sage_Tutorial.ipynb to txts/Sage_Tutorial.txt


## Example: conversion `.txt` $\rightarrow$ `.ipynb`

In [8]:
# Example usage: rebuild notebooks in `notebooks/` from the text files in `txts/`.
# Existing notebooks with the same names are overwritten.
folder_path, output_folder = 'txts', 'notebooks'
multiconvert_txt_to_ipynb(folder_path=folder_path, output_folder=output_folder)

Conversion completed! txts/SageDatascience.txt was converted into notebooks/SageDatascience.ipynb.
Converted txts/SageDatascience.txt to notebooks/SageDatascience.ipynb
Conversion completed! txts/Sage_Tutorial.txt was converted into notebooks/Sage_Tutorial.ipynb.
Converted txts/Sage_Tutorial.txt to notebooks/Sage_Tutorial.ipynb
