# Notebook - Convert TSV to (1) CSV, (2) JSON
* Data arrives in different file formats, so converting between them is sometimes necessary
* Here, we convert the TSV files of the `banking` dataset to CSV and JSON (two formats commonly found in data science) so that we can add support for these formats to our pipeline later

In [1]:
# Fetch the repository that contains the TSV datasets (data/banking) converted below.
# NOTE(review): later cells use absolute /content/... paths, so this presumably runs
# with /content as the working directory (the Colab default) — confirm.
!git clone https://github.com/thuiar/Adaptive-Decision-Boundary.git

Cloning into 'Adaptive-Decision-Boundary'...
remote: Enumerating objects: 211, done.[K
remote: Counting objects: 100% (38/38), done.[K
remote: Compressing objects: 100% (2/2), done.[K
remote: Total 211 (delta 36), reused 36 (delta 36), pack-reused 173 (from 1)[K
Receiving objects: 100% (211/211), 1.26 MiB | 7.19 MiB/s, done.
Resolving deltas: 100% (116/116), done.


In [16]:
import pandas as pd
import os
import shutil
from google.colab import files


In [3]:
def convert_tsv_to_csv(tsv_filename, csv_filename):
    """Convert a tab-separated file to a comma-separated file.

    Every field is read as plain text so values are copied verbatim. With
    pandas' default type inference a format conversion becomes lossy:
    ``007`` is rewritten as ``7`` and literal strings such as ``NA``,
    ``null`` or ``None`` are turned into empty cells.

    Args:
        tsv_filename: Path of the TSV file to read; its first row is the header.
        csv_filename: Path of the CSV file to write (overwritten if it exists).
    """
    # dtype=str disables numeric inference; keep_default_na=False stops pandas
    # from mapping sentinel-looking text ("NA", "null", ...) to missing values.
    df = pd.read_csv(tsv_filename, sep='\t', dtype=str, keep_default_na=False)
    # index=False: do not write the DataFrame row index as an extra column.
    df.to_csv(csv_filename, index=False)

def convert_tsv_to_json(tsv_filename, json_filename):
    """Convert a tab-separated file to a JSON array with one object per row.

    Only empty fields are treated as missing (written as JSON ``null``).
    Literal text such as ``NA``, ``null`` or ``None`` is kept as a string;
    with pandas' default NA handling it would be silently replaced by
    ``null`` in the output.

    Args:
        tsv_filename: Path of the TSV file to read; its first row is the header.
        json_filename: Path of the JSON file to write (overwritten if it exists).
    """
    # keep_default_na=False drops pandas' built-in list of NA-looking strings;
    # na_values=[''] keeps genuinely empty fields as missing values.
    df = pd.read_csv(tsv_filename, sep='\t', keep_default_na=False, na_values=[''])
    # orient='records' -> [{column: value, ...}, ...]; indent=2 pretty-prints.
    df.to_json(json_filename, orient='records', indent=2)

# 1. Convert TSV to CSV
* Keep the original base filenames; only the extension changes (e.g. `dev.tsv` → `dev.csv`)
* Input directory: /content/Adaptive-Decision-Boundary/data/banking
* Output directory: /content/Adaptive-Decision-Boundary/data/banking_simulate_csv

In [5]:
# Create output directory
# The CSV copies go into a sibling folder of the input `banking` folder under data/.
output_dir = '/content/Adaptive-Decision-Boundary/data/banking_simulate_csv'
# exist_ok=True lets this cell be re-run without failing when the folder already exists.
os.makedirs(output_dir, exist_ok=True)

In [6]:
# sanity check TSV files are in the input directory
source_dir = '/content/Adaptive-Decision-Boundary/data/banking'
all_files = os.listdir(source_dir)
# os.listdir returns entries in arbitrary order; sort so the conversion order
# and the printed list are reproducible across runs.
tsv_files = sorted(f for f in all_files if f.endswith('.tsv'))
# Fail here rather than letting the later cells silently convert nothing.
assert tsv_files, f"No .tsv files found in {source_dir}"
print(tsv_files)

['dev.tsv', 'test.tsv', 'train.tsv']


In [7]:
# convert TSV to CSV, then save in output directory
for tsv_filename in tsv_files:
    full_tsv_path = os.path.join(source_dir, tsv_filename)
    # Swap only the trailing extension; str.replace would also rewrite any
    # '.tsv' that occurs earlier in the name (e.g. 'a.tsv.old.tsv').
    csv_filename = os.path.splitext(tsv_filename)[0] + '.csv'
    full_csv_path = os.path.join(output_dir, csv_filename)
    convert_tsv_to_csv(full_tsv_path, full_csv_path)

print(f"Converted {len(tsv_files)} files to CSV in {output_dir}")

Converted 3 files to CSV in /content/Adaptive-Decision-Boundary/data/banking_simulate_csv


In [8]:
# check output CSVs were created
# Visual check only: prints the directory listing; nothing is asserted.
# os.listdir gives no ordering guarantee, so the printed order may vary.
output_files = os.listdir(output_dir)
print("Files in the output directory:", output_files)

Files in the output directory: ['dev.csv', 'train.csv', 'test.csv']


In [None]:
# zip the folder
# Only the banking_simulate_csv folder is archived, with member paths relative to data/.
# The relative base name puts the .zip in the current working directory; the
# returned archive path is displayed as this cell's output.
shutil.make_archive(
    base_name='banking_simulate_csv_archive',
    format='zip',
    root_dir='/content/Adaptive-Decision-Boundary/data/',
    base_dir='banking_simulate_csv',
)

'/content/banking_simulate_csv_archive.zip'

In [17]:
# Download the CSV zip file
# `files` is google.colab.files (imported at the top of the notebook); the relative
# name refers to the archive that the zip cell wrote to the working directory.
files.download('banking_simulate_csv_archive.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# 2. Convert TSV to JSON
* Keep the original base filenames; only the extension changes (e.g. `dev.tsv` → `dev.json`)
* Input directory: /content/Adaptive-Decision-Boundary/data/banking
* Output directory: /content/Adaptive-Decision-Boundary/data/banking_simulate_json


In [9]:
# Create output directory
# The JSON copies go into a sibling folder of the input `banking` folder under data/.
output_json_dir = '/content/Adaptive-Decision-Boundary/data/banking_simulate_json'
# exist_ok=True lets this cell be re-run without failing when the folder already exists.
os.makedirs(output_json_dir, exist_ok=True)

In [10]:
# sanity check TSV files are in the input directory
source_dir = '/content/Adaptive-Decision-Boundary/data/banking'
all_files = os.listdir(source_dir)
# os.listdir returns entries in arbitrary order; sort so the conversion order
# and the printed list are reproducible across runs.
tsv_files = sorted(f for f in all_files if f.endswith('.tsv'))
# Fail here rather than letting the later cells silently convert nothing.
assert tsv_files, f"No .tsv files found in {source_dir}"
print(tsv_files)

['dev.tsv', 'test.tsv', 'train.tsv']


In [11]:
# convert TSV to JSON, then save in output directory
for tsv_filename in tsv_files:
    full_tsv_path = os.path.join(source_dir, tsv_filename)
    # Swap only the trailing extension; str.replace would also rewrite any
    # '.tsv' that occurs earlier in the name (e.g. 'a.tsv.old.tsv').
    json_filename = os.path.splitext(tsv_filename)[0] + '.json'
    full_json_path = os.path.join(output_json_dir, json_filename)
    convert_tsv_to_json(full_tsv_path, full_json_path)

print(f"Converted {len(tsv_files)} files to JSON in {output_json_dir}")

Converted 3 files to JSON in /content/Adaptive-Decision-Boundary/data/banking_simulate_json


In [12]:
# check output JSONs were created
# Visual check only: prints the directory listing; nothing is asserted.
# os.listdir gives no ordering guarantee, so the printed order may vary.
output_json_files = os.listdir(output_json_dir)
print("Files in the output JSON directory:", output_json_files)

Files in the output JSON directory: ['dev.json', 'train.json', 'test.json']


In [None]:
# zip the folder
# Only the banking_simulate_json folder is archived, with member paths relative to data/.
# The relative base name puts the .zip in the current working directory; the
# returned archive path is displayed as this cell's output.
shutil.make_archive(
    base_name='banking_simulate_json_archive',
    format='zip',
    root_dir='/content/Adaptive-Decision-Boundary/data/',
    base_dir='banking_simulate_json',
)

'/content/banking_simulate_json_archive.zip'

In [18]:
# Download the JSON zip file
# `files` is google.colab.files (imported at the top of the notebook); the relative
# name refers to the archive that the zip cell wrote to the working directory.
files.download('banking_simulate_json_archive.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>