# Converting CSVs into TSVs

This notebook gives you reusable code that converts every CSV file in one folder into a TSV file and stores the results in another folder. All you have to do is specify the input and output folders.

## Step 1: Define a function to convert a CSV file into a TSV file

In [1]:
import pandas as pd
import sys
import os
from pathlib import Path

def csv_to_tsv(input_csv: str, output_dir: str) -> None:
    """
    Convert a CSV file to a TSV file and save it in the specified output directory.

    Every field is read as text, so values are written back exactly as they
    appear in the CSV: leading zeros are kept ("007" stays "007"), integers in
    columns with gaps are not turned into floats, and strings such as "NA" or
    "null" are not blanked out.

    Problems are reported with a printed message instead of being raised, so
    a single bad file does not interrupt a batch conversion.

    Args:
        input_csv (str): Path to the input CSV file (a path-like object is
            also accepted). Files without a .csv extension (case-insensitive)
            are skipped with a message.
        output_dir (str): Directory where the TSV file will be saved. It is
            created if it does not exist yet.
    """
    # Normalise path-like objects to str so the extension check below works.
    input_csv = os.fspath(input_csv)

    if not input_csv.lower().endswith('.csv'):
        print(f"Skipping {input_csv}: Input file must have a .csv extension.")
        return

    # Same file name as the input, with the extension swapped to .tsv
    base_name = os.path.basename(input_csv)
    output_name = os.path.splitext(base_name)[0] + '.tsv'
    output_path = os.path.join(output_dir, output_name)

    try:
        # An empty output_dir means "current directory": nothing to create.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # dtype=str + keep_default_na=False disables pandas' type inference
        # and NA detection, which would otherwise rewrite the data
        # (e.g. "007" -> 7, "NA" -> empty, 1 -> 1.0).
        df = pd.read_csv(input_csv, dtype=str, keep_default_na=False)

        # Write to TSV file
        df.to_csv(output_path, sep='\t', index=False)
        print(f"Conversion successful! TSV file saved at: {output_path}")
    except Exception as e:
        # Best-effort by design: report the failure and let the caller go on.
        print(f"Error converting {input_csv}: {e}")

## Step 2: Create a function to convert every CSV file in a folder

In [2]:
def process_directory(input_dir: str, output_dir: str) -> None:
    """
    Process all CSV files in the input directory and convert them to TSV files.

    Only regular files whose name ends in .csv (case-insensitive) are
    converted; sub-directories are ignored even if their name ends in .csv.
    Files are handled in sorted name order so repeated runs produce the same
    log output. The input directory is not searched recursively.

    Args:
        input_dir (str): Directory containing CSV files
        output_dir (str): Directory where TSV files will be saved (created,
            including parents, if it doesn't exist)

    Raises:
        OSError: If input_dir cannot be listed (for example
            FileNotFoundError when it does not exist).
    """
    # Create output directory if it doesn't exist
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # os.listdir returns names in arbitrary order and includes directories,
    # so keep regular files only and sort for a reproducible run.
    csv_files = sorted(
        name
        for name in os.listdir(input_dir)
        if name.lower().endswith('.csv')
        and os.path.isfile(os.path.join(input_dir, name))
    )

    if not csv_files:
        print(f"No CSV files found in {input_dir}")
        return

    print(f"Found {len(csv_files)} CSV files to process...")

    # Process each CSV file
    for csv_file in csv_files:
        input_path = os.path.join(input_dir, csv_file)
        csv_to_tsv(input_path, output_dir)


## Step 3: Declare the input and output folders and run the conversion

In [3]:
if __name__ == "__main__":
    # Source folder holding the CSV files, and destination folder for the TSV files
    input_directory, output_directory = "example-data", "tsv-files"

    # Convert every CSV file found in the source folder
    process_directory(input_dir=input_directory, output_dir=output_directory)

Found 7 CSV files to process...
Conversion successful! TSV file saved at: tsv-files/analyses.tsv
Conversion successful! TSV file saved at: tsv-files/runs.tsv
Conversion successful! TSV file saved at: tsv-files/biosamples.tsv
Conversion successful! TSV file saved at: tsv-files/individuals.tsv
Conversion successful! TSV file saved at: tsv-files/individual_diseases.tsv
Conversion successful! TSV file saved at: tsv-files/data_dictionary.tsv
Conversion successful! TSV file saved at: tsv-files/dataset.tsv
