In [None]:
import os
import pandas as pd

In [None]:
def create_directory(dirpath):
    """Ensure that the directory ``dirpath`` exists.

    Missing intermediate directories are created as well, and calling this
    on a directory that already exists is a no-op.

    Parameters
    ----------
    dirpath : str or path-like
        Path of the directory to create.

    Raises
    ------
    FileExistsError
        If ``dirpath`` exists but is not a directory.
    """
    # makedirs(exist_ok=True) replaces the "check, then mkdir" pattern: it has
    # no window between the existence check and the creation, and it also
    # builds any missing parent directories.
    os.makedirs(dirpath, exist_ok=True)

In [None]:
def read_csv_file(dirpath, filename, header=0, encoding="UTF-8"):
    """Load one CSV file from a directory into a pandas DataFrame.

    Parameters
    ----------
    dirpath : str
        Directory that contains the file.
    filename : str
        Name of the CSV file inside ``dirpath``.
    header : int or None, default 0
        Forwarded to ``pandas.read_csv`` as ``header``.
    encoding : str, default "UTF-8"
        Forwarded to ``pandas.read_csv`` as ``encoding``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the file.
    """
    return pd.read_csv(
        filepath_or_buffer=os.path.join(dirpath, filename),
        header=header,
        encoding=encoding,
    )

In [None]:
def read_csv_directory(dirpath, header=0, encoding="UTF-8"):
    """Read every ``.csv`` file in ``dirpath`` and concatenate them.

    Only direct entries of ``dirpath`` whose name ends with ``".csv"`` are
    read. Files are loaded in sorted name order so the row order of the
    result is the same on every run.

    Parameters
    ----------
    dirpath : str
        Directory to scan for CSV files.
    header : int or None, default 0
        Forwarded to ``read_csv_file`` for every file.
    encoding : str, default "UTF-8"
        Forwarded to ``read_csv_file`` for every file.

    Returns
    -------
    pandas.DataFrame
        The result of ``pandas.concat`` over the per-file DataFrames. Each
        file keeps its own index, so index labels may repeat.

    Raises
    ------
    ValueError
        If ``dirpath`` contains no ``.csv`` files.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the
    # concatenation order (and therefore the output) reproducible.
    csv_filenames = sorted(
        name
        for name in map(os.fsdecode, os.listdir(dirpath))
        if name.endswith(".csv")
    )

    if not csv_filenames:
        # pd.concat([]) would raise a ValueError with a generic message;
        # raise the same exception type but say which directory was empty.
        raise ValueError("No .csv files found in directory: {!r}".format(dirpath))

    dataframes = [
        read_csv_file(dirpath=dirpath, filename=name, header=header, encoding=encoding)
        for name in csv_filenames
    ]

    return pd.concat(dataframes)

In [None]:
def data_to_csv(data, dirpath, filename, index=True, columns=None, header=True, encoding="UTF-8"):
    """Write ``data`` to ``dirpath/filename`` as a CSV file.

    The destination directory is created through ``create_directory`` before
    writing. ``data`` is passed to the ``pandas.DataFrame`` constructor
    together with ``columns``, and the resulting frame is saved with
    ``DataFrame.to_csv``.

    Parameters
    ----------
    data : object
        Anything accepted by ``pandas.DataFrame(data=...)``.
    dirpath : str
        Directory in which the file is written.
    filename : str
        Name of the output file inside ``dirpath``.
    index : bool, default True
        Forwarded to ``DataFrame.to_csv`` as ``index``.
    columns : sequence or None, default None
        Forwarded to the ``pandas.DataFrame`` constructor as ``columns``.
    header : bool, default True
        Forwarded to ``DataFrame.to_csv`` as ``header``.
    encoding : str, default "UTF-8"
        Forwarded to ``DataFrame.to_csv`` as ``encoding``.
    """
    create_directory(dirpath)

    target = os.path.join(dirpath, filename)

    pd.DataFrame(data=data, columns=columns).to_csv(
        path_or_buf=target,
        index=index,
        header=header,
        encoding=encoding,
    )

In [None]:
def clusters_to_csv(results, dirpath, collection_field="documents", header=True, encoding="UTF-8"):
    """Export every cluster in ``results`` to its own CSV file.

    Files are written to
    ``<dirpath>/clustering/K_<k>/iteraction_<iteraction>/cluster_<id>.csv``.

    Parameters
    ----------
    results : mapping
        Must provide the keys ``"k"``, ``"iteractions"`` and ``"clusters"``,
        each indexable by position; entry ``i`` of each describes the same
        run. ``results["clusters"][i]`` is an iterable of mappings that
        provide an ``"id"`` key and a ``collection_field`` key.
    dirpath : str
        Root output directory; a ``clustering`` sub-directory is created in it.
    collection_field : str, default "documents"
        Key of each cluster mapping whose value is written to the CSV.
    header : bool, default True
        Forwarded to ``data_to_csv`` as ``header``.
    encoding : str, default "UTF-8"
        Forwarded to ``data_to_csv`` as ``encoding``.
    """
    clustering_dirpath = os.path.join(dirpath, "clustering")

    create_directory(dirpath)
    create_directory(clustering_dirpath)

    # results keys: k, sse, iteractions, clusters ("sse" is not read here)
    for index, k in enumerate(results["k"]):
        iteraction = results["iteractions"][index]
        clusters = results["clusters"][index]

        subdir_k = os.path.join(clustering_dirpath, "K_" + str(k))
        subdir_iteraction = os.path.join(subdir_k, "iteraction_" + str(iteraction))

        # Created up front so the run directory exists even when it holds
        # no clusters.
        create_directory(subdir_k)
        create_directory(subdir_iteraction)

        for cluster in clusters:
            filename = "cluster_" + str(cluster["id"]) + ".csv"
            documents = cluster[collection_field]

            # header/encoding were previously accepted but never forwarded,
            # so callers' choices were silently ignored.
            data_to_csv(
                data=documents,
                filename=filename,
                dirpath=subdir_iteraction,
                header=header,
                encoding=encoding,
            )