In [None]:
import codecs
import csv
import itertools
import os

import pandas as pd

In [None]:
def create_directory_tree(dirpath):
    """Create ``dirpath`` and any missing parent directories.

    Calling this on a directory that already exists is a no-op.

    Args:
        dirpath: Path of the directory to create.

    Raises:
        OSError: If the directory cannot be created (for example because of
            missing permissions or an invalid path).
    """
    try:
        # exist_ok=True avoids the race between a separate existence check and
        # the creation call when another process creates the directory first.
        os.makedirs(dirpath, exist_ok=True)
    except FileExistsError:
        # Something that is not a directory already lives at dirpath. The
        # previous implementation left such a path untouched, so keep doing
        # that; re-raise only when the path does not actually resolve.
        if not os.path.exists(dirpath):
            raise

In [None]:
def read_csv_file(dirpath, filename, header=0, sep=",", index_col=None, encoding="UTF-8", quoting=csv.QUOTE_MINIMAL):
    """Load a single CSV file located at ``dirpath/filename`` with pandas.

    Args:
        dirpath: Directory that contains the file.
        filename: Name of the file inside ``dirpath``.
        header: Forwarded to ``pd.read_csv`` as ``header``.
        sep: Forwarded to ``pd.read_csv`` as ``sep``.
        index_col: Forwarded to ``pd.read_csv`` as ``index_col``.
        encoding: Forwarded to ``pd.read_csv`` as ``encoding``.
        quoting: Forwarded to ``pd.read_csv`` as ``quoting``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    parser_options = {
        "header": header,
        "sep": sep,
        "index_col": index_col,
        "encoding": encoding,
        "quoting": quoting,
    }

    return pd.read_csv(os.path.join(dirpath, filename), **parser_options)

In [None]:
def read_csv_directory(dirpath, file_extension=".csv", header=0, sep=",", index_col=None, encoding="UTF-8", quoting=csv.QUOTE_MINIMAL):
    """Read every matching file in ``dirpath`` and concatenate the results.

    Directory entries whose name ends with ``file_extension`` are loaded with
    ``read_csv_file`` in sorted filename order, then concatenated into one
    DataFrame with a fresh integer index.

    Args:
        dirpath: Directory to scan (not recursive).
        file_extension: Filename suffix an entry must have to be read.
        header: Forwarded to ``read_csv_file``.
        sep: Forwarded to ``read_csv_file``.
        index_col: Forwarded to ``read_csv_file``.
        encoding: Forwarded to ``read_csv_file``.
        quoting: Forwarded to ``read_csv_file``.

    Returns:
        A single DataFrame holding the rows of all matching files.

    Raises:
        ValueError: If no entry in ``dirpath`` ends with ``file_extension``.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the concatenated frame reproducible across runs and machines.
    filenames = sorted(
        name
        for name in map(os.fsdecode, os.listdir(dirpath))
        if name.endswith(file_extension)
    )

    if not filenames:
        # pd.concat would raise an opaque "No objects to concatenate" here;
        # keep the exception type but say what was actually missing.
        raise ValueError(
            "No files ending with {!r} found in directory {!r}".format(file_extension, dirpath)
        )

    dataframes = [
        read_csv_file(dirpath=dirpath, filename=filename, header=header, sep=sep, index_col=index_col, encoding=encoding, quoting=quoting)
        for filename in filenames
    ]

    return pd.concat(dataframes, ignore_index=True)

In [None]:
def generate_dl_file_words(filepath, tweets, open_mode="w+", encoding="UTF-8", header=None):
    """Write a DL edge-list file linking every pair of words within each tweet.

    For each tweet, one ``first<TAB>second`` line is written for every pair
    of positions ``i < j``, in the order the words appear in the tweet.

    Args:
        filepath: Path of the file to write.
        tweets: Iterable of tweets, each an iterable of tokens. Tokens are
            converted with ``str.format``, so non-string tokens are accepted.
        open_mode: Mode passed to ``codecs.open``.
        encoding: Text encoding passed to ``codecs.open``.
        header: Text written before the edges. When falsy (``None`` or an
            empty string) the built-in default header is written instead.
    """
    with codecs.open(filepath, open_mode, encoding) as dlfile:
        if header:
            dlfile.write(header)
        else:
            # NOTE(review): the default header hard-codes "n=120" - presumably
            # the node count of the dataset this was written for. Pass an
            # explicit `header` for other data; TODO confirm.
            dlfile.write(
                "DL n=120\n"
                "format = edgelist1\n"
                "labels embedded:\n"
                "data:\n"
            )

        for tweet in tweets:
            # combinations() yields the same (i, j) pairs with i < j, in the
            # same order, as a nested index loop would.
            for first, second in itertools.combinations(tweet, 2):
                dlfile.write("{}\t{}\n".format(first, second))
    # The with-block has already closed the file; no explicit close() needed.

In [None]:
def generate_dl_file_users(filepath, authors, tweets, open_mode="w+", encoding="UTF-8", header=None):
    """Write a DL edge-list file linking each author to the words of their tweet.

    ``authors`` and ``tweets`` are paired positionally; for every pair, one
    ``author<TAB>word`` line is written per word of the tweet.

    Args:
        filepath: Path of the file to write.
        authors: Iterable of author identifiers. Values are converted with
            ``str.format``, so non-string identifiers are accepted.
        tweets: Iterable of tweets, each an iterable of tokens, aligned with
            ``authors``.
        open_mode: Mode passed to ``codecs.open``.
        encoding: Text encoding passed to ``codecs.open``.
        header: Text written before the edges. When falsy (``None`` or an
            empty string) the built-in default header is written instead.
    """
    with codecs.open(filepath, open_mode, encoding) as dlfile:
        if header:
            dlfile.write(header)
        else:
            # NOTE(review): the default header hard-codes "n=120" - presumably
            # the node count of the dataset this was written for. Pass an
            # explicit `header` for other data; TODO confirm.
            dlfile.write(
                "DL n=120\n"
                "format = edgelist1\n"
                "labels embedded:\n"
                "data:\n"
            )

        # zip() stops at the shorter input, so surplus authors or tweets are
        # silently ignored when the two inputs differ in length.
        for author, tweet in zip(authors, tweets):
            for word in tweet:
                dlfile.write("{}\t{}\n".format(author, word))
    # The with-block has already closed the file; no explicit close() needed.