# In [None]:
"""
Script to convert raw text file from Zoom chat into usable dataframe

Author: Lindsey Viann Parkinson
Last Updated: December 2020

"""

import re
import pandas as pd

def convert_to_dataframe(file):
    """
    Use regular expressions to convert the Zoom.txt file into
    a dataframe with comment time, the author, and the comment

    file: path to the text file (str or os.PathLike), an already open
        text file, or any other iterable of text lines

    Returns a dataframe with the columns 'time', 'author' and 'comment',
    one row per chat line. Blank lines are ignored, and so are lines that
    do not contain all three fields, so the columns always stay aligned.
    The author keeps the trailing " :" left by the regular expression.
    """
    import os  # local import: only needed for the path check below

    if isinstance(file, (str, os.PathLike)):
        # Iterating a path string would walk over its characters, so open
        # the file and parse its lines instead.
        # NOTE(review): assumes the chat export is UTF-8 encoded - confirm.
        with open(file, encoding="utf-8") as handle:
            return convert_to_dataframe(handle)

    regex_time = re.compile(r'\d{2}:\d{2}:\d{2}')
    # the captured author still ends with a space and a colon
    regex_author = re.compile(r'\bFrom \s(.*?:)')
    regex_comment = re.compile(r'(?:\: )(.*$)')

    rows = []
    for line in file:
        # Empty lines will be ignored
        if not line.strip():
            continue

        # separate line to get a view of comments
        print(line)

        # Only the first match of each pattern is used and the three fields
        # are stored together as one row. Collecting every match in three
        # independent lists shifts the columns against each other as soon
        # as a comment contains something that looks like a timestamp or a
        # line is missing one of the fields.
        found_time = regex_time.search(line)
        found_author = regex_author.search(line)
        found_comment = regex_comment.search(line)
        if found_time and found_author and found_comment:
            rows.append((found_time.group(0),
                         found_author.group(1),
                         found_comment.group(1)))

    # Return the whole dataframe, not just a five row preview
    return pd.DataFrame(rows, columns=['time', 'author', 'comment'])

def clean_dataframe(df, private = False):
    """
    Tidy the author column and optionally drop private messages

    df: dataframe of Zoom chat, needs an 'author' column
    private: Optional. Default setting removes private messages
        if private = True private messages are kept

    Returns a new dataframe, the dataframe passed in is left untouched.
    """
    # Work on a copy so that calling the function twice on the same
    # dataframe does not keep shortening the author names
    cleaned = df.copy()

    # Remove the trailing colon (and the space in front of it) left over
    # from parsing. Unlike cutting off the last two characters this leaves
    # author names that are already clean alone.
    cleaned['author'] = cleaned['author'].str.replace(r'\s*:$', '', regex=True)

    if not private:
        # na=False treats a missing author as "not private" instead of failing
        is_private = cleaned['author'].str.contains("Privately", regex=False, na=False)
        cleaned = cleaned[~is_private].reset_index(drop=True)

    # Return every row, not just a five row preview
    return cleaned

def convert_to_csv(df, csv_name, index = False):
    """
    export the dataframe as csv file

    df: dataframe of Zoom chat
    csv_name: Name (or path) of the csv file, the ".csv" extension is
        added when it is missing
    index: Optional. Default setting removes dataframe index as a csv column

    Prints a short reminder and returns None.
    """
    # Use the name that was passed in rather than a fixed file name
    target = str(csv_name)
    if not target.lower().endswith('.csv'):
        target += '.csv'

    df.to_csv(target, index = index)
    print("check source folder for csv")
    