In [None]:
import pandas as pd
import re
import csv
import locale
import matplotlib.pyplot as plt
import dateparser
from datetime import datetime
from os import walk
from os import listdir
from os.path import isfile, join
from parsita import *
from parsita.util import constant
from calendar import month_abbr

In [None]:
#grammar CSV;
 
#file  : hdr row+ ;
#hdr   : row ;
#row   : field (',' field)* '\r'? '\n' ;
#field : TEXT | STRING | ;
 
#TEXT   : ~[,\n\r"]+ ;
#STRING : '"' ('""'|~'"')* '"' ; // quote-quote is an escaped quote

class CsvParsers(TextParsers, whitespace=None):
    """Parsita parsers for comma-separated text.

    ``whitespace=None`` disables implicit whitespace skipping, so blanks and
    line breaks are consumed only where a parser below asks for them.

    ``file`` yields a list of records; each record is a list of field strings.
    """
    comma = lit(',')
    dquote = lit('\"')
    # Inside a quoted field two consecutive quotes stand for one literal quote.
    # The right-hand side of `>` must be a callable converter; a parser object
    # is not callable, so the replacement value is wrapped in constant().
    dquote2 = lit('\"\"') > constant('\"')
    crlf = lit('\r\n') | lit('\n')
    # A single character that needs no quoting.
    txt = reg('[^\",\r\n]')
    spaces = reg('[ \t]+')

    # Quoted field: optional blanks around the quotes are dropped, and the
    # content may contain commas, line breaks and doubled quotes.
    escaped = opt(spaces) >> dquote >> rep(comma | crlf | txt | dquote2) << dquote << opt(spaces) > ''.join
    # Unquoted field: may be empty, in which case it yields ''.
    nonescaped = rep(txt) > ''.join
    field = escaped | nonescaped
    record = repsep(field, comma)
    # Records are separated by line breaks; one trailing line break is allowed.
    file = repsep(record, crlf) << opt(crlf)

In [None]:
def as_float(value):
    """Convert an amount written as '1.234,56' into a float.

    Dots are removed (thousands separators) and the comma becomes the decimal
    point. Surrounding whitespace is ignored.

    Args:
        value: the raw field text.

    Returns:
        The amount as a float, or None when the field is empty or blank.

    Raises:
        ValueError: if the field is non-blank but not a number.
    """
    text = value.strip()
    # Test for blankness rather than length, so that a one-character amount
    # such as '5' is still converted and a whitespace-only field yields None.
    if not text:
        return None
    return float(text.replace(".", "").replace(",", "."))
    
def as_date(value):
    """Parse a textual date with dateparser and return its date part.

    Args:
        value: the date text, e.g. the match produced by the date parser.

    Returns:
        A datetime.date.

    Raises:
        ValueError: if dateparser cannot interpret the text.
    """
    parsed = dateparser.parse(value)
    # dateparser.parse signals failure by returning None; report the offending
    # text instead of failing later with an AttributeError on None.
    if parsed is None:
        raise ValueError("Unrecognised date: {!r}".format(value))
    return parsed.date()

def as_summary(value):
    """Flatten a parsed summary line into a four-item list.

    The first two items of ``value`` are copied as they are. The third item
    holds two one-element lists; the single element of each is pulled out.

    Returns:
        [value[0], value[1], value[2][0][0], value[2][1][0]]
    """
    when, kind = value[0], value[1]
    first_slot, second_slot = value[2][0], value[2][1]
    return [when, kind, first_slot[0], second_slot[0]]

def as_record(value):
    """Attach the joined detail lines to the summary list.

    ``value[1]`` (a sequence of strings) is joined with single spaces and the
    result is appended to ``value[0]``. The summary list is modified in place
    and that same list object is returned.
    """
    summary, detail_lines = value[0], value[1]
    summary.append(' '.join(detail_lines))
    return summary

In [None]:
# Data,,,Detalii tranzactie,,Debit,Credit

class IngParsers(TextParsers, whitespace=None):
    """Parsers for a statement CSV whose header row starts with 'Data'.

    Built from the field, comma and line-break parsers of CsvParsers.
    ``records`` is the entry point: a header followed by any number of
    records, where a record is one summary line plus the detail lines that
    follow it. Each record is reshaped by as_record.
    """
    two_commas = lit(',,')
    three_commas = lit(',,,')
    
    # Header row: the literal 'Data', one or more comma-separated fields, then
    # a line break. `>>` keeps only its right operand, so the fields are
    # matched but not returned.
    header = lit('Data') >> rep1sep(CsvParsers.field, CsvParsers.comma)  >> CsvParsers.crlf
    # Two-digit day, a month name from the list below, four-digit year;
    # the matched text is converted by as_date.
    date = reg(r'\d{2}\s(ianuarie|februarie|martie|aprilie|mai|iunie|iulie|august|septembrie|octombrie|noiembrie|decembrie)\s\d{4}') > as_date
    # A detail line starts with three commas; only the first field after them
    # is kept, the remaining fields of the line are matched and dropped.
    transaction_detail = (three_commas) >> CsvParsers.field << repsep(CsvParsers.field, CsvParsers.comma)
    # Zero or more detail lines, each terminated by a line break.
    transaction_details = rep(transaction_detail << CsvParsers.crlf)
    # A CSV field converted by as_float.
    decimal = CsvParsers.field > as_float
    # Two optional amounts separated by a comma; as_summary reads element 0 of
    # each, i.e. it expects every opt() result to be a one-element list.
    debit_credit = (opt(decimal) << CsvParsers.comma) & opt(decimal)
    # date ,,, field ,, amounts -- reshaped into a flat list by as_summary.
    summary_line = (date << three_commas) & (CsvParsers.field << two_commas) & debit_credit > as_summary
    # Footer rows: fixed comma layouts around two fields, ending in a line break.
    footer_line = CsvParsers.comma & CsvParsers.field & three_commas & CsvParsers.field & two_commas & CsvParsers.crlf
    footer_message = two_commas & CsvParsers.field & two_commas & CsvParsers.field & two_commas & CsvParsers.crlf
    # A footer is two footer_line rows followed by one footer_message row.
    footer = footer_line & footer_line & footer_message
    
    # `<<` binds tighter than `&`, and `>` binds loosest, so this reads as
    # ((summary_line << crlf) & (transaction_details << opt(...))) > as_record.
    # A record may be followed by a footer, optionally with a repeated header
    # after it -- presumably a page break in the export; verify against a sample.
    record = (summary_line << CsvParsers.crlf) & transaction_details << opt((footer & header) | footer) > as_record
    
    # Whole file: the header (discarded) followed by zero or more records.
    records = header >> rep(record)

In [None]:
# Parse every file found directly in folder_path and collect the resulting
# rows into one DataFrame.
ing_records = []
folder_path = "/home/"
# listdir() returns names in arbitrary order; sorting keeps the row order of
# the resulting frame reproducible between runs.
filePaths = sorted(f for f in listdir(folder_path) if isfile(join(folder_path, f)))
for filePath in filePaths:
    # join() builds a valid path whether or not folder_path ends with a separator.
    with open(join(folder_path, filePath), 'r') as csvFile:
        data = csvFile.read()
    success = IngParsers.records.parse(data)
    # A file that does not match the grammar yields a Failure, which has no
    # `.value`; stop with a message that names the file and the parse error.
    if isinstance(success, Failure):
        raise ValueError("Could not parse {}: {}".format(join(folder_path, filePath), success.message))
    # extend() appends in place instead of rebuilding the list for every file.
    ing_records.extend(success.value)

pd.set_option('display.max_rows', None)
df = pd.DataFrame.from_records(ing_records, columns=['Date', 'TransactionType', 'Debit', 'Credit', 'TransactionDetails'])

df