In [41]:
from itertools import islice
import openpyxl
from pandas import DataFrame
import os
import traceback
import datetime
import matplotlib.pyplot as plt

%matplotlib inline

In [13]:
# Convert a worksheet with headers to a Pandas dataframe
def ws2df(ws: 'openpyxl.worksheet.worksheet.Worksheet') -> DataFrame:
    """Convert a worksheet with headers into a pandas DataFrame.

    The first row supplies the column labels and the first column supplies
    the row index; the top-left cell (header of the index column) is
    discarded.

    Args:
        ws: Any object whose ``values`` attribute iterates over the rows of
            the sheet as tuples of cell values (an openpyxl worksheet).

    Returns:
        A DataFrame indexed by the first column's values. An entirely empty
        worksheet yields an empty DataFrame instead of leaking the
        ``StopIteration`` raised by reading a header that is not there.
    """
    rows = iter(ws.values)
    header = next(rows, None)
    if header is None:
        # No header row at all: nothing to label, nothing to index.
        return DataFrame()

    cols = list(header[1:])
    body = list(rows)
    idx = [r[0] for r in body]
    # Rows are tuples, so plain slicing drops the index column.
    data = [r[1:] for r in body]
    return DataFrame(data, index=idx, columns=cols)

In [14]:
# Converts a 1-based column number to its letter
# Modified from https://stackoverflow.com/a/23862195/5139284
def colnum2str(n: int) -> str:
    """Return the spreadsheet column letters for a 1-based column number.

    1 -> 'A', 26 -> 'Z', 27 -> 'AA', and so on (bijective base 26).
    Numbers below 1 produce an empty string.
    """
    letters = []
    while n > 0:
        # Shift to 0-based before dividing so that 26 maps to 'Z', not 'A@'.
        n, offset = divmod(n - 1, 26)
        letters.append(chr(ord('A') + offset))
    # Digits were produced least-significant first.
    return "".join(reversed(letters))

In [42]:
def histogram(filename: str, date_col_title: str = 'Exam date'):
    """Save a bar chart of how many dates fall in each (year, month).

    Loads the first worksheet of the workbook at ``filename``, locates the
    column whose row-1 header equals ``date_col_title``, and writes a bar
    chart of per-month counts to ``f'{filename}_histogram.png'``. The
    workbook file itself is never written back.

    Dates on or after 2100-12-31 are clamped to 2100-12-31 before counting
    (presumably these are far-future placeholder dates; pandas nanosecond
    timestamps cannot represent dates past the year 2262). Blank cells are
    ignored, and cells holding something other than a date are reported and
    skipped.

    Args:
        filename: Path to the ``.xlsx`` workbook.
        date_col_title: Header text of the column containing the dates.

    Raises:
        KeyError: If no header cell in row 1 equals ``date_col_title``.
    """
    print('Creating a histogram for ' + filename + '...')
    wb = openpyxl.load_workbook(filename)
    sheet = wb.worksheets[0]

    # Map each header title to its 1-based column number.
    header_cells = next(sheet.iter_rows(min_row=1, max_row=1))
    col_indices = {cell.value: n for n, cell in enumerate(header_cells, start=1)}
    date_col = col_indices[date_col_title]

    latest_allowed = datetime.datetime(2100, 12, 31)
    dates = []
    dates_replaced: int = 0

    # Read the date column directly instead of deleting every other column
    # from the sheet: the result is the same and the sheet is left intact.
    for (cell,) in sheet.iter_rows(min_row=2, min_col=date_col, max_col=date_col):
        value = cell.value
        if value is None:
            # Blank (or merged) cell: nothing to count.
            continue
        if isinstance(value, datetime.date) and not isinstance(value, datetime.datetime):
            # Promote a plain date so it can be compared with the clamp value.
            value = datetime.datetime.combine(value, datetime.time.min)
        if not isinstance(value, datetime.datetime):
            print(f'Row {cell.row} skipped: expected a date but found {type(value).__name__}')
            continue
        if value >= latest_allowed:
            value = latest_allowed
            dates_replaced += 1
        dates.append(value)
    print(dates_replaced, 'dates replaced')

    if not dates:
        print('No dates found in column ' + repr(date_col_title) + '; nothing to plot')
        return

    df = DataFrame({date_col_title: dates})
    # An explicit unit is required; unit-less 'datetime64' is rejected by pandas >= 2.0.
    df[date_col_title] = df[date_col_title].astype('datetime64[ns]')
    stamps = df[date_col_title]

    # https://stackoverflow.com/a/29036738/5139284
    monthly_counts = df.groupby(
        [stamps.dt.year.rename('year'), stamps.dt.month.rename('month')]
    ).count()

    fig, ax = plt.subplots()
    monthly_counts.plot(kind="bar", ax=ax)
    ax.set_xlabel('year, month')
    ax.set_ylabel('count')
    fig.savefig(f'{filename}_histogram.png')

In [None]:
# Folder holding the patient workbooks; NOTE(review): absolute, machine-specific path.
patients_dir = r'C:\Users\micha\Google Drive\Patients 8-16-19'
histogram(os.path.join(patients_dir, 'Checkups.xlsx'))


Creating a histogram for C:\Users\micha\Google Drive\Patients 8-16-19\Checkups.xlsx...
97 dates replaced
