# Form 1

# In [13]:
import pandas as pd
import logging
import datetime
from indices import form_1_indices as INDICES


# Report exports that are passed to construct_df, in processing order.
csv_list = ['./Reports/1.csv', './Reports/2.csv', './Reports/3.csv', './Reports/4.csv']
# Logging: the root logger writes everything from DEBUG up to transform_logs.log.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
fh = logging.FileHandler('transform_logs.log', encoding='utf-8')
# Re-running this cell must not stack a second handler for the same file on
# the root logger, otherwise every record is written once per run of the cell.
for _existing in logger.handlers:
    if isinstance(_existing, logging.FileHandler) and _existing.baseFilename == fh.baseFilename:
        fh.close()  # discard the redundant handler that was just opened
        fh = _existing
        break
else:
    logger.addHandler(fh)
fh.setLevel(logging.DEBUG)
# Script name constant (not referenced by the code visible in this file).
NAME = 'report_form_1.py'

def _dates_to_sorted_columns(pivot):
    """Parse 'dd.mm.YYYY' column labels into datetimes and sort the columns chronologically."""
    pivot.columns = pd.to_datetime(pivot.columns, format='%d.%m.%Y')
    return pivot.sort_index(axis=1)


def _blank_if_nan(value):
    """Return '' for a missing value, otherwise the value unchanged."""
    return '' if pd.isna(value) else value


def _create_pivot(df):
    """Build the stacked per-call-list pivot for one prepared report frame.

    For every call list ('Имя колл-листа') and date ('Дата') four rows are
    produced, distinguished by a suffix appended to the call-list name:
    error count, call count, mean auto-score and error share.
    Zero error counts and undefined values are rendered as ''.
    """
    if df.empty:
        # Nothing to aggregate; an empty frame adds no columns when concatenated.
        return pd.DataFrame()

    # (1) Number of errors, kept numeric so it can be divided below.
    mistakes = df.pivot_table(index='Имя колл-листа', columns='Дата', values='Ошибки', aggfunc='sum')
    mistakes = _dates_to_sorted_columns(mistakes.fillna(0))
    # (2) Number of calls.
    calls = df.pivot_table(index='Имя колл-листа', columns='Дата', values='Результат автооценки',
                           aggfunc='count', fill_value=0)
    calls = _dates_to_sorted_columns(calls)
    # (3) Mean auto-score.
    mean_score = df.pivot_table(index='Имя колл-листа', columns='Дата', values='Результат автооценки',
                                aggfunc='mean', fill_value='')
    mean_score = _dates_to_sorted_columns(mean_score)
    # (4) Error share = errors / calls; 0/0 gives NaN, which is shown as ''.
    # Series.map per column is used instead of the deprecated DataFrame.applymap.
    error_rate = (mistakes / calls).apply(lambda column: column.map(_blank_if_nan))

    # Zero error counts are displayed as empty cells.
    mistakes_display = mistakes.replace(0.00, '')

    # Suffix the row labels only after the division above, which aligns on the
    # bare call-list names.
    labelled = []
    for frame, suffix in (
        (mistakes_display, ' (ошибки шт.)'),
        (calls, ' (всего шт.)'),
        (mean_score, ' (средняя АО)'),
        (error_rate, ' (доля ошибок %)'),
    ):
        frame.index = frame.index + suffix
        labelled.append(frame)

    # Stack the four tables vertically and order the rows by label.
    return pd.concat(labelled, axis=0).sort_index()


def construct_df(csv_list, indices=None):
    '''
    Build the Form 1 pivots from a list of report CSV files.

    Each file is read, reduced to its pivot and appended column-wise to the
    accumulated result, so only one raw report is held in memory at a time.
    Original author's note: linear time, ~10 sec per 1 day.

    Parameters:
        csv_list: iterable of paths to ';'-separated UTF-8 report files.
        indices: row labels used to seed both result frames; defaults to the
            module-level INDICES when omitted.

    Returns:
        (df_main, df_rpc): the pivot over all calls and the pivot over calls
        whose 'Контактное лицо' is 'Должник' (RPC).
    '''
    if indices is None:
        indices = INDICES
    df_main = pd.DataFrame(index=indices)
    df_rpc = pd.DataFrame(index=indices)
    for csv_path in csv_list:
        df = pd.read_csv(csv_path, sep=';', encoding='utf-8', header=0)
        # Drop multi-contract rows for RSB (rows without a '№ п/п' value).
        # Resetting the index right away yields an independent frame, so the
        # column assignments below do not write into a filtered slice.
        df = df[df['№ п/п'].notna()].reset_index(drop=True)
        # NOTE(review): this column is not read by any pivot; the conversion is
        # kept because it still rejects malformed durations.
        df['Длительность звонка'] = pd.to_timedelta(df['Длительность звонка'])
        # A call counts as an error when its auto-score is anything but 100.
        df['Ошибки'] = df['Результат автооценки'] != 100
        # Reduce the call timestamp to a plain 'dd.mm.YYYY' day label.
        call_time = pd.to_datetime(df['Дата звонка'], format='%d.%m.%Y %H:%M:%S')
        df['Дата'] = call_time.dt.strftime('%d.%m.%Y')
        # Warn when the file holds more than one distinct date.
        if df['Дата'].nunique(dropna=False) > 1:
            logger.warning('%s Warning: more than a single date in df...', datetime.datetime.now())

        # RPC subset: calls where the contact person is the debtor.
        rpc_df = df[df['Контактное лицо'] == 'Должник']

        # Append each pivot to its own accumulator; the temporary pivots are
        # released as soon as the concat returns.
        df_main = pd.concat([df_main, _create_pivot(df)], axis=1)
        df_rpc = pd.concat([df_rpc, _create_pivot(rpc_df)], axis=1)
    # Returns 2 complete pivots
    return df_main, df_rpc

# In [14]:
# Build both pivots from the configured reports: i receives the first returned
# frame (df_main) and j the second (df_rpc).
i, j = construct_df(csv_list)