In [1]:
import calendar
import pandas as pd
from datetime import datetime, time, date
pd.set_option('display.max_rows', 200)

In [2]:
def convert_to_month_name(number: int):
    return calendar.month_name[number]

In [3]:
def return_monthly_avg(month_df: pd.DataFrame) -> float:
    amount_clicks = month_df['Aantal'].sum()
    amount_companies = len(month_df['Klant'])
    try:
        return round(amount_clicks / amount_companies, 2)
    except ZeroDivisionError:
        return 0

In [4]:
def return_yearly_result(df: pd.DataFrame, year: str) -> list:
    result = [year]
    month_list = df['Datum'].unique()
    for month in month_list:
        month_df = df[df['Datum'] == month]
        result.append(return_monthly_avg(month_df))
    return result

In [5]:
path = 'CAOWijzerDataTeGebruiken.xlsx'

In [6]:
df = pd.read_excel(path)

In [8]:
# df[df['Klant'] == 'Talentzaam (CAOWijzer)']

In [9]:
df = df[(df['Kosten component id'] == 28) | (df['Kosten component id'] == 38) | (df['Kosten component id'] == 50)]

In [10]:
year_2018 = datetime(2018, 1, 1)
year_2019 = datetime(2019, 1, 1)
year_2020 = datetime(2020, 1, 1)
year_2021 = datetime(2021, 1, 1)

In [11]:
df_2018 = df[df['Datum'] < year_2019]
df_2018 = df_2018[df_2018['Datum'] > year_2018]

In [12]:
df_2019 = df[df['Datum'] < year_2020]
df_2019 = df_2019[df_2019['Datum'] > year_2019]

In [13]:
df_2020 = df[df['Datum'] < year_2021]
df_2020 = df_2020[df_2020['Datum'] > year_2020]

In [14]:
df_2021 = df[df['Datum'] > year_2021]

In [15]:
result_2018 = return_yearly_result(df_2018, '2018')

In [16]:
result_2019 = return_yearly_result(df_2019, '2019')

In [17]:
result_2020 = return_yearly_result(df_2020, '2020')

In [18]:
result_2021 = return_yearly_result(df_2021, '2021')

In [20]:
for _ in range(len(result_2020 ) - len(result_2021)):
    result_2021.append(0)

In [21]:
overall_result = [result_2018, result_2019, result_2020, result_2020, result_2021]

In [22]:
print(result_2018)
print(result_2019)
print(result_2020)
print(result_2021)

['2018', 73.19, 55.72, 47.15, 42.15, 41.02, 43.39, 53.39, 44.15, 43.43, 54.19, 49.58, 41.09]
['2019', 57.82, 49.58, 47.73, 50.78, 48.46, 46.67, 57.49, 43.63, 55.81, 46.25, 46.86, 41.74]
['2020', 48.01, 44.98, 44.36, 31.92, 37.44, 48.11, 48.9, 40.12, 50.07, 52.98, 41.81, 36.88]
['2021', 42.62, 43.04, 56.18, 53.06, 49.07, 57.82, 61.98, 0, 0, 0, 0, 0]


In [23]:
df_2018['Datum'] = df_2018['Datum'].apply(lambda x: convert_to_month_name(x.month))

In [24]:
month_list = df_2018['Datum'].unique()

In [25]:
cols = ['Year']
cols.extend(month_list)

In [26]:
result_df = pd.DataFrame(columns=cols)

In [27]:
result_df

Unnamed: 0,Year,January,February,March,April,May,June,July,August,September,October,November,December


In [28]:
for r in overall_result:
    result_df.loc[len(result_df)] = r

In [29]:
result_df

Unnamed: 0,Year,January,February,March,April,May,June,July,August,September,October,November,December
0,2018,73.19,55.72,47.15,42.15,41.02,43.39,53.39,44.15,43.43,54.19,49.58,41.09
1,2019,57.82,49.58,47.73,50.78,48.46,46.67,57.49,43.63,55.81,46.25,46.86,41.74
2,2020,48.01,44.98,44.36,31.92,37.44,48.11,48.9,40.12,50.07,52.98,41.81,36.88
3,2020,48.01,44.98,44.36,31.92,37.44,48.11,48.9,40.12,50.07,52.98,41.81,36.88
4,2021,42.62,43.04,56.18,53.06,49.07,57.82,61.98,0.0,0.0,0.0,0.0,0.0


In [30]:
result_df.set_index('Year')

Unnamed: 0_level_0,January,February,March,April,May,June,July,August,September,October,November,December
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018,73.19,55.72,47.15,42.15,41.02,43.39,53.39,44.15,43.43,54.19,49.58,41.09
2019,57.82,49.58,47.73,50.78,48.46,46.67,57.49,43.63,55.81,46.25,46.86,41.74
2020,48.01,44.98,44.36,31.92,37.44,48.11,48.9,40.12,50.07,52.98,41.81,36.88
2020,48.01,44.98,44.36,31.92,37.44,48.11,48.9,40.12,50.07,52.98,41.81,36.88
2021,42.62,43.04,56.18,53.06,49.07,57.82,61.98,0.0,0.0,0.0,0.0,0.0


In [31]:
# result_df.plot.bar()

In [32]:
result_df.to_excel('ResultMultipleYears.xlsx')