In [1]:
import sqlite3
from pathlib import Path

import pandas as pd

class feedzaiChallenge():
    """Load CSV files into a SQLite database and export query results to CSV.

    The database file is ``database/<database_name>.db``, relative to the
    current working directory. DataFrames read by :meth:`read_csv_files` are
    kept in ``self.dfs`` keyed by the name given by the caller.
    """

    def __init__(self, database_name: str) -> None:
        """Open the SQLite database, creating its folder if it is missing.

        Args:
            database_name: Database file name without the ``.db`` suffix.

        Raises:
            OSError: If the ``database`` directory cannot be created.
            sqlite3.Error: If the database file cannot be opened.
        """
        self.database_name = database_name
        # Defined up front so load_data() can fail with a clear message when
        # it is called before read_csv_files().
        self.dfs = {}
        # Build the path with pathlib instead of a hard-coded backslash so the
        # file ends up inside the ``database`` folder on every OS.
        db_dir = Path('database')
        db_dir.mkdir(parents=True, exist_ok=True)
        self.conn = sqlite3.connect(str(db_dir / f'{self.database_name}.db'))

    def read_csv_files(self, csv_files: dict) -> None:
        """Read every CSV in ``csv_files`` into ``self.dfs``.

        Args:
            csv_files: Mapping of a name to the CSV path to read for it.

        Any error raised by ``pandas.read_csv`` propagates unchanged; in that
        case ``self.dfs`` keeps its previous content.
        """
        # Build the full mapping first so a failing file never leaves a
        # half-populated ``self.dfs`` behind.
        self.dfs = {name: pd.read_csv(path) for name, path in csv_files.items()}

    def load_data(self, table_name: str) -> None:
        """Write the DataFrame stored under ``table_name`` to a table of that name.

        An existing table with the same name is replaced; the DataFrame index
        is not written.

        Args:
            table_name: Key in ``self.dfs`` and name of the target table.

        Raises:
            KeyError: If no DataFrame was read under ``table_name``.
        """
        if table_name not in self.dfs:
            raise KeyError(
                f"No DataFrame named {table_name!r}; call read_csv_files() "
                f"with that key first (available: {sorted(self.dfs)})"
            )
        self.dfs[table_name].to_sql(table_name, self.conn, if_exists='replace', index=False)

    def query_data(self, query, output_path: str) -> None:
        """Run ``query`` and write the result to ``output_path`` as CSV.

        Args:
            query: SQL statement executed against the open connection.
            output_path: Destination CSV file; missing parent folders are
                created. The DataFrame index is not written.
        """
        result = pd.read_sql_query(query, self.conn)
        if isinstance(output_path, (str, Path)):
            # to_csv() does not create directories, so make sure the target
            # folder exists before writing.
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        result.to_csv(output_path, index=False)

    def close_connection(self) -> None:
        """Close the SQLite connection opened in ``__init__``."""
        self.conn.close()


In [2]:
# Helper instance backed by the 'feedzai_database' SQLite file.
feedzai = feedzaiChallenge(database_name='feedzai_database')

In [3]:
# Table name -> source CSV path, relative to the notebook's working directory.
# Forward slashes are used because they are accepted on Windows, macOS and
# Linux alike; a backslash is only a path separator on Windows.
csv_files = {
    'time_off': 'csv_sources/time_off.csv',
    'work_hours': 'csv_sources/work_hours.csv',
}

In [4]:
# Read every configured CSV into memory, keyed by its table name.
feedzai.read_csv_files(csv_files=csv_files)

In [5]:
# Write each DataFrame to a table of the same name (replacing any previous copy).
for table_name in ('work_hours', 'time_off'):
    feedzai.load_data(table_name)

In [6]:
# Running total of cost per project, accumulated in date order.
# NOTE(review): worked/1000.0 and the 100.0 multiplier are not explained in
# this notebook - presumably a unit conversion and a flat rate; confirm and
# promote them to named constants.
# NOTE(review): the ROWS frame yields one output row per work_hours row. If
# several rows share the same (project_id, date), the order of the running
# total among them is undefined - confirm whether one row per project/day is
# expected instead.
# Plain (non-f) string: there are no placeholders, and an f-string would
# misinterpret any literal brace added to the SQL later.
query_1 = """
SELECT
    project_id,
    date,
    SUM((worked/1000.0)*100.0) OVER (PARTITION BY project_id ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS total_accumulated_cost
FROM
    work_hours;
"""
# Forward slashes keep the output path valid on Windows, macOS and Linux.
feedzai.query_data(query_1, 'output_files/acumulated_actual_costs.csv')


In [7]:
# Utilisation per employee, month and project:
#   100 * hours worked on the project / hours the employee was available.
#
# Steps (one CTE each):
#   max_min                  - every date seen in work_hours / time_off
#   all_working_days         - Mon-Fri calendar between the min and max date
#   available_days_per_user  - working days on which the employee is outside
#                              ALL of their time_off periods
#   available_work_hours_per_user - 8 hours per available day, per month
#   worked_hours_by_month_by_project_by_employee - worked hours per project
#
# Fix: availability used to be counted directly on time_off x working days,
# so an employee with N time_off rows had most days counted N times (and days
# inside one period still counted via the other rows). Availability is now
# decided once per (employee, day): SUM(<outside>) = COUNT(*) is true only
# when the day is outside every period. For employees with a single time_off
# row the result is unchanged.
#
# NOTE(review): employees with no time_off row get no available hours and are
# dropped by the final inner join - confirm whether that can happen in the data.
# NOTE(review): 8 hours/day and worked/1000.0 are unexplained constants here.
# Plain (non-f) string: there are no placeholders to interpolate.
query_2 = """
with max_min as (
	SELECT date from work_hours UNION SELECT date_start FROM time_off UNION SELECT date_end FROM time_off
),
all_working_days as (
	with RECURSIVE DateRange AS (
	    SELECT min(date) AS Date FROM max_min
	    UNION ALL
	    SELECT DATE(Date, '+1 day') FROM DateRange WHERE Date < (select max(date) from max_min)
	)
	SELECT Date
	FROM DateRange
	WHERE strftime('%w', Date) NOT IN ('0', '6')
),
available_days_per_user as (
	SELECT
		t.employee_id,
		t.employee_name,
		d.Date as work_day
	FROM time_off t
	CROSS JOIN all_working_days d
	GROUP BY t.employee_id, t.employee_name, d.Date
	HAVING SUM(d.Date < t.date_start OR d.Date > t.date_end) = COUNT(*)
),
available_work_hours_per_user as (
	SELECT
		employee_id,
		employee_name,
		STRFTIME('%Y-%m', work_day) as work_month,
		count(work_day)*8 as hours
	FROM available_days_per_user
	GROUP BY employee_id, employee_name, work_month
),
worked_hours_by_month_by_project_by_employee as (
	SELECT
		wh.employee_id,
		STRFTIME('%Y-%m', wh.date) as work_month,
		wh.project_id,
		sum(wh.worked)/1000.0 as worked_total
	FROM work_hours wh
	GROUP BY employee_id, work_month, project_id
)
SELECT 
	ah.employee_name,
	ah.work_month,
	wh.project_id,
	100.0*wh.worked_total/ah.hours as project_utilization_percent
FROM worked_hours_by_month_by_project_by_employee wh
JOIN available_work_hours_per_user ah ON ah.employee_id = wh.employee_id AND ah.work_month = wh.work_month
"""
# Forward slashes keep the output path valid on Windows, macOS and Linux.
feedzai.query_data(query_2, 'output_files/project_utilization.csv')


In [8]:
# Release the SQLite connection once both query exports have been written.
feedzai.close_connection()