In [17]:
import sqlite3
from pathlib import Path

import pandas as pd

class feedzaiChallenge():
    """Load CSV files into a SQLite database and export SQL query results to CSV.

    Typical flow: ``read_csv_files`` -> ``load_data`` (once per table) ->
    ``query_data`` -> ``close_connection``. The instance can also be used as a
    context manager, which closes the connection even when a step raises.
    """

    def __init__(self,database_name:str):
        """Open (creating it if needed) the SQLite file ``database/<database_name>.db``.

        Args:
            database_name: Name of the database file, without the ``.db`` suffix.
        """
        self.database_name = database_name
        # Table name -> DataFrame. Created up front so that load_data() raises a
        # KeyError (instead of an AttributeError) when nothing has been read yet.
        self.dfs = dict()
        # Let pathlib pick the separator: a literal backslash is only a path
        # separator on Windows, elsewhere it becomes part of the file name.
        db_path = Path('database') / f'{self.database_name}.db'
        # sqlite3 creates a missing database file but not a missing folder.
        db_path.parent.mkdir(parents=True, exist_ok=True)
        self.conn = sqlite3.connect(str(db_path))

    def __enter__(self):
        """Return the instance so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection on leaving the ``with`` block; exceptions propagate."""
        self.close_connection()

    def read_csv_files(self, csv_files:dict):
        """Read every CSV file into a DataFrame and store them in ``self.dfs``.

        Any previously stored DataFrames are replaced.

        Args:
            csv_files: Mapping of table name -> path of the CSV file to read.

        Raises:
            Any exception raised by ``pandas.read_csv`` (e.g. ``FileNotFoundError``).
            In that case ``self.dfs`` keeps its previous content.
        """
        # The comprehension is fully evaluated before the assignment, so a
        # failing file cannot leave a partially filled mapping behind.
        self.dfs = {name: pd.read_csv(path) for name, path in csv_files.items()}

    def load_data(self, table_name:str):
        """Write the DataFrame stored under ``table_name`` to a SQLite table of the same name.

        An existing table with that name is replaced; the DataFrame index is not written.

        Args:
            table_name: Key previously passed to ``read_csv_files``.

        Raises:
            KeyError: If no DataFrame was read for ``table_name``.
        """
        self.dfs[table_name].to_sql(table_name, self.conn, if_exists='replace', index=False)

    def query_data(self,query, output_path:str):
        """Run ``query`` against the database and save the result as a CSV file.

        Args:
            query: SQL statement to execute.
            output_path: Destination of the CSV file (written without the index).

        Returns:
            The query result as a DataFrame, so callers can inspect or display it.
        """
        result = pd.read_sql_query(query, self.conn)
        if isinstance(output_path, (str, Path)):
            # to_csv does not create missing folders.
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        result.to_csv(output_path, index=False)
        return result

    def close_connection(self):
        """Close the SQLite connection opened by the constructor."""
        self.conn.close()


In [18]:
# Create the helper; its constructor opens the SQLite connection for the
# 'feedzai_database' database used by the following cells.
feedzai = feedzaiChallenge('feedzai_database')

In [19]:
# Folder holding the source CSV files, relative to the notebook's working directory.
CSV_DIR = Path('csv_sources')

# Table name -> CSV file to read. The paths are built with pathlib because a
# literal backslash is only a path separator on Windows.
csv_files = {
    'time_off': CSV_DIR / 'time_off.csv',
    'work_hours': CSV_DIR / 'work_hours.csv',
}

In [20]:
# Read each CSV listed in csv_files into a DataFrame; the frames are kept on
# feedzai.dfs under the same keys as csv_files.
feedzai.read_csv_files(csv_files)

In [21]:
# Write each DataFrame to a SQLite table of the same name
# (load_data replaces a table that already exists).
for table_name in ('work_hours', 'time_off'):
    feedzai.load_data(table_name)

In [22]:
# Running (accumulated) cost per project and date.
#
# * The divisor is written as 3600.0 on purpose: SQLite truncates the result
#   when both operands of "/" are integers, which would silently drop partial
#   hours if `worked` is stored as INTEGER.
# * NOTE(review): presumably `worked` is in seconds and 100 is the hourly
#   rate - confirm against the challenge statement.
# * Ordering is done on the outer SELECT; an ORDER BY inside the CTE does not
#   guarantee the order of the final result.
query = """
WITH daily_accumulated_cost AS (
    SELECT
        project_id,
        date,
        SUM(worked) / 3600.0 * 100 AS accumulated_cost
    FROM
        work_hours
    GROUP BY
        project_id, date
)
SELECT
    project_id,
    date,
    SUM(accumulated_cost) OVER (PARTITION BY project_id ORDER BY date) AS total_accumulated_cost
FROM
    daily_accumulated_cost
ORDER BY
    project_id, date;
"""
# Forward slash works on every platform (including Windows), unlike a literal backslash.
feedzai.query_data(query, 'output_files/acumulated_actual_costs.csv')


In [23]:
# Close the SQLite connection that was opened when feedzai was constructed.
feedzai.close_connection()