This notebook runs Python 3.11.7 on Anaconda3. It is a pipeline that takes analytics data from a CSV file and delivers JSON data for the software team.

In [3]:
'''Data pipeline to pull data from a local CSV file and load the cleaned data into a JSON file.'''
import pandas as pd # type: ignore
import json


class DataPipeline:
    def __init__(self, source:str, target:str) -> None:
        self.df = pd.DataFrame()
        self.source = source
        self.target = target
    def extract(self) -> None:
        self.df = pd.read_csv(self.source)
        unnamed_columns = [c for c in self.df.columns if 'Unnamed' in c]
        self.df.drop(columns=unnamed_columns, inplace=True)
    def transform(self) -> None:
        for col in self.df.columns:
            if 'date' in  col:
                self.df[col] = self.df[col].astype(str)
        self.df = self.df.to_dict(orient='records')
    def load(self) -> None:
        with open(self.target, 'w', encoding='utf-8') as json_file:
            json.dump(self.df, json_file, ensure_ascii=False, indent=4)
    def call_sos(self, error_message:str) -> None:
        print(error_message)


def main() -> None:
    '''Run the CSV-to-JSON pipeline on the processed data file.

    An exception raised by any pipeline stage is reported through
    DataPipeline.call_sos rather than propagated; on success a
    confirmation message is printed.
    '''
    source_path = '../data/processed.csv'
    target_path = '../data/processed.json'
    # Build the pipeline before the try block so the except handler can
    # always reach it; if it were bound inside the try, a failure during
    # construction would surface as an UnboundLocalError in the handler.
    pipeline = DataPipeline(source_path, target_path)
    try:
        pipeline.extract()
        pipeline.transform()
        pipeline.load()
    except Exception as e:
        pipeline.call_sos(str(e))
    else:
        print('Script is good!')

# Run the pipeline only when this file is executed directly, not when imported.
if __name__ == '__main__':
    main()


Script is good!
