# Export Data

A generalized export class that transfers data from the scraper to the application's processing pipeline.

1. Query Data (->Read_Query)
2. Transform - pipeline-dependent; each pipeline subclasses the base class and inherits its general methods
3. Save & Upload

In [57]:
import pandas as pd
from awstools import write_data, generate_job
from read_query import dump_businesses


### Base Class ###

class Exporter():
    """Base class for exporting fetched data to the processing pipeline.

    Subclasses override ``fetch`` and ``transform`` (and usually ``export``);
    this class supplies the shared save / job-generation helpers and the
    ``fetch_transform`` convenience wrapper.
    """

    def __init__(self):
        pass

    def fetch(self):
        """Query the source data.

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError("subclasses must implement fetch()")

    def transform(self, data=None):
        """Transform fetched data into its export shape.

        Args:
            data: the data to transform. Accepted here so the signature
                matches the keyword call made by ``fetch_transform``.

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError("subclasses must implement transform()")

    def save_s3(self, tdata: pd.DataFrame, savepath, job_type=None,
                tablename=None, **kwargs):
        """Write to ``savepath`` via ``write_data`` and optionally queue a job.

        Args:
            tdata: the transformed data to save.
            savepath: destination path handed to ``write_data``.
            job_type: forwarded to ``generate_job``; supply together with
                ``tablename`` to have a job generated after the write.
            tablename: forwarded to ``generate_job``.
            **kwargs: extra keyword arguments forwarded to ``generate_job``.

        Raises:
            ValueError: if only one of ``job_type`` / ``tablename`` is given.
        """
        # Validate before writing so a bad call has no side effects.
        if (job_type is None) != (tablename is None):
            raise ValueError(
                "job_type and tablename must be supplied together"
            )

        # NOTE(review): `tdata` is not forwarded to write_data here; the
        # keyword write_data expects for the payload is not visible from this
        # file -- confirm against awstools and pass it through.
        write_data(
            savepath=savepath,
            filetype='json',
            dry_run=False,
        )

        # generate_job needs savepath, job_type and tablename; calling it
        # without them could only raise TypeError, so it is skipped unless the
        # job details were provided.
        if job_type is not None:
            self.generate_job(savepath, job_type, tablename, **kwargs)

    def export(self):
        """Export hook; a no-op in the base class."""
        pass

    def generate_job(self, savepath, job_type, tablename, **kwargs):
        """Forward the job description to ``awstools.generate_job``."""
        # NOTE(review): `obectpath` looks like a typo for `objectpath`; kept
        # as-is because the awstools signature is not visible -- confirm.
        generate_job(
            obectpath=savepath,
            job_type=job_type,
            tablename=tablename,
            **kwargs
        )

    def fetch_transform(self):
        """Run ``fetch`` and pass its result straight to ``transform``."""
        fetched = self.fetch()
        return self.transform(data=fetched)
        
    
    
### Derived Classes ###

class BusinessExport(Exporter):
    """Exporter for the business records returned by ``dump_businesses``."""

    # Column labels applied to fetched records when the caller gives none.
    # A tuple, so the default cannot be mutated between calls.
    DEFAULT_COLUMNS = (
        'business_id', 'name', 'address', 'city', 'state',
        'latitude', 'longitude', 'postal_code', 'review_count', 'stars',
        'is_open', 'categories',
    )

    def __init__(self):
        super().__init__()
        # Define both caches up front so transform()/export() can test them
        # without hitting AttributeError when called out of order.
        self.data_ = None
        self.transformed_data = None

    @staticmethod
    def _is_empty(obj):
        """Return True when ``obj`` is sized and has length zero."""
        try:
            return len(obj) == 0
        except TypeError:
            # Unsized objects cannot be shown to be empty.
            return False

    def fetch(self, columns=None):
        """Dump the businesses and return them as a DataFrame.

        Args:
            columns: column labels for the frame; defaults to
                ``DEFAULT_COLUMNS``.

        Returns:
            A ``pd.DataFrame`` built from the dumped records. The raw dump is
            kept on ``self.data_``.
        """
        if columns is None:
            columns = list(self.DEFAULT_COLUMNS)
        self.data_ = dump_businesses()
        return pd.DataFrame(self.data_, columns=columns)

    def transform(self, data=None):
        """Null transform: cache and return the data unchanged.

        Args:
            data: the data to transform; when omitted, the raw result of the
                last ``fetch`` (``self.data_``) is used.

        Returns:
            The same object that was passed in (or ``self.data_``).

        Raises:
            ValueError: if ``data`` is omitted and nothing non-empty has been
                fetched yet.
        """
        if data is None:
            # Explicit None/length checks: truth-testing a DataFrame raises.
            if self.data_ is None or self._is_empty(self.data_):
                raise ValueError(
                    "no data to transform: pass `data` or call fetch() first"
                )
            data = self.data_
        self.transformed_data = data
        return data  # Null transform returns data

    def export(self, filename, data=None):
        """Write data to ``filename`` as JSON records.

        Args:
            filename: path (or buffer) handed to ``DataFrame.to_json``.
            data: frame to export; defaults to the last transformed data.

        Raises:
            ValueError: if ``data`` is omitted and nothing has been
                transformed yet.
        """
        data_to_export = data if data is not None else self.transformed_data
        if data_to_export is None:
            raise ValueError(
                "no data to export: pass `data` or call transform() first"
            )
        data_to_export.to_json(path_or_buf=filename, orient='records')
        
        

In [58]:
# Create the exporter used by the cells below.
business_exporter = BusinessExport()

In [59]:
# Fetch the business records and pass them through transform(); the result is
# also cached on the exporter as `transformed_data`.
bdata = business_exporter.fetch_transform()
# Preview the first rows of the result.
bdata.head()

Unnamed: 0,business_id,name,address,city,state,latitude,longitude,postal_code,review_count,stars,is_open,categories
0,LnSPs3KafwvvsPjSZZIpIw,Nine Minute Oil & Lube,3839 El Camino Real,Palo Alto,CA,37.417688,-122.131003,94306,1237,5.0,1,"autorepair,oilchange"
1,mD1TFPyxCqAnQkxOyEbMJQ,Super Auto Glass,61 Airport BlvdSte A,South San Francisco,CA,37.650768,-122.409095,94080,575,5.0,1,"autoglass,windshieldinstallrepair"
2,ZBGyB5JEdXWq52yvp78kxw,Putnam Subaru,85 California Dr,Burlingame,CA,37.57758,-122.34134,94010,1038,4.5,1,"car_dealers,autorepair,autopartssupplies"
3,Ut12obILF409DeIMZYozEg,JP Auto Body Shop,112 S Linden AveNone,South San Francisco,CA,37.64603,-122.41381,94080,375,5.0,1,bodyshops
4,WjgkyKvlJ9vy3_juKKdMFw,Golden State Smog Center,1150 Old County Rd,Belmont,CA,37.519844,-122.274261,94002,493,5.0,1,smog_check_stations


In [60]:
# No `data` argument is given, so export() writes the exporter's cached
# `transformed_data` to test_biz.json as JSON records.
business_exporter.export(filename='test_biz.json')