In [22]:
import os
from abc import ABC, abstractmethod
from typing import Optional
from urllib.parse import urlparse

import pandas as pd
import pysftp
import requests

class DownloadStrategy(ABC):
    """Abstract interface for objects that fetch the resource behind a URI.

    Concrete subclasses must override :meth:`read`; the class itself cannot
    be instantiated.
    """

    @abstractmethod
    def read(self, uri: str):
        """Fetch the resource identified by ``uri``.

        The base implementation does nothing and returns ``None``.
        """
        return None

    
class ParseStrategy(ABC):
    """Abstract interface for objects that parse a previously fetched resource.

    Concrete subclasses must override :meth:`read`; the class itself cannot
    be instantiated.
    """

    @abstractmethod
    def read(self, uri: str):
        """Parse the resource referred to by ``uri``.

        The base implementation does nothing and returns ``None``.
        """
        return None

    
class SFTPStrategy(DownloadStrategy):
    """Download strategy that fetches a file over SFTP into the local cache."""

    def read(self, uri : str):
        """Download the file addressed by ``uri`` and return its local path.

        Host, optional port, username and password are all taken from the
        URI itself. The file is always stored as ``./cachedir/file.csv``.

        Returns:
            The path of the downloaded file, so it can be handed to a parser
            the same way ``HTTPStrategy.read`` results are.
        """
        o = urlparse(uri)

        # Download via sftp
        cache = './cachedir'
        # The cache directory may not exist on a fresh checkout/kernel.
        os.makedirs(cache, exist_ok=True)
        target = f'{cache}/file.csv'

        cnopts = pysftp.CnOpts()
        # SECURITY NOTE(review): hostkeys=None disables host-key verification,
        # which leaves the connection open to man-in-the-middle attacks.
        # Kept as-is so existing hosts keep working; load known_hosts instead
        # once the target hosts are known.
        cnopts.hostkeys = None

        with pysftp.Connection(host=o.hostname,
                               username=o.username,
                               password=o.password,
                               port=o.port or 22,  # honour an explicit port in the URI
                               cnopts=cnopts) as sftp:
            # NOTE(review): the path component of a URI normally starts with
            # '/', in which case this cwd has no effect on the get() below.
            sftp.cwd('/upload')
            sftp.get(o.path, target)

        return target

class HTTPStrategy(DownloadStrategy):
    """Download strategy that fetches a file over HTTP(S) into the local cache."""

    def read(self, uri: str):
        """Download ``uri`` and return the path of the local copy.

        The response body is always stored as ``./cachedir/file.csv``.

        Returns:
            The path of the downloaded file.

        Raises:
            requests.HTTPError: if the server answers with a 4xx/5xx status.
        """
        # Download via http
        cache = './cachedir'
        # The cache directory may not exist on a fresh checkout/kernel.
        os.makedirs(cache, exist_ok=True)
        target = f'{cache}/file.csv'

        r = requests.get(uri, allow_redirects=True)
        # Fail loudly instead of caching an error page as if it were data.
        r.raise_for_status()

        # Context manager guarantees the handle is flushed and closed.
        with open(target, 'wb') as fh:
            fh.write(r.content)
        return target
        
class CSVParseStrategy(ParseStrategy):
    """Parse strategy for CSV content, backed by ``pandas.read_csv``."""

    def read(self, file: str):
        """Load ``file`` with ``pd.read_csv`` and return the resulting frame."""
        # Move stuff into dataspace
        return pd.read_csv(file)
    
    
# Registry mapping a URI scheme to the download strategy class that handles it.
download_strategy = dict(
    sftp=SFTPStrategy,
    http=HTTPStrategy,
    https=HTTPStrategy,
)

# Registry mapping a media type to the parse strategy class that handles it.
parser_strategy = {'text/csv': CSVParseStrategy}

class DataSourceContext():
    """Bind a URI and media type to a download strategy and a parse strategy.

    The download strategy is looked up in ``download_strategy`` by the URI
    scheme and the parse strategy in ``parser_strategy`` by the media type.
    An unknown scheme or media type leaves the corresponding strategy unset;
    the download strategy can still be injected later via the ``strategy``
    property.
    """

    def __init__(self, uri: str, mediaType: str) -> None:
        """Store ``uri``/``mediaType`` and instantiate the registered strategies."""
        self._uri = uri
        self._mediaType = mediaType

        scheme = urlparse(uri).scheme
        dl_cls = download_strategy.get(scheme)
        parse_cls = parser_strategy.get(mediaType)

        # Unregistered keys leave the strategy as None; read() reports that.
        self._dlstrategy = dl_cls() if dl_cls is not None else None
        self._parsestrategy = parse_cls() if parse_cls is not None else None

    @property
    def strategy(self) -> Optional[DownloadStrategy]:
        """The current download strategy, or ``None`` if none is set."""
        return self._dlstrategy

    @strategy.setter
    def strategy(self, strategy: DownloadStrategy) -> None:
        self._dlstrategy = strategy

    def read(self):
        """Download the resource and return the parse strategy's result.

        Raises:
            ValueError: if no download strategy or no parse strategy is
                available for this URI / media type.
        """
        if self._dlstrategy is None:
            raise ValueError(
                f'No download strategy available for URI {self._uri!r}')
        if self._parsestrategy is None:
            raise ValueError(
                f'No parse strategy available for media type {self._mediaType!r}')

        file = self._dlstrategy.read(self._uri)
        return self._parsestrategy.read(file)
        


In [31]:
from OTEServiceConnector import DataSourceContext, Converter, Mapper

# Link to a datasource
datasource = DataSourceContext('https://people.sc.fsu.edu/~jburkardt/data/csv/hurricanes.csv', 'text/csv')

# Set up the converter. csv2internal generates the internal data-model
# representation from a csv-file
converter = Converter('csv2internal')

# Map domain and properties to specific concepts
mapper = Mapper(domain='https://www.wikidata.org/wiki/Q58197759',
                property_maps=[
                    {'Month': 'https://www.wikidata.org/wiki/Q5151'},
                    {'Average': 'https://www.wikidata.org/wiki/Q202785'},
                ])

# NOTE(review): `Transformation` is not among the names imported at the top of
# this cell -- confirm where it comes from and import it explicitly.
transformation = Transformation(applicationName='somesim', 
                                applicationType='commandline', 
                                options={})

# Build a pipeline using pipes and filters
# (the original comment was cut off after "fil"; presumably "filter" -- confirm)

# NOTE(review): `A` and `B` are not defined in the visible cells; this looks
# like a placeholder illustrating `>>` chaining -- confirm or remove.
A >> B

l = datasource >> converter >> mapper

# NOTE(review): `transformation1` / `transformation2` are not defined in the
# visible cells (only `transformation` is) -- confirm the intended names.
l >> transformation1
l >> transformation2



'{"LatD":{"0":41,"1":42,"2":46,"3":42,"4":43,"5":36,"6":49,"7":39,"8":34,"9":39,"10":48,"11":41,"12":37,"13":33,"14":37,"15":40,"16":26,"17":47,"18":41,"19":31,"20":44,"21":42,"22":44,"23":43,"24":42,"25":41,"26":38,"27":41,"28":46,"29":31,"30":38,"31":28,"32":32,"33":49,"34":46,"35":30,"36":43,"37":39,"38":32,"39":42,"40":33,"41":34,"42":36,"43":32,"44":37,"45":40,"46":44,"47":43,"48":39,"49":41,"50":33,"51":39,"52":27,"53":30,"54":47,"55":43,"56":32,"57":33,"58":40,"59":37,"60":44,"61":40,"62":40,"63":38,"64":39,"65":37,"66":42,"67":39,"68":47,"69":41,"70":43,"71":42,"72":32,"73":33,"74":44,"75":35,"76":32,"77":38,"78":47,"79":41,"80":41,"81":42,"82":32,"83":46,"84":27,"85":38,"86":35,"87":34,"88":33,"89":37,"90":37,"91":41,"92":32,"93":34,"94":29,"95":31,"96":40,"97":38,"98":36,"99":38,"100":38,"101":44,"102":44,"103":38,"104":39,"105":42,"106":44,"107":45,"108":29,"109":43,"110":38,"111":43,"112":33,"113":35,"114":41,"115":42,"116":43,"117":44,"118":37,"119":37,"120":39,"121":38,"1