Skip to content

Commit

Permalink
feat: add s3 file support
Browse files Browse the repository at this point in the history
  • Loading branch information
simontaurus committed Feb 26, 2024
1 parent 403c332 commit fdb4fdd
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 36 deletions.
114 changes: 78 additions & 36 deletions examples/file_upload_download.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import os
from io import StringIO

import osw.model.entity as model
from osw.auth import CredentialManager
from osw.core import OSW
from osw.utils.wiki import get_full_title
from osw.wtsite import WtSite

# install: pip install osw[S3]

# credential manager
# can use a file or hardcode the credentials, otherwise the user will be prompted to enter them
cm = CredentialManager(
Expand All @@ -21,48 +25,86 @@
)

# load the required schemas / data classes
osw_obj.fetch_schema(
OSW.FetchSchemaParam(
schema_title=[
"Category:OSW11a53cdfbdc24524bf8ac435cbf65d9d", # WikiFile
"Category:OSW3e3f5dd4f71842fbb8f270e511af8031", # LocalFile
],
mode="replace",
# fetch the schemas only if the model module does not yet provide the S3File class
if not hasattr(model, "S3File"):
osw_obj.fetch_schema(
OSW.FetchSchemaParam(
schema_title=[
"Category:OSW11a53cdfbdc24524bf8ac435cbf65d9d", # WikiFile
"Category:OSW3e3f5dd4f71842fbb8f270e511af8031", # LocalFile
"Category:OSWc43f749badcb4490a785505de1fc7d20", # S3File
],
mode="replace",
)
)
)

# import the controller modules
# note: since they depend on the data classes, they must be imported after the schemas are loaded
from osw.controller.file.local import ( # noqa (ignore flake8 warning)
LocalFileController,
)
from osw.controller.file.s3 import S3FileController # noqa (ignore flake8 warning)
from osw.controller.file.wiki import WikiFileController # noqa (ignore flake8 warning)

# create a local file; the equivalent with plain file I/O would be:
# with open("dummy.txt", "w") as f:
#     f.write("Hello World!")
lf = LocalFileController(path="dummy.txt")  # here a uuid already exists
lf.put(StringIO("Hello World!"))

# create a remote file (here: a wiki file)
wf = WikiFileController(osw=osw_obj)
# or cast to a wiki file to keep all common attributes
wf = lf.cast(WikiFileController, osw=osw_obj)
# which is equivalent to
wf = WikiFileController.from_other(lf, osw=osw_obj)

# upload the content of the local file to the remote file
wf.put_from(lf)
# write new content on the fly
wf.put(StringIO("Some new content"))

# get an existing file: load the entity by its full title, wrap it in a
# controller and copy its content into a second local file
file = osw_obj.load_entity(f"{wf.namespace}:{wf.title}")  # the file
wf2 = file.cast(WikiFileController, osw=osw_obj)  # the file controller
lf2 = LocalFileController.from_other(wf2, path="dummy2.txt")
lf2.put_from(wf2)

# delete the files
lf.delete()
wf.delete()  # note: wf2 actually points to the same file as wf
lf2.delete()

def video_file():
    """Copy the content of an existing wiki video file to ``dummy2.mp4``."""
    # load the file entity from the wiki by its full page title
    entity = osw_obj.load_entity("File:OSW7b2398a60f004006b9a6ef89210858f3.mp4")
    # wrap the entity in a controller that gives access to the file content
    remote_video = entity.cast(WikiFileController, osw=osw_obj)
    # remote_video.suffix = "mp4"
    # derive a local file from the remote one and pull the content over
    local_video = LocalFileController.from_other(remote_video, path="dummy2.mp4")
    local_video.put_from(remote_video)


def wiki_file():
    """Round-trip a small text file between the local disk and the wiki.

    Creates ``dummy.txt`` locally, uploads it as a wiki file, overwrites the
    remote content, loads the remote file again into ``dummy2.txt`` and
    finally deletes all files that were created.
    """
    # create a local file; the equivalent with plain file I/O would be:
    #     with open("dummy.txt", "w") as f:
    #         f.write("Hello World!")
    local_src = LocalFileController(path="dummy.txt")  # here a uuid already exists
    local_src.put(StringIO("Hello World!"))

    # three ways to obtain the remote (wiki) file controller:
    # 1. create it from scratch
    wf = WikiFileController(osw=osw_obj)
    # 2. cast the local file to a wiki file to keep all common attributes
    wf = local_src.cast(WikiFileController, osw=osw_obj)
    # 3. the equivalent alternate constructor
    wf = WikiFileController.from_other(local_src, osw=osw_obj)

    # upload the content of the local file to the remote file
    wf.put_from(local_src)
    # write new content on the fly
    wf.put(StringIO("Some new content"))

    # get an existing file: entity first, then its controller
    entity = osw_obj.load_entity(f"{wf.namespace}:{wf.title}")
    wf_loaded = entity.cast(WikiFileController, osw=osw_obj)
    local_copy = LocalFileController.from_other(wf_loaded, path="dummy2.txt")
    local_copy.put_from(wf_loaded)
    # resolve the full title of the loaded file (the result is not used here)
    get_full_title(wf_loaded)

    # delete the files
    local_src.delete()
    wf.delete()  # note: wf_loaded actually points to the same file as wf
    local_copy.delete()


def s3_file():
    """Download, re-upload, create and delete files in an S3 bucket.

    The credential file must contain an entry for the s3 domain whose
    username is the access_key_id and whose password is the
    secret_access_key.
    """
    # load the file entity and wrap it in the S3 file controller
    entity = osw_obj.load_entity("Item:OSW5f53ed0b5c354fc3b1a122b9066744f3")
    remote = entity.cast(S3FileController, cm=cm)

    # download
    local_copy = LocalFileController.from_other(remote, path="s3_test.txt")
    local_copy.put_from(remote)

    # upload
    remote.put_from(local_copy)

    # describe a new S3 file by its url, then write content to it on the fly
    new_entity = model.S3File(
        url="https://s3.example10.open-semantic-lab.org/test-bucket/test-example2.txt",
        label=[model.Label(text="NewFile")],
    )
    new_remote = new_entity.cast(S3FileController, cm=cm)
    new_remote.put(StringIO("Hello World!"))

    # remove the newly created object again
    new_remote.delete()
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ exclude =
dataimport = geopy; deepl
DB = psycopg2; sqlalchemy
UI = pysimplegui
S3 = boto3

# Add here dev requirements (semicolon/line-separated)
dev =
Expand Down
66 changes: 66 additions & 0 deletions src/osw/controller/file/s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from io import BytesIO, StringIO
from typing import IO, Any, Optional

import boto3

from osw.auth import CredentialManager
from osw.controller.file.remote import RemoteFileController
from osw.core import model


class S3FileController(model.S3File, RemoteFileController):
    """File controller for an object stored in an S3-compatible bucket.

    The object is addressed by ``self.url`` in path style:
    ``<protocol>//<domain>/<bucket>/<key>``. On construction the URL is split
    into these parts and a boto3 resource / client pair is created for the
    endpoint ``<protocol>//<domain>``, authenticated with the credential the
    credential manager ``cm`` provides for the domain.
    """

    # URL scheme including the trailing colon, e.g. "https:"
    protocol: Optional[str]
    # host part of the URL; also used to look up the credential
    domain: Optional[str]
    # first path segment of the URL
    bucket: Optional[str]
    # everything after the bucket segment (may itself contain "/")
    key: Optional[str]

    # provides the access key id (as username) and secret access key (as password)
    cm: CredentialManager
    s3_client: Optional[Any]
    s3_resource: Optional[Any]

    def __init__(self, **kwargs):
        """Parse the URL and connect to the S3 endpoint.

        Raises:
            ValueError: if the URL cannot be split into protocol, domain and
                bucket, or if no credential is available for the domain.
        """
        super().__init__(**kwargs)
        self._parse_url()
        creds: CredentialManager.UserPwdCredential = self.cm.get_credential(
            CredentialManager.CredentialConfig(iri=self.domain)
        )
        # NOTE(review): guards against a credential manager that answers with
        # None for an unknown domain; fail with a clear message in that case
        # instead of an AttributeError on the attribute access below
        if creds is None:
            raise ValueError(
                f"No credential available for S3 domain '{self.domain}'"
            )

        self.s3_resource = boto3.resource(
            "s3",
            aws_access_key_id=creds.username,
            aws_secret_access_key=creds.password,
            aws_session_token=None,  # replace this with token if necessary
            endpoint_url=self.protocol + "//" + self.domain,
            config=boto3.session.Config(signature_version="s3v4"),
            # verify=False
        )
        self.s3_client = self.s3_resource.meta.client

    def get(self) -> IO:
        """Return the body of the S3 object as a readable binary stream."""
        response = self.s3_resource.Object(bucket_name=self.bucket, key=self.key).get()
        return response["Body"]

    def put(self, file: IO):
        """Upload the content of ``file`` to the S3 object.

        Args:
            file: a file-like object. A ``StringIO`` is converted to bytes
                first (its complete buffer, independent of the current
                position), since the upload works on binary data.
        """
        if isinstance(file, StringIO):
            file = BytesIO(file.getvalue().encode())
        self.s3_client.upload_fileobj(file, self.bucket, self.key)

    def delete(self):
        """Delete the S3 object from its bucket."""
        self.s3_client.delete_object(Bucket=self.bucket, Key=self.key)

    def _parse_url(self):
        """Split ``self.url`` into ``protocol``, ``domain``, ``bucket`` and ``key``.

        The key is empty when the URL ends directly after the bucket.

        Raises:
            ValueError: if protocol, domain or bucket are missing.
        """
        url = self.url or ""
        # partition splits on the first separator only, so "//" or "/"
        # occurring later inside the key are left untouched
        protocol, separator, remainder = url.partition("//")
        domain, _, path = remainder.partition("/")
        bucket, _, key = path.partition("/")
        if not (separator and protocol and domain and bucket):
            raise ValueError(
                f"Cannot parse S3 url {self.url!r}: "
                "expected '<protocol>//<domain>/<bucket>/<key>'"
            )
        self.protocol = protocol
        self.domain = domain
        self.bucket = bucket
        self.key = key

0 comments on commit fdb4fdd

Please sign in to comment.