Skip to content

Commit

Permalink
[datastrato#2230] feat(client-python): Add doc and support publishing…
Browse files Browse the repository at this point in the history
… Gravitino Python client to Pypi (datastrato#3391)

### What changes were proposed in this pull request?
We published the first version of python-client package of Gravitino to
Pypi.
1. Add how to use Python Client document
2. add some tools to requirements.txt
3. `./gradlew :clients:client-python:distribution`
4. `./gradlew :clients:client-python:deploy`

### Why are the changes needed?
We need the Python client package so that users can use Gravitino in
Python projects.

datastrato#2230

### Does this PR introduce _any_ user-facing change?
Users can use `pip install gravitino` to install the Gravitino
python-client package directly.

### How was this patch tested?
CI Passed

Co-authored-by: SophieTech88 <141538510+SophieTech88@users.noreply.github.com>
Co-authored-by: xunliu <xun@datastrato.com>
  • Loading branch information
3 people committed May 15, 2024
1 parent 43de43a commit 0ee1665
Show file tree
Hide file tree
Showing 21 changed files with 560 additions and 101 deletions.
3 changes: 3 additions & 0 deletions clients/client-python/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,6 @@ venv
.vevn
.venv
.idea
dist
build
README.md
25 changes: 0 additions & 25 deletions clients/client-python/README.md

This file was deleted.

79 changes: 79 additions & 0 deletions clients/client-python/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,62 @@ fun gravitinoServer(operation: String) {
}
}

/**
 * Generates the `README.md` that is used as the PyPI project home page.
 *
 * The content is taken from `docs/how-to-use-python-client.md` in the root project:
 * 1. the leading front matter (everything up to and including the second `---` line) is
 *    dropped, because PyPI does not support a README that contains front matter;
 * 2. relative Markdown links of the form `[text](./a/b/c.md#anchor)` are rewritten to absolute
 *    links under the published docs site;
 * 3. relative image links of the form `![alt](./assets/x.png)` are rewritten to absolute raw
 *    GitHub URLs.
 *
 * The result is written in a single `writeText` call, so a stale `README.md` left behind by a
 * previous (failed) run is overwritten instead of being appended to.
 *
 * @throws GradleException if the README cannot be generated; the original exception is attached
 *   as the cause.
 */
fun generatePypiProjectHomePage() {
  try {
    val inputFile = file("${project.rootDir}/docs/how-to-use-python-client.md")
    val outputFile = file("README.md")

    // Keep only the lines that follow the second `---` delimiter. Later `---` lines (for example
    // horizontal rules) are ordinary content and are kept.
    var frontMatterDelimitersSeen = 0
    val content = buildString {
      for (line in inputFile.readLines()) {
        if (frontMatterDelimitersSeen < 2 && line.trim() == "---") {
          frontMatterDelimitersSeen++
          continue
        }
        if (frontMatterDelimitersSeen == 2) {
          append(line).append("\n")
        }
      }
    }

    // Because the README.md file is generated from the how-to-use-python-client.md file, the
    // relative paths of the images and Markdown links in it are incorrect. Rewrite them to
    // absolute URLs.
    val docsUrl = "https://datastrato.ai/docs/latest"

    // Use regular expression to match the `[](./a/b/c.md#arg1)` link in the content
    val patternDocs = "(?<!!)\\[([^]]+)]\\(\\.\\/([^)]*\\.md(?:#[^)]*)?)\\)".toRegex()
    val contentUpdateDocs = patternDocs.replace(content) { matchResult ->
      val linkText = matchResult.groupValues[1]
      val relativePath = matchResult.groupValues[2].replace(".md", "/")
      "[$linkText]($docsUrl/$relativePath)"
    }

    // Use regular expression to match the `![](./a/b/c.png)` link in the content
    val assetUrl = "https://raw.githubusercontent.com/datastrato/gravitino/main/docs"
    val patternImage = """!\[([^\]]+)]\(\./assets/([^)]+)\)""".toRegex()
    val contentUpdateImage = patternImage.replace(contentUpdateDocs) { matchResult ->
      val altText = matchResult.groupValues[1]
      val fileName = matchResult.groupValues[2]
      "![$altText]($assetUrl/assets/$fileName)"
    }

    outputFile.writeText(contentUpdateImage)
  } catch (e: Exception) {
    // Preserve the root cause so that a failed build shows why the README was not generated.
    throw GradleException("client-python README.md file not generated!", e)
  }
}

tasks {
val pipInstall by registering(VenvTask::class) {
venvExec = "pip"
Expand Down Expand Up @@ -89,8 +145,31 @@ tasks {
val build by registering(VenvTask::class) {
}

// Builds the source distribution of the Python client by running `setup.py sdist` through the
// virtual environment.
val distribution by registering(VenvTask::class) {
doFirst {
// Generate the README.md that setup.py reads as the long description, and remove any previous
// `dist` output before building.
generatePypiProjectHomePage()
delete("dist")
}

// NOTE(review): the executable name is capitalised ("Python3"); presumably this only resolves
// on case-insensitive file systems - confirm how VenvTask looks up `venvExec`.
venvExec = "Python3"
args = listOf("setup.py", "sdist")

doLast {
// README.md is a generated file (it is also listed in .gitignore), so remove it after the build.
delete("README.md")
}
}

// Deploy to https://pypi.org/project/gravitino/
// Uploads everything under `dist/` with twine. The password is read from the `TWINE_PASSWORD`
// environment variable; the task fails early when the variable is missing instead of passing
// the literal string "null" to twine.
val deploy by registering(VenvTask::class) {
  dependsOn(distribution)
  val twinePassword = System.getenv("TWINE_PASSWORD")
  doFirst {
    if (twinePassword.isNullOrBlank()) {
      throw GradleException("The TWINE_PASSWORD environment variable must be set to deploy to PyPI!")
    }
  }
  venvExec = "twine"
  args = listOf("upload", "dist/*", "-p$twinePassword")
}

val clean by registering(Delete::class) {
delete("build")
delete("dist")
delete("gravitino.egg-info")

doLast {
Expand Down
18 changes: 10 additions & 8 deletions clients/client-python/gravitino/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
This software is licensed under the Apache License version 2.
"""

from gravitino.gravitino_client import (
GravitinoClient,
gravitino_metalake,
MetaLake,
Catalog,
Schema,
Table,
)
from gravitino.api.catalog import Catalog
from gravitino.api.schema import Schema
from gravitino.api.fileset import Fileset
from gravitino.api.fileset_change import FilesetChange
from gravitino.api.metalake_change import MetalakeChange
from gravitino.api.schema_change import SchemaChange
from gravitino.client.gravitino_client import GravitinoClient
from gravitino.client.gravitino_admin_client import GravitinoAdminClient
from gravitino.client.gravitino_metalake import GravitinoMetalake
from gravitino.name_identifier import NameIdentifier
4 changes: 4 additions & 0 deletions clients/client-python/gravitino/catalog/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""
Copyright 2024 Datastrato Pvt Ltd.
This software is licensed under the Apache License version 2.
"""
15 changes: 7 additions & 8 deletions clients/client-python/gravitino/catalog/base_schema_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,13 +152,13 @@ def alter_schema(self, ident: NameIdentifier, *changes: SchemaChange) -> Schema:
reqs = [
BaseSchemaCatalog.to_schema_update_request(change) for change in changes
]
updatesRequest = SchemaUpdatesRequest(reqs)
updatesRequest.validate()
updates_request = SchemaUpdatesRequest(reqs)
updates_request.validate()
resp = self.rest_client.put(
BaseSchemaCatalog.format_schema_request_path(ident.namespace())
+ "/"
+ ident.name(),
updatesRequest,
updates_request,
)
schema_response = SchemaResponse.from_json(resp.body, infer_missing=True)
schema_response.validate()
Expand Down Expand Up @@ -189,8 +189,8 @@ def drop_schema(self, ident: NameIdentifier, cascade: bool) -> bool:
drop_resp = DropResponse.from_json(resp.body, infer_missing=True)
drop_resp.validate()
return drop_resp.dropped()
except Exception as e:
logger.warning(f"Failed to drop schema {ident}")
except Exception:
logger.warning("Failed to drop schema %s", ident)
return False

@staticmethod
Expand All @@ -203,7 +203,6 @@ def to_schema_update_request(change: SchemaChange):
return SchemaUpdateRequest.SetSchemaPropertyRequest(
change.property(), change.value()
)
elif isinstance(change, SchemaChange.RemoveProperty):
if isinstance(change, SchemaChange.RemoveProperty):
return SchemaUpdateRequest.RemoveSchemaPropertyRequest(change.property())
else:
raise ValueError(f"Unknown change type: {type(change).__name__}")
raise ValueError(f"Unknown change type: {type(change).__name__}")
11 changes: 5 additions & 6 deletions clients/client-python/gravitino/catalog/fileset_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def drop_fileset(self, ident: NameIdentifier) -> bool:

return drop_resp.dropped()
except Exception as e:
logger.warning(f"Failed to drop fileset {ident}: {e}")
logger.warning("Failed to drop fileset %s: %s", ident, e)
return False

@staticmethod
Expand All @@ -200,15 +200,14 @@ def format_fileset_request_path(namespace: Namespace) -> str:
def to_fileset_update_request(change: FilesetChange):
if isinstance(change, FilesetChange.RenameFileset):
return FilesetUpdateRequest.RenameFilesetRequest(change.new_name())
elif isinstance(change, FilesetChange.UpdateFilesetComment):
if isinstance(change, FilesetChange.UpdateFilesetComment):
return FilesetUpdateRequest.UpdateFilesetCommentRequest(
change.new_comment()
)
elif isinstance(change, FilesetChange.SetProperty):
if isinstance(change, FilesetChange.SetProperty):
return FilesetUpdateRequest.SetFilesetPropertyRequest(
change.property(), change.value()
)
elif isinstance(change, FilesetChange.RemoveProperty):
if isinstance(change, FilesetChange.RemoveProperty):
return FilesetUpdateRequest.RemoveFilesetPropertyRequest(change.property())
else:
raise ValueError(f"Unknown change type: {type(change).__name__}")
raise ValueError(f"Unknown change type: {type(change).__name__}")
4 changes: 4 additions & 0 deletions clients/client-python/gravitino/rest/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""
Copyright 2024 Datastrato Pvt Ltd.
This software is licensed under the Apache License version 2.
"""
3 changes: 2 additions & 1 deletion clients/client-python/requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
requests
dataclasses-json
pylint
black
black
twine
6 changes: 5 additions & 1 deletion clients/client-python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
# Copyright 2024 Datastrato Pvt Ltd.
# This software is licensed under the Apache License version 2.
# This software is licensed under the Apache License version 2.

# Runtime dependencies of the Python client (read by setup.py as install_requires)
requests
dataclasses-json
23 changes: 20 additions & 3 deletions clients/client-python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,33 @@
from setuptools import find_packages, setup


try:
with open("README.md") as f:
long_description = f.read()
except FileNotFoundError:
long_description = "Gravitino Python client"

setup(
name="gravitino",
description="Python lib/client for Gravitino",
version="0.5.0",
long_description=open("README.md").read(),
version="0.5.0.dev12",
long_description=long_description,
long_description_content_type="text/markdown",
url="https://github.com/datastrato/gravitino",
author="datastrato",
author_email="support@datastrato.com",
python_requires=">=3.8",
packages=find_packages(include=["gravitino", ".*"]),
packages=find_packages(exclude=["tests*"]),
classifiers=[
'Development Status :: 3 - Alpha',
'Intended Audience :: Developers',
'License :: OSI Approved :: Apache Software License',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
'Programming Language :: Python :: 3.12',
],
install_requires=open("requirements.txt").read(),
extras_require={
"dev": open("requirements-dev.txt").read(),
Expand Down
20 changes: 11 additions & 9 deletions clients/client-python/tests/integration/test_fileset_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@
from random import randint
from typing import Dict, List

from gravitino.api.fileset import Fileset
from gravitino.api.fileset_change import FilesetChange
from gravitino.client.gravitino_admin_client import GravitinoAdminClient
from gravitino.client.gravitino_client import GravitinoClient
from gravitino.dto.catalog_dto import CatalogDTO
from gravitino.name_identifier import NameIdentifier
from gravitino import (
NameIdentifier,
GravitinoAdminClient,
GravitinoClient,
Catalog,
Fileset,
FilesetChange,
)
from tests.integration.integration_test_env import IntegrationTestEnv

logger = logging.getLogger(__name__)
Expand All @@ -21,7 +23,7 @@
class TestFilesetCatalog(IntegrationTestEnv):
metalake_name: str = "TestFilesetCatalog-metalake" + str(randint(1, 10000))
catalog_name: str = "catalog"
catalog_location_pcatarop: str = "location" # Fileset Catalog must set `location`
catalog_location_prop: str = "location" # Fileset Catalog must set `location`
catalog_provider: str = "hadoop"

schema_name: str = "schema"
Expand Down Expand Up @@ -109,10 +111,10 @@ def init_test_env(self):
)
catalog = self.gravitino_client.create_catalog(
ident=self.catalog_ident,
type=CatalogDTO.Type.FILESET,
type=Catalog.Type.FILESET,
provider=self.catalog_provider,
comment="",
properties={self.catalog_location_pcatarop: "/tmp/test1"},
properties={self.catalog_location_prop: "/tmp/test1"},
)
catalog.as_schemas().create_schema(
ident=self.schema_ident, comment="", properties={}
Expand Down
10 changes: 6 additions & 4 deletions clients/client-python/tests/integration/test_metalake.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
import logging
from typing import Dict, List

from gravitino.client.gravitino_admin_client import GravitinoAdminClient
from gravitino.client.gravitino_metalake import GravitinoMetalake
from gravitino import (
GravitinoAdminClient,
GravitinoMetalake,
MetalakeChange,
NameIdentifier,
)
from gravitino.dto.dto_converters import DTOConverters
from gravitino.dto.requests.metalake_updates_request import MetalakeUpdatesRequest
from gravitino.api.metalake_change import MetalakeChange
from gravitino.name_identifier import NameIdentifier
from tests.integration.integration_test_env import IntegrationTestEnv

logger = logging.getLogger(__name__)
Expand Down
15 changes: 9 additions & 6 deletions clients/client-python/tests/integration/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,15 @@
from random import randint
from typing import Dict, List

from gravitino.api.catalog import Catalog
from gravitino.api.schema import Schema
from gravitino.api.schema_change import SchemaChange
from gravitino.client.gravitino_admin_client import GravitinoAdminClient
from gravitino.client.gravitino_client import GravitinoClient
from gravitino.name_identifier import NameIdentifier
from gravitino import (
NameIdentifier,
GravitinoAdminClient,
GravitinoClient,
Catalog,
SchemaChange,
Schema,
)

from tests.integration.integration_test_env import IntegrationTestEnv

logger = logging.getLogger(__name__)
Expand Down
2 changes: 1 addition & 1 deletion clients/client-python/tests/test_gravitino_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import unittest

from gravitino import GravitinoClient, gravitino_metalake
from gravitino.gravitino_client import gravitino_metalake, GravitinoClient
from .utils import services_fixtures


Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 0ee1665

Please sign in to comment.