Skip to content

Commit 6526cc2

Browse files
committed
Merge branch 'main' of github.com:apache/iceberg-python into fd-infer-types
2 parents 7382112 + 62191ee commit 6526cc2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+2653
-1245
lines changed

.github/ISSUE_TEMPLATE/iceberg_bug_report.yml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,8 @@ body:
2828
description: What Apache Iceberg version are you using?
2929
multiple: false
3030
options:
31-
- "0.8.1 (latest release)"
31+
- "0.9.0 (latest release)"
32+
- "0.8.1"
3233
- "0.8.0"
3334
- "0.7.1"
3435
- "0.7.0"

.github/dependabot.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -22,9 +22,9 @@ updates:
2222
- package-ecosystem: "pip"
2323
directory: "/"
2424
schedule:
25-
interval: "daily"
25+
interval: "weekly"
2626
open-pull-requests-limit: 50
2727
- package-ecosystem: "github-actions"
2828
directory: "/"
2929
schedule:
30-
interval: "daily"
30+
interval: "weekly"

.github/pull_request_template.md

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,14 @@
1+
<!--
2+
Thanks for opening a pull request!
3+
-->
4+
5+
<!-- In the case this PR will resolve an issue, please replace ${GITHUB_ISSUE_ID} below with the actual Github issue id. -->
6+
<!-- Closes #${GITHUB_ISSUE_ID} -->
7+
8+
# Rationale for this change
9+
10+
# Are these changes tested?
11+
12+
# Are there any user-facing changes?
13+
14+
<!-- In the case of user-facing changes, please add the changelog label. -->

.github/workflows/pypi-build-artifacts.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ jobs:
6262
if: startsWith(matrix.os, 'ubuntu')
6363

6464
- name: Build wheels
65-
uses: pypa/cibuildwheel@v2.23.0
65+
uses: pypa/cibuildwheel@v2.23.2
6666
with:
6767
output-dir: wheelhouse
6868
config-file: "pyproject.toml"

.github/workflows/svn-build-artifacts.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ jobs:
5757
if: startsWith(matrix.os, 'ubuntu')
5858

5959
- name: Build wheels
60-
uses: pypa/cibuildwheel@v2.23.0
60+
uses: pypa/cibuildwheel@v2.23.2
6161
with:
6262
output-dir: wheelhouse
6363
config-file: "pyproject.toml"

dev/.rat-excludes

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
.github/*
12
.rat-excludes
23
build
34
.git

dev/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ WORKDIR ${SPARK_HOME}
4040
ENV SPARK_VERSION=3.5.4
4141
ENV ICEBERG_SPARK_RUNTIME_VERSION=3.5_2.12
4242
ENV ICEBERG_VERSION=1.8.0
43-
ENV PYICEBERG_VERSION=0.8.1
43+
ENV PYICEBERG_VERSION=0.9.0
4444

4545
RUN curl --retry 5 -s -C - https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz -o spark-${SPARK_VERSION}-bin-hadoop3.tgz \
4646
&& tar xzf spark-${SPARK_VERSION}-bin-hadoop3.tgz --directory /opt/spark --strip-components 1 \

dev/provision.py

Lines changed: 88 additions & 76 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
# KIND, either express or implied. See the License for the
1515
# specific language governing permissions and limitations
1616
# under the License.
17+
import math
1718

1819
from pyspark.sql import SparkSession
1920
from pyspark.sql.functions import current_date, date_add, expr
@@ -113,89 +114,99 @@
113114
"""
114115
)
115116

116-
spark.sql(
117-
f"""
118-
CREATE OR REPLACE TABLE {catalog_name}.default.test_positional_mor_deletes (
119-
dt date,
120-
number integer,
121-
letter string
122-
)
123-
USING iceberg
124-
TBLPROPERTIES (
125-
'write.delete.mode'='merge-on-read',
126-
'write.update.mode'='merge-on-read',
127-
'write.merge.mode'='merge-on-read',
128-
'format-version'='2'
129-
);
130-
"""
131-
)
117+
# Merge on read has been implemented in version ≥2:
118+
# v2: Using positional deletes
119+
# v3: Using deletion vectors
132120

133-
spark.sql(
134-
f"""
135-
INSERT INTO {catalog_name}.default.test_positional_mor_deletes
136-
VALUES
137-
(CAST('2023-03-01' AS date), 1, 'a'),
138-
(CAST('2023-03-02' AS date), 2, 'b'),
139-
(CAST('2023-03-03' AS date), 3, 'c'),
140-
(CAST('2023-03-04' AS date), 4, 'd'),
141-
(CAST('2023-03-05' AS date), 5, 'e'),
142-
(CAST('2023-03-06' AS date), 6, 'f'),
143-
(CAST('2023-03-07' AS date), 7, 'g'),
144-
(CAST('2023-03-08' AS date), 8, 'h'),
145-
(CAST('2023-03-09' AS date), 9, 'i'),
146-
(CAST('2023-03-10' AS date), 10, 'j'),
147-
(CAST('2023-03-11' AS date), 11, 'k'),
148-
(CAST('2023-03-12' AS date), 12, 'l');
149-
"""
150-
)
121+
for format_version in [2, 3]:
122+
identifier = f'{catalog_name}.default.test_positional_mor_deletes_v{format_version}'
123+
spark.sql(
124+
f"""
125+
CREATE OR REPLACE TABLE {identifier} (
126+
dt date,
127+
number integer,
128+
letter string
129+
)
130+
USING iceberg
131+
TBLPROPERTIES (
132+
'write.delete.mode'='merge-on-read',
133+
'write.update.mode'='merge-on-read',
134+
'write.merge.mode'='merge-on-read',
135+
'format-version'='{format_version}'
136+
);
137+
"""
138+
)
139+
140+
spark.sql(
141+
f"""
142+
INSERT INTO {identifier}
143+
VALUES
144+
(CAST('2023-03-01' AS date), 1, 'a'),
145+
(CAST('2023-03-02' AS date), 2, 'b'),
146+
(CAST('2023-03-03' AS date), 3, 'c'),
147+
(CAST('2023-03-04' AS date), 4, 'd'),
148+
(CAST('2023-03-05' AS date), 5, 'e'),
149+
(CAST('2023-03-06' AS date), 6, 'f'),
150+
(CAST('2023-03-07' AS date), 7, 'g'),
151+
(CAST('2023-03-08' AS date), 8, 'h'),
152+
(CAST('2023-03-09' AS date), 9, 'i'),
153+
(CAST('2023-03-10' AS date), 10, 'j'),
154+
(CAST('2023-03-11' AS date), 11, 'k'),
155+
(CAST('2023-03-12' AS date), 12, 'l');
156+
"""
157+
)
151158

152-
spark.sql(f"ALTER TABLE {catalog_name}.default.test_positional_mor_deletes CREATE TAG tag_12")
159+
spark.sql(f"ALTER TABLE {identifier} CREATE TAG tag_12")
153160

154-
spark.sql(f"ALTER TABLE {catalog_name}.default.test_positional_mor_deletes CREATE BRANCH without_5")
161+
spark.sql(f"ALTER TABLE {identifier} CREATE BRANCH without_5")
155162

156-
spark.sql(f"DELETE FROM {catalog_name}.default.test_positional_mor_deletes.branch_without_5 WHERE number = 5")
163+
spark.sql(f"DELETE FROM {identifier}.branch_without_5 WHERE number = 5")
157164

158-
spark.sql(f"DELETE FROM {catalog_name}.default.test_positional_mor_deletes WHERE number = 9")
165+
spark.sql(f"DELETE FROM {identifier} WHERE number = 9")
159166

160-
spark.sql(
161-
f"""
162-
CREATE OR REPLACE TABLE {catalog_name}.default.test_positional_mor_double_deletes (
163-
dt date,
164-
number integer,
165-
letter string
166-
)
167-
USING iceberg
168-
TBLPROPERTIES (
169-
'write.delete.mode'='merge-on-read',
170-
'write.update.mode'='merge-on-read',
171-
'write.merge.mode'='merge-on-read',
172-
'format-version'='2'
173-
);
174-
"""
175-
)
167+
identifier = f'{catalog_name}.default.test_positional_mor_double_deletes_v{format_version}'
176168

177-
spark.sql(
178-
f"""
179-
INSERT INTO {catalog_name}.default.test_positional_mor_double_deletes
180-
VALUES
181-
(CAST('2023-03-01' AS date), 1, 'a'),
182-
(CAST('2023-03-02' AS date), 2, 'b'),
183-
(CAST('2023-03-03' AS date), 3, 'c'),
184-
(CAST('2023-03-04' AS date), 4, 'd'),
185-
(CAST('2023-03-05' AS date), 5, 'e'),
186-
(CAST('2023-03-06' AS date), 6, 'f'),
187-
(CAST('2023-03-07' AS date), 7, 'g'),
188-
(CAST('2023-03-08' AS date), 8, 'h'),
189-
(CAST('2023-03-09' AS date), 9, 'i'),
190-
(CAST('2023-03-10' AS date), 10, 'j'),
191-
(CAST('2023-03-11' AS date), 11, 'k'),
192-
(CAST('2023-03-12' AS date), 12, 'l');
193-
"""
194-
)
169+
spark.sql(
170+
f"""
171+
CREATE OR REPLACE TABLE {identifier} (
172+
dt date,
173+
number integer,
174+
letter string
175+
)
176+
USING iceberg
177+
TBLPROPERTIES (
178+
'write.delete.mode'='merge-on-read',
179+
'write.update.mode'='merge-on-read',
180+
'write.merge.mode'='merge-on-read',
181+
'format-version'='2'
182+
);
183+
"""
184+
)
195185

196-
spark.sql(f"DELETE FROM {catalog_name}.default.test_positional_mor_double_deletes WHERE number = 9")
186+
spark.sql(
187+
f"""
188+
INSERT INTO {identifier}
189+
VALUES
190+
(CAST('2023-03-01' AS date), 1, 'a'),
191+
(CAST('2023-03-02' AS date), 2, 'b'),
192+
(CAST('2023-03-03' AS date), 3, 'c'),
193+
(CAST('2023-03-04' AS date), 4, 'd'),
194+
(CAST('2023-03-05' AS date), 5, 'e'),
195+
(CAST('2023-03-06' AS date), 6, 'f'),
196+
(CAST('2023-03-07' AS date), 7, 'g'),
197+
(CAST('2023-03-08' AS date), 8, 'h'),
198+
(CAST('2023-03-09' AS date), 9, 'i'),
199+
(CAST('2023-03-10' AS date), 10, 'j'),
200+
(CAST('2023-03-11' AS date), 11, 'k'),
201+
(CAST('2023-03-12' AS date), 12, 'l');
202+
"""
203+
)
197204

198-
spark.sql(f"DELETE FROM {catalog_name}.default.test_positional_mor_double_deletes WHERE letter == 'f'")
205+
# Perform two deletes, should produce:
206+
# v2: two positional delete files
207+
# v3: one deletion vector since they are merged
208+
spark.sql(f"DELETE FROM {identifier} WHERE number = 9")
209+
spark.sql(f"DELETE FROM {identifier} WHERE letter == 'f'")
199210

200211
all_types_dataframe = (
201212
spark.range(0, 5, 1, 5)
@@ -328,6 +339,7 @@
328339
CREATE TABLE {catalog_name}.default.test_table_empty_list_and_map (
329340
col_list array<int>,
330341
col_map map<int, int>,
342+
col_struct struct<test:int>,
331343
col_list_with_struct array<struct<test:int>>
332344
)
333345
USING iceberg
@@ -340,8 +352,8 @@
340352
spark.sql(
341353
f"""
342354
INSERT INTO {catalog_name}.default.test_table_empty_list_and_map
343-
VALUES (null, null, null),
344-
(array(), map(), array(struct(1)))
355+
VALUES (null, null, null, null),
356+
(array(), map(), struct(1), array(struct(1)))
345357
"""
346358
)
347359

mkdocs/docs/how-to-release.md

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -379,6 +379,8 @@ Then, select the previous release version as the **Previous tag** to use the dif
379379

380380
**Set as the latest release** and **Publish**.
381381

382+
Make sure to check the `changelog` label on GitHub to see if anything needs to be highlighted.
383+
382384
### Release the docs
383385

384386
Run the [`Release Docs` Github Action](https://github.com/apache/iceberg-python/actions/workflows/python-release-docs.yml).

0 commit comments

Comments
 (0)