In [1]:
from dotenv import load_dotenv
from the_project_tracker.core.data_models import Documentation
from the_project_tracker.db.pg_conn import PGDataConnection, SettingsSSH
import polars as pl
# Load variables from a .env file into the process environment before any
# settings object is constructed.
load_dotenv()

# Database handle used by the queries below.
# NOTE(review): SettingsSSH() is built with no arguments, so it presumably reads its
# values from the environment populated above — confirm against its definition.
db = PGDataConnection(ssh_config=SettingsSSH())

In [2]:
# Fetch every documentation row whose content contains the substring "deprecat"
# and convert the pandas result into a polars DataFrame.
deprecate_warnings = pl.from_pandas(
    db.run_query(
        f"SELECT * FROM {db.documentations_table} where doc_content like '%deprecat%' "
    )
)

# Preview the first matching page, split around the literal word "Parameters".
deprecate_warnings['doc_content'][0].split("Parameters")


['polars.repeat#\n\n\npolars.repeat(\n\nvalue: IntoExpr | None,\nn: int | Expr,\n*,\ndtype: PolarsDataType | None = None,\neager: Literal[False] = False,\nname: str | None = None,\n\n) → Expr[source]#\n\npolars.repeat(\n\nvalue: IntoExpr | None,\nn: int | Expr,\n*,\ndtype: PolarsDataType | None = None,\neager: Literal[True],\nname: str | None = None,\n\n) → Series\n\npolars.repeat(\n\nvalue: IntoExpr | None,\nn: int | Expr,\n*,\ndtype: PolarsDataType | None = None,\neager: bool,\nname: str | None = None,\n\n) → Expr | Series\nConstruct a column of length n filled with the given value.\n\n',
 ':\n\nvalueValue to repeat.\n\nnLength of the resulting column.\n\ndtypeData type of the resulting column. If set to None (default), data type is\ninferred from the given value. Defaults to Int32 for integer values, unless\nInt64 is required to fit the given value. Defaults to Float64 for float values.\n\neagerEvaluate immediately and return a Series. If set to False (default),\nreturn an expressio

### Get release notes info: deprecations and breaking changes

In [8]:
# Version window to inspect. Both values are looked up in the releases table as
# tags of the form 'py-<version>'.
base_version = '0.15.5'  # version currently in use (lower bound of the window)
desired_version = '0.19.5'  # version to upgrade to (upper bound of the window)

In [9]:
# Look up the publication timestamp of each end of the version window.
base_version_published_at = db.run_query(
    f"SELECT published_at FROM {db.releases_table} where tag_name = 'py-{base_version}'"
)
desired_version_published_at = db.run_query(
    f"SELECT published_at FROM {db.releases_table} where tag_name = 'py-{desired_version}'"
)

# The upper bound is mandatory: without it there is no window to query.
if not len(desired_version_published_at['published_at']):
    raise Exception("Not tracked desired version")

# Python releases of polars published up to (and including) the desired version.
base_query = f"SELECT * FROM {db.releases_table} where repo_url = 'https://github.com/pola-rs/polars' and name ilike 'Python%' and published_at <= '{desired_version_published_at['published_at'][0]}'"

# The lower bound is optional: an untracked base version leaves the window open-ended.
base_query += (
    f" and published_at >= '{base_version_published_at['published_at'][0]}'"
    if len(base_version_published_at['published_at'])
    else ""
)

releases = pl.from_pandas(db.run_query(base_query))

In [10]:
# Extracting deprecations and breaking changes
import re
def extract_deprecation_str(msg: str) -> 'str | None':
    """Return the "Deprecations" section of a release-notes body.

    The section starts at the first occurrence of the word ``Deprecations`` and
    runs up to (but not including) the next ``## `` heading. When no further
    heading follows, the section runs to the end of ``msg``.

    Args:
        msg: Release-notes text.

    Returns:
        The matched section text, or ``None`` when ``msg`` does not contain
        the word ``Deprecations``.
    """
    # The ``\Z`` alternative keeps a Deprecations section that is the last one
    # in the body; requiring a following heading alone would drop it.
    deprecations_pattern = r'Deprecations.*?(?=## [^\r\n]+|\Z)'
    # DOTALL lets ``.`` cross line breaks, since a section spans several lines.
    deprecations = re.search(deprecations_pattern, msg, re.DOTALL)
    if deprecations:
        return deprecations.group(0)
    return None

def extract_breaking_changes_str(msg: str) -> str:
    """Return the "Breaking changes" section of a release-notes body.

    The section begins at the first occurrence of ``Breaking`` and stops just
    before the next ``##`` marker, or at the end of the text when no marker
    follows. ``None`` is returned when ``Breaking`` does not occur in ``msg``.
    """
    # DOTALL so the lazy ``.*?`` can run across line breaks within the section.
    found = re.compile(r'Breaking.*?(?=##|$)', re.DOTALL).search(msg)
    return found.group(0) if found else None

# Spot-check the extractor on one release body with every LF and CR removed.
# NOTE(review): index 6 looks like an arbitrary sample row — confirm it is still a
# release that has a Deprecations section when the data changes.
extract_deprecation_str( releases['body'][6].replace('\n', '').replace('\r', ''))

'Deprecations- Add `Series.extend` (#9901)- Deprecate functions series input (#9878)'

In [11]:
# Run the extractor over every release body and join the extracted sections
# into one string (the "-" between sections in the output comes from
# `str.concat()` being called without an explicit delimiter).
(
    releases['body']
    .map_elements(extract_deprecation_str)
    .str.concat()[0]
)

'Deprecations\r\n\r\n- Deprecate behavior of list/tuple inputs for `lit` (#10461)\r\n\r\n-Deprecations\r\n\r\n- Rename `LazyFrame.read/write_json` to `de/serialize` (#10238)\r\n- Add `categorical_as_str` parameter to testing utils (#10350)\r\n\r\n-Deprecations\r\n\r\n- renaming `approx_unique` as `approx_n_unique` (#10290)\r\n- Rename first `qcut` parameter to `quantiles` (#10253)\r\n- Deprecate `avg` alias for `mean` (#10236)\r\n\r\n-Deprecations\r\n\r\n- Add `Series.extend` (#9901)\r\n- Deprecate functions series input (#9878)\r\n\r\n-Deprecations\r\n\r\n- Add `disable_string_cache` (#11020)\r\n\r\n-Deprecations\r\n\r\n- Rename `is_first/last` to `is_first/last_distinct` (Title and body: depr(python, rust!): Rename `is_first/last` to `is_first/last_distinct` )\r\n- Rename `count_match` to `count_matches` (Title and body: depr(python, rust!): Rename `count_match` to `count_matches` )\r\n- Rename `strip` to `strip_chars` (Title and body: depr(python, rust!): Rename `strip` to `strip_ch

In [12]:
# Run the extractor over every release body and join the extracted sections
# into one string (the "-" between sections in the output comes from
# `str.concat()` being called without an explicit delimiter).
(
    releases['body']
    .map_elements(extract_breaking_changes_str)
    .str.concat()[0]
)

'Breaking changes\r\n\r\n- propagate null in equality comparisons (#9053)\r\n- formalize implode -> explode relation (#9038)\r\n- Drop subclassing support for `DataFrame`/`LazyFrame` (#9008)\r\n- consistently return list of date/datetime from lazy date\\_range (#8513)\r\n- Default `date_range`/`ones`/`zeros` to `eager=False` (#9007)\r\n- Rename list namespace accesor from `.arr` to `.list` (#8999)\r\n- disallow time zones other than those in zoneinfo.available\\_timezones() (#8993)\r\n- remove window expression magic (#8992)\r\n- raise error when sorted flag not set (#8994)\r\n- Drop subclassing support for GroupBy (#7746)\r\n- in Series constructor, if inputs are time-zone-aware datetimes, convert to UTC (#8881)\r\n- parse offset-naive date time strings as Timestamp(time\\_unit), offset-aware datetime strings as Timestamp(time\\_unit, "UTC"), and remove the utc argument (#8714)\r\n- Remove deprecated tz\\_aware argument (#8696)\r\n\r\n-Breaking changes\r\n\r\n- Update `Expr.sample` si