In [1]:
import polars as pl
import os

(
    pl.scan_parquet("GOLD-1m.parquet")
    .rename({
        "column_1":"date",
        "column_2":"time",
        "column_3":"open",
        "column_4":"high",
        "column_5":"low",
        "column_6":"close",
        "column_7":"volume"
    })
    .with_columns([
        (pl.col("date") + " " + pl.col("time"))
        .str.strptime(pl.Datetime,"%Y.%m.%d %H:%M")
        .alias("datetime"),

        pl.col("open").cast(pl.Float64),
        pl.col("high").cast(pl.Float64),
        pl.col("low").cast(pl.Float64),
        pl.col("close").cast(pl.Float64),
        pl.col("volume").cast(pl.Float64),
    ])
    .drop(["date","time"])
    .sink_parquet("GOLD-1m_tmp.parquet")
)

os.replace("GOLD-1m_tmp.parquet", "GOLD-1m.parquet")


In [3]:
import polars as pl
df = pl.read_parquet("GOLD-1m.parquet")
df = df.rename({
    "datetime": "date"
})


In [4]:
df.head

<bound method DataFrame.head of shape: (3_182_336, 6)
┌──────────┬──────────┬──────────┬──────────┬────────┬─────────────────────┐
│ open     ┆ high     ┆ low      ┆ close    ┆ volume ┆ date                │
│ ---      ┆ ---      ┆ ---      ┆ ---      ┆ ---    ┆ ---                 │
│ f64      ┆ f64      ┆ f64      ┆ f64      ┆ f64    ┆ datetime[μs]        │
╞══════════╪══════════╪══════════╪══════════╪════════╪═════════════════════╡
│ 1150.67  ┆ 1150.67  ┆ 1150.67  ┆ 1150.67  ┆ 0.0    ┆ 2017-01-02 18:01:00 │
│ 1152.13  ┆ 1152.13  ┆ 1151.52  ┆ 1151.52  ┆ 0.0    ┆ 2017-01-02 18:02:00 │
│ 1151.45  ┆ 1151.8   ┆ 1151.2   ┆ 1151.56  ┆ 0.0    ┆ 2017-01-02 18:03:00 │
│ 1151.52  ┆ 1151.79  ┆ 1151.47  ┆ 1151.63  ┆ 0.0    ┆ 2017-01-02 18:04:00 │
│ 1151.59  ┆ 1151.59  ┆ 1150.57  ┆ 1150.63  ┆ 0.0    ┆ 2017-01-02 18:05:00 │
│ …        ┆ …        ┆ …        ┆ …        ┆ …      ┆ …                   │
│ 5039.685 ┆ 5042.635 ┆ 5039.238 ┆ 5042.635 ┆ 0.0    ┆ 2026-02-13 16:54:00 │
│ 5042.448 ┆ 5042.448 

In [5]:
df.write_parquet("GOLD-1m.parquet")


In [8]:
from datetime import date, timedelta
start = date(2017, 8, 17)
end   = date(2026, 2, 12)

df_filtered = df.filter(
    pl.col("date").is_between(start,end)
)


In [9]:
df_filtered.head

<bound method DataFrame.head of shape: (2_958_949, 6)
┌──────────┬──────────┬──────────┬──────────┬────────┬─────────────────────┐
│ open     ┆ high     ┆ low      ┆ close    ┆ volume ┆ date                │
│ ---      ┆ ---      ┆ ---      ┆ ---      ┆ ---    ┆ ---                 │
│ f64      ┆ f64      ┆ f64      ┆ f64      ┆ f64    ┆ datetime[μs]        │
╞══════════╪══════════╪══════════╪══════════╪════════╪═════════════════════╡
│ 1287.19  ┆ 1287.33  ┆ 1287.1   ┆ 1287.31  ┆ 0.0    ┆ 2017-08-17 00:00:00 │
│ 1287.3   ┆ 1287.43  ┆ 1287.2   ┆ 1287.24  ┆ 0.0    ┆ 2017-08-17 00:01:00 │
│ 1287.22  ┆ 1287.43  ┆ 1287.14  ┆ 1287.43  ┆ 0.0    ┆ 2017-08-17 00:02:00 │
│ 1287.42  ┆ 1287.44  ┆ 1287.16  ┆ 1287.32  ┆ 0.0    ┆ 2017-08-17 00:03:00 │
│ 1287.26  ┆ 1287.33  ┆ 1287.16  ┆ 1287.22  ┆ 0.0    ┆ 2017-08-17 00:04:00 │
│ …        ┆ …        ┆ …        ┆ …        ┆ …      ┆ …                   │
│ 5062.975 ┆ 5064.605 ┆ 5062.975 ┆ 5064.565 ┆ 0.0    ┆ 2026-02-11 23:56:00 │
│ 5064.535 ┆ 5067.275 

In [10]:
df_filtered.write_parquet("GOLD-1m.parquet")

In [13]:
df = pl.read_parquet("GOLD-1m.parquet")
df.head

<bound method DataFrame.head of shape: (2_958_949, 6)
┌──────────┬──────────┬──────────┬──────────┬────────┬─────────────────────┐
│ open     ┆ high     ┆ low      ┆ close    ┆ volume ┆ date                │
│ ---      ┆ ---      ┆ ---      ┆ ---      ┆ ---    ┆ ---                 │
│ f64      ┆ f64      ┆ f64      ┆ f64      ┆ f64    ┆ datetime[μs]        │
╞══════════╪══════════╪══════════╪══════════╪════════╪═════════════════════╡
│ 1287.19  ┆ 1287.33  ┆ 1287.1   ┆ 1287.31  ┆ 0.0    ┆ 2017-08-17 00:00:00 │
│ 1287.3   ┆ 1287.43  ┆ 1287.2   ┆ 1287.24  ┆ 0.0    ┆ 2017-08-17 00:01:00 │
│ 1287.22  ┆ 1287.43  ┆ 1287.14  ┆ 1287.43  ┆ 0.0    ┆ 2017-08-17 00:02:00 │
│ 1287.42  ┆ 1287.44  ┆ 1287.16  ┆ 1287.32  ┆ 0.0    ┆ 2017-08-17 00:03:00 │
│ 1287.26  ┆ 1287.33  ┆ 1287.16  ┆ 1287.22  ┆ 0.0    ┆ 2017-08-17 00:04:00 │
│ …        ┆ …        ┆ …        ┆ …        ┆ …      ┆ …                   │
│ 5062.975 ┆ 5064.605 ┆ 5062.975 ┆ 5064.565 ┆ 0.0    ┆ 2026-02-11 23:56:00 │
│ 5064.535 ┆ 5067.275 