In [1]:
# Imports
import polars as pl
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf


In [2]:
# Extract and view data
start_date = "2023-01-01"
end_date = "2024-06-01"
symbol = "SHIB-USD"

# Fetch data using yfinance (network call)
df_yf = yf.download(symbol, start=start_date, end=end_date)

# Recent yfinance releases return two-level (field, ticker) column labels even
# for a single symbol. Keep only the field level so the columns are plain
# strings such as "Close", which the later cells select by name.
# On older releases the columns already have one level and this is a no-op.
if df_yf.columns.nlevels > 1:
    df_yf.columns = df_yf.columns.get_level_values(0)

# Convert to Polars DataFrame; reset_index() turns the date index into a
# regular "Date" column so it survives the conversion.
df = pl.from_pandas(df_yf.reset_index())
df.head(5)

[*********************100%%**********************]  1 of 1 completed


Date,Open,High,Low,Close,Adj Close,Volume
datetime[ns],f64,f64,f64,f64,f64,i64
2023-01-01 00:00:00,8e-06,8e-06,8e-06,8e-06,8e-06,54630954
2023-01-02 00:00:00,8e-06,8e-06,8e-06,8e-06,8e-06,87586024
2023-01-03 00:00:00,8e-06,8e-06,8e-06,8e-06,8e-06,80033128
2023-01-04 00:00:00,8e-06,8e-06,8e-06,8e-06,8e-06,143577598
2023-01-05 00:00:00,8e-06,9e-06,8e-06,8e-06,8e-06,262459409


In [3]:
# Simple returns: relative change of Close versus the previous row
df = df.with_columns(Returns=pl.col("Close").pct_change())
print(df.head(3))

shape: (3, 8)
┌─────────────────────┬──────────┬──────────┬──────────┬──────────┬───────────┬──────────┬─────────┐
│ Date                ┆ Open     ┆ High     ┆ Low      ┆ Close    ┆ Adj Close ┆ Volume   ┆ Returns │
│ ---                 ┆ ---      ┆ ---      ┆ ---      ┆ ---      ┆ ---       ┆ ---      ┆ ---     │
│ datetime[ns]        ┆ f64      ┆ f64      ┆ f64      ┆ f64      ┆ f64       ┆ i64      ┆ f64     │
╞═════════════════════╪══════════╪══════════╪══════════╪══════════╪═══════════╪══════════╪═════════╡
│ 2023-01-01 00:00:00 ┆ 0.000008 ┆ 0.000008 ┆ 0.000008 ┆ 0.000008 ┆ 0.000008  ┆ 54630954 ┆ null    │
│ 2023-01-02 00:00:00 ┆ 0.000008 ┆ 0.000008 ┆ 0.000008 ┆ 0.000008 ┆ 0.000008  ┆ 87586024 ┆ 0.0     │
│ 2023-01-03 00:00:00 ┆ 0.000008 ┆ 0.000008 ┆ 0.000008 ┆ 0.000008 ┆ 0.000008  ┆ 80033128 ┆ 0.0     │
└─────────────────────┴──────────┴──────────┴──────────┴──────────┴───────────┴──────────┴─────────┘


In [4]:
import numpy as np

# Calculate log returns: ln(Close_t / Close_{t-1}).
# Uses the native Polars `.log()` expression (natural log by default) instead
# of routing the column through NumPy with map_batches, so the computation
# stays inside the Polars engine. The first row has no previous Close and is
# therefore null.
df = df.with_columns(
    (pl.col("Close") / pl.col("Close").shift(1)).log().alias("Log Returns")
)
print(df.head(3))

shape: (3, 9)
┌──────────────┬──────────┬──────────┬──────────┬───┬───────────┬──────────┬─────────┬─────────────┐
│ Date         ┆ Open     ┆ High     ┆ Low      ┆ … ┆ Adj Close ┆ Volume   ┆ Returns ┆ Log Returns │
│ ---          ┆ ---      ┆ ---      ┆ ---      ┆   ┆ ---       ┆ ---      ┆ ---     ┆ ---         │
│ datetime[ns] ┆ f64      ┆ f64      ┆ f64      ┆   ┆ f64       ┆ i64      ┆ f64     ┆ f64         │
╞══════════════╪══════════╪══════════╪══════════╪═══╪═══════════╪══════════╪═════════╪═════════════╡
│ 2023-01-01   ┆ 0.000008 ┆ 0.000008 ┆ 0.000008 ┆ … ┆ 0.000008  ┆ 54630954 ┆ null    ┆ null        │
│ 00:00:00     ┆          ┆          ┆          ┆   ┆           ┆          ┆         ┆             │
│ 2023-01-02   ┆ 0.000008 ┆ 0.000008 ┆ 0.000008 ┆ … ┆ 0.000008  ┆ 87586024 ┆ 0.0     ┆ 0.0         │
│ 00:00:00     ┆          ┆          ┆          ┆   ┆           ┆          ┆         ┆             │
│ 2023-01-03   ┆ 0.000008 ┆ 0.000008 ┆ 0.000008 ┆ … ┆ 0.000008  ┆ 80033128 ┆ 

In [5]:
# Running total of the log returns, stored in a new "CumSum" column
df = df.with_columns(CumSum=pl.col("Log Returns").cum_sum())
print(df.head())


shape: (5, 10)
┌────────────────┬──────────┬──────────┬──────────┬───┬───────────┬─────────┬─────────────┬────────┐
│ Date           ┆ Open     ┆ High     ┆ Low      ┆ … ┆ Volume    ┆ Returns ┆ Log Returns ┆ CumSum │
│ ---            ┆ ---      ┆ ---      ┆ ---      ┆   ┆ ---       ┆ ---     ┆ ---         ┆ ---    │
│ datetime[ns]   ┆ f64      ┆ f64      ┆ f64      ┆   ┆ i64       ┆ f64     ┆ f64         ┆ f64    │
╞════════════════╪══════════╪══════════╪══════════╪═══╪═══════════╪═════════╪═════════════╪════════╡
│ 2023-01-01     ┆ 0.000008 ┆ 0.000008 ┆ 0.000008 ┆ … ┆ 54630954  ┆ null    ┆ null        ┆ null   │
│ 00:00:00       ┆          ┆          ┆          ┆   ┆           ┆         ┆             ┆        │
│ 2023-01-02     ┆ 0.000008 ┆ 0.000008 ┆ 0.000008 ┆ … ┆ 87586024  ┆ 0.0     ┆ 0.0         ┆ 0.0    │
│ 00:00:00       ┆          ┆          ┆          ┆   ┆           ┆         ┆             ┆        │
│ 2023-01-03     ┆ 0.000008 ┆ 0.000008 ┆ 0.000008 ┆ … ┆ 80033128  ┆ 0.0     

In [6]:
# Keep a copy of the enriched frame under its own name; the cells below start from df_new.
df_new = df.clone()

# Extract Arrays and Values from DF

In [7]:
import polars as pl
import numpy as np

# df_new is the copy of df created in the previous cell.

# Create a working copy (df_x) for the extraction / modification examples below
df_x = df_new.clone()

In [12]:
# Pull the Close column out as a NumPy array (the pandas counterpart is .values)
close_prices = df_x.get_column('Close').to_numpy()
print(close_prices[:4])

[7.99999998e-06 7.99999998e-06 7.99999998e-06 7.99999998e-06]


In [13]:
# Convert the NumPy array to a plain Python list if needed
close_prices_list = close_prices.tolist()
# Show only the first four entries to keep the output short
print(close_prices_list[:4])


[7.999999979801942e-06, 7.999999979801942e-06, 7.999999979801942e-06, 7.999999979801942e-06]


In [14]:
# Halve every close price (element-wise on the NumPy array)
mod_close_prices = close_prices * 0.5
print(mod_close_prices[:4])

[3.99999999e-06 3.99999999e-06 3.99999999e-06 3.99999999e-06]


In [18]:
# Add modified prices back to DataFrame
#
# 1. `with_columns()`:
#    - This is a Polars method used to add new columns or modify existing ones in a DataFrame.
#    - It returns a new DataFrame with the changes applied.
#
# 2. `pl.Series('mod_close', mod_close_prices)`:
#    - This creates a new Polars Series.
#    - 'mod_close' is the name of the new column.
#    - `mod_close_prices` is the NumPy array containing the data for this column.
#
# These notes are real comments rather than a trailing triple-quoted string:
# a bare string at the end of a cell is an expression, so the notebook would
# echo its repr as the cell's output.
df_x = df_x.with_columns(pl.Series('mod_close', mod_close_prices))
print(df_x.head())

shape: (5, 11)
┌────────────────┬──────────┬──────────┬──────────┬───┬─────────┬─────────────┬────────┬───────────┐
│ Date           ┆ Open     ┆ High     ┆ Low      ┆ … ┆ Returns ┆ Log Returns ┆ CumSum ┆ mod_close │
│ ---            ┆ ---      ┆ ---      ┆ ---      ┆   ┆ ---     ┆ ---         ┆ ---    ┆ ---       │
│ datetime[ns]   ┆ f64      ┆ f64      ┆ f64      ┆   ┆ f64     ┆ f64         ┆ f64    ┆ f64       │
╞════════════════╪══════════╪══════════╪══════════╪═══╪═════════╪═════════════╪════════╪═══════════╡
│ 2023-01-01     ┆ 0.000008 ┆ 0.000008 ┆ 0.000008 ┆ … ┆ null    ┆ null        ┆ null   ┆ 0.000004  │
│ 00:00:00       ┆          ┆          ┆          ┆   ┆         ┆             ┆        ┆           │
│ 2023-01-02     ┆ 0.000008 ┆ 0.000008 ┆ 0.000008 ┆ … ┆ 0.0     ┆ 0.0         ┆ 0.0    ┆ 0.000004  │
│ 00:00:00       ┆          ┆          ┆          ┆   ┆         ┆             ┆        ┆           │
│ 2023-01-03     ┆ 0.000008 ┆ 0.000008 ┆ 0.000008 ┆ … ┆ 0.0     ┆ 0.0       

"\n1. `with_columns()`: \n   - This is a Polars method used to add new columns or modify existing ones in a DataFrame.\n   - It returns a new DataFrame with the changes applied.\n\n2. `pl.Series('mod_close', mod_close_prices)`:\n   - This creates a new Polars Series.\n   - 'mod_close' is the name of the new column.\n   - `mod_close_prices` is the NumPy array containing the data for this column.\n\n"

In [21]:
# Take the first four Close values (the result is a Series, not a single scalar)
price = df_x.get_column('Close').head(4)
print(price)

shape: (4,)
Series: 'Close' [f64]
[
	0.000008
	0.000008
	0.000008
	0.000008
]


In [22]:
# Adjust several columns at once: divide Open, Close and Volume by their own
# column maximum. The results replace the existing columns of the same name.
df_x = df_x.with_columns(
    [pl.col(name) / pl.col(name).max() for name in ('Open', 'Close', 'Volume')]
)
print(df_x.tail())

shape: (5, 11)
┌─────────────┬──────────┬──────────┬──────────┬───┬───────────┬────────────┬──────────┬───────────┐
│ Date        ┆ Open     ┆ High     ┆ Low      ┆ … ┆ Returns   ┆ Log        ┆ CumSum   ┆ mod_close │
│ ---         ┆ ---      ┆ ---      ┆ ---      ┆   ┆ ---       ┆ Returns    ┆ ---      ┆ ---       │
│ datetime[ns ┆ f64      ┆ f64      ┆ f64      ┆   ┆ f64       ┆ ---        ┆ f64      ┆ f64       │
│ ]           ┆          ┆          ┆          ┆   ┆           ┆ f64        ┆          ┆           │
╞═════════════╪══════════╪══════════╪══════════╪═══╪═══════════╪════════════╪══════════╪═══════════╡
│ 2024-05-27  ┆ 0.666667 ┆ 0.000026 ┆ 0.000024 ┆ … ┆ 0.083333  ┆ 0.080043   ┆ 1.178655 ┆ 0.000013  │
│ 00:00:00    ┆          ┆          ┆          ┆   ┆           ┆            ┆          ┆           │
│ 2024-05-28  ┆ 0.722222 ┆ 0.000027 ┆ 0.000025 ┆ … ┆ 0.038462  ┆ 0.03774    ┆ 1.216395 ┆ 0.000014  │
│ 00:00:00    ┆          ┆          ┆          ┆   ┆           ┆            

In [23]:
# Build a named Polars Series from a plain Python list
series_1_list = [1, 3, 8, 4, 3]
series_1 = pl.Series(name='series', values=series_1_list)
print(series_1)


shape: (5,)
Series: 'series' [i64]
[
	1
	3
	8
	4
	3
]


In [24]:
# Wrap the Series in a one-column DataFrame; the column keeps the Series' name ('series')
df_created = series_1.to_frame()
print(df_created)

shape: (5, 1)
┌────────┐
│ series │
│ ---    │
│ i64    │
╞════════╡
│ 1      │
│ 3      │
│ 8      │
│ 4      │
│ 3      │
└────────┘


In [30]:
# Save DataFrame to CSV (path is relative to the notebook's working directory)
df_created.write_csv('polars_testv2.csv')

In [31]:
# Display df_created again after writing it to disk
print(df_created)

shape: (5, 1)
┌────────┐
│ series │
│ ---    │
│ i64    │
╞════════╡
│ 1      │
│ 3      │
│ 8      │
│ 4      │
│ 3      │
└────────┘


In [32]:
# Load the DataFrame back from the CSV file written earlier
my_df = pl.read_csv('polars_testv2.csv')
print(my_df)

shape: (5, 1)
┌────────┐
│ series │
│ ---    │
│ i64    │
╞════════╡
│ 1      │
│ 3      │
│ 8      │
│ 4      │
│ 3      │
└────────┘
