In [1]:
from watermark import watermark
# Imports
import sys
import os
import platform
from os.path import exists

import pandas as pd
import polars as pl    # using Polars DataFrame library for Rust and Python
import numpy as np
import yfinance as yf
import pyarrow as pa  #  pyarrow library to convert pandas dataframe to arrow format, to then convert it to a polars dataframe

import matplotlib.pyplot as plt



import pickle                                          # method for save trained/fit model/s
import joblib                                          # method for save trained/fit model/s

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Report Technologies
# Record the OS/platform string and the full interpreter version so the
# environment that produced the outputs below can be reproduced.
print(f'Python Platform: {platform.platform()}')
print(f'Python {sys.version}')
# Default watermark report (timestamp, interpreter and machine details in the
# captured output below).
print(watermark())
# Versions of the modules imported into this notebook's global namespace.
print(watermark(iversions=True, globals_=globals()))

Python Platform: macOS-13.0.1-arm64-arm-64bit
Python 3.9.15 | packaged by conda-forge | (main, Nov 22 2022, 08:52:10) 
[Clang 14.0.6 ]
Last updated: 2023-04-28T16:29:47.799307-05:00

Python implementation: CPython
Python version       : 3.9.15
IPython version      : 8.6.0

Compiler    : Clang 14.0.6 
OS          : Darwin
Release     : 22.1.0
Machine     : arm64
Processor   : arm
CPU cores   : 10
Architecture: 64bit

polars    : 0.15.11
platform  : 1.0.8
matplotlib: 3.5.3
numpy     : 1.21.5
pyarrow   : 10.0.1
sys       : 3.9.15 | packaged by conda-forge | (main, Nov 22 2022, 08:52:10) 
[Clang 14.0.6 ]
yfinance  : 0.2.18
pandas    : 1.5.1



In [3]:
# block one

def polars_preprocessing_yf(symbol):
    """Download a symbol's price history and return it as a polars DataFrame.

    The data is fetched with ``yf.download``, rows containing missing values
    are dropped, and the date index is turned into a regular column before
    the frame is converted to polars.

    Args:
        symbol: Ticker passed straight to ``yf.download`` (e.g. "EURUSD=X").

    Returns:
        A polars DataFrame in which the "Date" column has been renamed to
        "index" and the "Adj Close" column has been removed.
    """
    # Fetch with yfinance, then clean up on the pandas side.
    downloaded = yf.download(symbol)
    prices_pd = downloaded.dropna().reset_index()

    # Convert to polars, expose the date under the name "index", and discard
    # the adjusted close.
    return (
        pl.from_pandas(prices_pd)
        .rename({"Date": "index"})
        .drop("Adj Close")
    )

# Load the price history for the Yahoo Finance ticker "EURUSD=X" into a polars frame.
polars_df = polars_preprocessing_yf("EURUSD=X")
# Bare last expression so the notebook renders the frame.
polars_df

[*********************100%***********************]  1 of 1 completed


index,Open,High,Low,Close,Volume
datetime[ns],f64,f64,f64,f64,i64
2003-12-01 00:00:00,1.203398,1.204007,1.194401,1.196501,0
2003-12-02 00:00:00,1.196101,1.210903,1.1946,1.208897,0
2003-12-03 00:00:00,1.209,1.213003,1.2077,1.212298,0
2003-12-04 00:00:00,1.212004,1.214403,1.204398,1.208094,0
2003-12-05 00:00:00,1.207802,1.219096,1.206593,1.218695,0
2003-12-08 00:00:00,1.216797,1.224005,1.215407,1.222001,0
2003-12-09 00:00:00,1.222105,1.227702,1.219795,1.224995,0
2003-12-10 00:00:00,1.224905,1.226603,1.216205,1.219096,0
2003-12-11 00:00:00,1.219096,1.223496,1.212298,1.222404,0
2003-12-12 00:00:00,1.222703,1.230603,1.2213,1.227898,0


In [4]:
# block two

# Add the fast (30-row) and slow (60-row) simple moving averages of "Close".
#
# The rolling mean has to run down the whole column in row order, so it must
# NOT be wrapped in `.over("index")`: that partitions the frame by the date
# column, whose values are unique per row, so every partition holds a single
# row and a 30/60-row rolling mean over one row is always null. With the
# window expression removed, only the first `window_size - 1` rows are null.
#
# `window_size` is an integer, so the windows count rows of the frame
# (one per downloaded date), not calendar days.
#
# Both columns are added in one `with_columns` call; re-running this cell just
# overwrites "SMA fast" / "SMA slow" with the same values.
polars_df = polars_df.with_columns(
    [
        # Simple moving average over the last 30 rows.
        pl.col("Close")
        .interpolate()
        .rolling_mean(window_size=30)
        .alias("SMA fast"),
        # Simple moving average over the last 60 rows.
        pl.col("Close")
        .interpolate()
        .rolling_mean(window_size=60)
        .alias("SMA slow"),
    ]
)