# Add NRDY to the combined dataset

In [17]:
import os
import polars as pl

from pattern_detector import utils

### Read combined

In [10]:
combined_path = os.path.join('data', 'silver', 'combined.parquet')

combined_df = pl.read_parquet(combined_path)

# Date range covered by the combined dataset.
# Use the Series' own min()/max() instead of the Python builtins: the builtins
# iterate the column element by element in Python and raise TypeError as soon
# as a null shows up, while the Series methods are vectorized and skip nulls.
start_required = combined_df['date'].min()
end_date = combined_df['date'].max()

print(start_required, end_date)

2014-09-17 2024-09-13


### Cleaning

In [25]:
ticker = 'NRDY'
nrdy_path = os.path.join('data', 'NRDY.csv')

# Build the cleaned frame in a single pipeline:
#   1. read the raw CSV,
#   2. keep only the date (parsed from string to a Date) and the adjusted
#      close, renamed to `date` and to the ticker symbol respectively,
#   3. keep only rows dated strictly after the first date of the combined
#      dataset (`start_required`, computed in the "Read combined" cell).
df = (
    pl.read_csv(nrdy_path)
    .select(
        pl.col('Date').str.to_date().alias('date'),
        pl.col('Adj Close').alias(ticker),
    )
    .filter(pl.col('date') > start_required)
)

In [26]:
# Run the project helper over the cleaned price frame.
# NOTE(review): presumably this yields row-over-row fractional changes per
# column - confirm against pattern_detector.utils.convert_to_percentage.
converted_df = utils.convert_to_percentage(df)

# Skip the very first row of the converted frame and keep everything after it.
percent_df = converted_df.slice(1)

print(percent_df)

shape: (860, 2)
┌────────────┬───────────┐
│ date       ┆ NRDY      │
│ ---        ┆ ---       │
│ date       ┆ f64       │
╞════════════╪═══════════╡
│ 2020-12-01 ┆ -0.010224 │
│ 2020-12-02 ┆ 0.0       │
│ 2020-12-03 ┆ 0.04181   │
│ 2020-12-04 ┆ -0.017941 │
│ 2020-12-07 ┆ 0.009519  │
│ …          ┆ …         │
│ 2024-04-26 ┆ 0.061538  │
│ 2024-04-29 ┆ -0.018116 │
│ 2024-04-30 ┆ -0.04428  │
│ 2024-05-01 ┆ -0.011583 │
│ 2024-05-02 ┆ 0.03125   │
└────────────┴───────────┘


In [28]:
# Destination for the enriched dataset (kept separate from the input parquet).
out_filepath = os.path.join("data", "silver", "combined_with_nrdy.parquet")

# Full outer join on `date`: rows from both frames are kept, and with
# coalesce=True the two join keys are merged into a single `date` column.
# NOTE(review): row order after a full join may not be sorted by date -
# confirm whether downstream consumers rely on chronological order.
final_df = combined_df.join(
    percent_df,
    on="date",
    how="full",
    coalesce=True,
)

# Persist the joined frame as parquet.
final_df.write_parquet(out_filepath)