In [2]:
load_data_code = '''
import pandas as pd
import os

def load_csv(filename, subfolder='raw'):
    """
    Load a CSV from the data directory.

    Args:
        filename (str): Name of the file (e.g., 'spy_data.csv')
        subfolder (str): Subdirectory under /data (e.g., 'raw', 'processed')

    Returns:
        pd.DataFrame: Loaded DataFrame
    """
    path = os.path.join("data", subfolder, filename)
    return pd.read_csv(path)
'''

with open("SPY_Model/utils/load_data.py", "w") as f:
    f.write(load_data_code)

print("load_data.py restored.")

load_data.py restored.


In [3]:
import os
import shutil

# Current working directory
base_dir = "SPY_Model"
source_dir = base_dir
target_dir = os.path.join(base_dir, "data", "raw")

# Make sure the target exists
os.makedirs(target_dir, exist_ok=True)

# Move all CSVs that start with 'SPY_5min' to data/raw/
for filename in os.listdir(source_dir):
    if filename.startswith("SPY_5min") and filename.endswith(".csv"):
        src = os.path.join(source_dir, filename)
        dst = os.path.join(target_dir, filename)
        print(f"Moving {filename} -> data/raw/")
        shutil.move(src, dst)

print("All files moved successfully.")

All files moved successfully.


In [4]:
import os
print(os.getcwd())

/mnt/batch/tasks/shared/LS_root/mounts/clusters/arisgkourelas1/code/Users/arisgkourelas/SPY_Model/notebooks


In [5]:
import os
import shutil

# Base dir is two levels up from current notebooks folder
base_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))

# Correct paths
source_dir = os.path.join(base_dir, "SPY_Model")
target_dir = os.path.join(base_dir, "SPY_Model/data/raw")

# Make sure target exists
os.makedirs(target_dir, exist_ok=True)

# Move all SPY csvs
for filename in os.listdir(source_dir):
    if filename.startswith("SPY_5min") and filename.endswith(".csv"):
        src = os.path.join(source_dir, filename)
        dst = os.path.join(target_dir, filename)
        print(f"Moving {filename} -> data/raw/")
        shutil.move(src, dst)

print("All files moved successfully.")

All files moved successfully.


In [6]:
import os

# Print current working directory
print("Current notebook directory:\n", os.getcwd(), "\n")

# Walk from the current notebook’s root
for root, dirs, files in os.walk("."):
    for name in dirs + files:
        print(os.path.join(root, name))

Current notebook directory:
 /mnt/batch/tasks/shared/LS_root/mounts/clusters/arisgkourelas1/code/Users/arisgkourelas/SPY_Model/notebooks 

./.ipynb_aml_checkpoints
./SPY_Model
./.amlignore
./.amlignore.amltmp
./01_data_loading.ipynb
./01_data_loading.ipynb.amltmp
./935_model.ipynb
./935_model.ipynb.amltmp
./spy_classifier.ipynb
./.ipynb_aml_checkpoints/01_data_loading-checkpoint2025-4-10-23-55-59Z.ipynb
./.ipynb_aml_checkpoints/01_data_loading-checkpoint2025-4-10-5-45-34Z.ipynb
./.ipynb_aml_checkpoints/01_data_loading-checkpoint2025-4-10-5-46-23Z.ipynb
./.ipynb_aml_checkpoints/01_data_loading-checkpoint2025-4-10-5-50-27Z.ipynb
./.ipynb_aml_checkpoints/01_data_loading-checkpoint2025-4-10-5-6-9Z.ipynb
./.ipynb_aml_checkpoints/01_data_loading-checkpoint2025-4-10-7-2-30Z.ipynb
./.ipynb_aml_checkpoints/01_data_loading-checkpoint2025-4-10-9-4-33Z.ipynb
./.ipynb_aml_checkpoints/935_model-checkpoint2025-4-10-23-55-57Z.ipynb
./.ipynb_aml_checkpoints/935_model-checkpoint2025-4-10-8-16-33Z.ipynb


In [7]:
import os
import pandas as pd

# Show working directory
print("\n--- Current Working Directory ---")
print(os.getcwd())

# Walk through current folder and print all subfolders and files
print("\n--- Full Folder Tree from Current Directory ---")
for root, dirs, files in os.walk("."):
    for name in dirs + files:
        print(os.path.join(root, name))

# Check key file path manually
print("\n--- Checking Key File Existence ---")
csv_path = os.path.join("..", "data", "raw", "SPY_5min_2020_05.csv")
print("Looking for file at:", csv_path)
print("Exists:", os.path.isfile(csv_path))

# Try loading the CSV directly using pandas
try:
    df = pd.read_csv(csv_path)
    print("\n--- CSV File Loaded Successfully ---")
    print(df.head())
except Exception as e:
    print("\n--- CSV Load Failed ---")
    print(repr(e))


--- Current Working Directory ---
/mnt/batch/tasks/shared/LS_root/mounts/clusters/arisgkourelas1/code/Users/arisgkourelas/SPY_Model/notebooks

--- Full Folder Tree from Current Directory ---
./.ipynb_aml_checkpoints
./SPY_Model
./.amlignore
./.amlignore.amltmp
./01_data_loading.ipynb
./01_data_loading.ipynb.amltmp
./935_model.ipynb
./935_model.ipynb.amltmp
./spy_classifier.ipynb
./.ipynb_aml_checkpoints/01_data_loading-checkpoint2025-4-10-23-55-59Z.ipynb
./.ipynb_aml_checkpoints/01_data_loading-checkpoint2025-4-10-5-45-34Z.ipynb
./.ipynb_aml_checkpoints/01_data_loading-checkpoint2025-4-10-5-46-23Z.ipynb
./.ipynb_aml_checkpoints/01_data_loading-checkpoint2025-4-10-5-50-27Z.ipynb
./.ipynb_aml_checkpoints/01_data_loading-checkpoint2025-4-10-5-6-9Z.ipynb
./.ipynb_aml_checkpoints/01_data_loading-checkpoint2025-4-10-7-2-30Z.ipynb
./.ipynb_aml_checkpoints/01_data_loading-checkpoint2025-4-10-9-4-33Z.ipynb
./.ipynb_aml_checkpoints/935_model-checkpoint2025-4-10-23-55-57Z.ipynb
./.ipynb_aml_chec

In [8]:
import os
import pandas as pd

# Construct the absolute path.
abs_path = os.path.abspath(os.path.join("..", "data", "raw", "SPY_5min_2020_05.csv"))
print("Absolute file path:", abs_path)

# Try loading the CSV directly using the absolute path.
try:
    df = pd.read_csv(abs_path)
    print("CSV Loaded Successfully!")
    print(df.head())
except Exception as e:
    print("Error loading CSV:", repr(e))

Absolute file path: /mnt/batch/tasks/shared/LS_root/mounts/clusters/arisgkourelas1/code/Users/arisgkourelas/SPY_Model/data/raw/SPY_5min_2020_05.csv
CSV Loaded Successfully!
   volume        vw    open   close    high     low            timestamp  \
0  4927.0  294.0346  293.83  293.96  294.34  293.83  2020-05-11 08:00:00   
1  7420.0  293.9817  294.01  293.92  294.04  293.92  2020-05-11 08:05:00   
2  3675.0  293.8851  293.95  293.60  293.98  293.59  2020-05-11 08:10:00   
3  6945.0  293.5993  293.60  293.67  293.69  293.57  2020-05-11 08:15:00   
4  4252.0  293.7026  293.68  293.55  293.76  293.55  2020-05-11 08:20:00   

   trades  
0      48  
1      46  
2      35  
3      35  
4      45  


In [9]:
import sys
import os

# Add full path to the utils folder
sys.path.append(os.path.abspath("../utils"))

from load_data import load_csv

In [10]:
import pandas as pd
import os

from load_data import load_csv

# Define raw data folder path relative to this notebook
raw_dir = "../data/raw"

# List and sort all SPY CSV files
all_files = sorted(f for f in os.listdir(raw_dir) if f.startswith("SPY_5min") and f.endswith(".csv"))

# Load and combine
df_list = [load_csv(f, subfolder=raw_dir) for f in all_files]
full_df = pd.concat(df_list, ignore_index=True)

print("Combined full_df shape:", full_df.shape)
full_df.head()

Combined full_df shape: (487214, 8)


Unnamed: 0,volume,vw,open,close,high,low,timestamp,trades
0,4927.0,294.0346,293.83,293.96,294.34,293.83,2020-05-11 08:00:00,48
1,7420.0,293.9817,294.01,293.92,294.04,293.92,2020-05-11 08:05:00,46
2,3675.0,293.8851,293.95,293.6,293.98,293.59,2020-05-11 08:10:00,35
3,6945.0,293.5993,293.6,293.67,293.69,293.57,2020-05-11 08:15:00,35
4,4252.0,293.7026,293.68,293.55,293.76,293.55,2020-05-11 08:20:00,45


In [11]:
# Save combined data to processed folder
output_path = "../data/processed/full_SPY_data.csv"
full_df.to_csv(output_path, index=False)
print(f"Saved to: {output_path}")

Saved to: ../data/processed/full_SPY_data.csv
