### Lean Converter
Takes a CSV file of price data for a single ticker as input and converts it to the LEAN CLI data format. The output is a CSV file that can be imported into LEAN.

In [1]:
import pandas as pd
def convert_to_lean_format(csv_file, ticker, frequency='daily'):
    """
    Converts a CSV file containing stock data into a format compatible with LEAN Local CLI Framework.

    The input must contain the columns 'ts_event', 'open', 'high', 'low',
    'close' and 'volume'. 'ts_event' is parsed as a timestamp and written out
    as a 'date' column in YYYYMMDD form; the remaining columns are copied
    through unchanged.

    Args:
        csv_file (str): The path to the input CSV file containing stock data.
        ticker (str): The stock ticker symbol. Used verbatim as the output file name.
        frequency (str, optional): The frequency of the data. Can be 'daily', 'hourly', or 'minute'.
            Any other value is treated like 'minute'. Defaults to 'daily'.
    Returns:
        None: The function saves the converted data under data/equity/usa/<frequency>/,
            relative to the current working directory, creating the directory if needed.
    Raises:
        KeyError: If one of the required columns is missing from the input.
    """
    # Local import: the cell that defines this function only imports pandas.
    import os

    df = pd.read_csv(csv_file)
    # pop() removes the raw timestamp column while handing it to the parser.
    df['date'] = pd.to_datetime(df.pop('ts_event')).dt.strftime('%Y%m%d')
    df = df[['date', 'open', 'high', 'low', 'close', 'volume']]

    if frequency == 'daily':
        output_file = f"data/equity/usa/daily/{ticker}.csv"
    elif frequency == 'hourly':
        # NOTE(review): the V2 converter in this notebook writes to ".../hour";
        # confirm which directory name LEAN expects before relying on this path.
        output_file = f"data/equity/usa/hourly/{ticker}.csv"
    else:
        # The ".zip" suffix makes pandas compress the output (compression='infer').
        output_file = f"data/equity/usa/minute/{ticker}.zip"

    # to_csv does not create missing parent directories, so do it here.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    df.to_csv(output_file, index=False)

# Example: convert the AAPL export, treating it as daily data
convert_to_lean_format(csv_file='AAPL_data.csv', ticker='AAPL', frequency='daily')


V2 based on the sample readme within data file
##### To convert the data into the target format specified in the "sample_readme.md" markdown file, the following steps are performed: #####

Read the CSV file using pandas.
Convert the date format to YYYYMMDD HH:MM.
Convert prices from dollars to deci-cents.
Reorder the columns to match the target format: DateTime, Open, High, Low, Close, Volume.
Save the formatted data to a zip file containing a CSV file named aapl.csv.

##### Reading the CSV File: #####

The pd.read_csv(csv_file) function reads the CSV file into a pandas DataFrame.
##### Converting the Date Format: #####

The pd.to_datetime(df['ts_event']).dt.strftime('%Y%m%d %H:%M') function parses the ts_event column and stores it in a new date column in the YYYYMMDD HH:MM format.
##### Converting Prices to Deci-cents: #####

The prices are multiplied by 10,000 and converted to integers to represent deci-cents.
##### Reordering the Columns: #####

The columns are reordered to match the target format and renamed accordingly.
##### Saving and Zipping the Formatted Data: #####

The formatted data is saved to a CSV file, which is then zipped into a file named aapl.zip. The original CSV file is removed after zipping.

In [3]:
import pandas as pd
from datetime import datetime
import zipfile
import os

def convert_to_lean_hourly_daily_format(csv_file, ticker, resolution='hour'):
    """
    Converts a CSV file of stock data into a zipped CSV for LEAN hour/daily data.

    The input must contain the columns 'ts_event', 'open', 'high', 'low',
    'close' and 'volume'. Timestamps are rewritten as 'YYYYMMDD HH:MM', prices
    are scaled by 10,000 (dollars to deci-cents) and stored as integers, and
    the result is written to data/equity/usa/<resolution>/<ticker>.zip
    (relative to the current working directory) as a single member named
    <ticker>.csv.

    Args:
        csv_file (str): The path to the input CSV file containing stock data.
        ticker (str): The ticker symbol. Used verbatim for the zip and CSV names.
        resolution (str, optional): Output sub-directory, 'hour' or 'daily'.
            Defaults to 'hour', the directory this function always used before.
    Returns:
        None: The zip archive is written to disk.
    Raises:
        ValueError: If resolution is not 'hour' or 'daily'.
        KeyError: If one of the required columns is missing from the input.
    """
    if resolution not in ('hour', 'daily'):
        raise ValueError(
            f"resolution must be 'hour' or 'daily', got {resolution!r}"
        )

    # Read the CSV file
    df = pd.read_csv(csv_file)

    # Convert the date format to 'YYYYMMDD HH:MM'
    df['date'] = pd.to_datetime(df['ts_event']).dt.strftime('%Y%m%d %H:%M')

    # Convert prices from dollars to deci-cents.
    # Round before casting: a bare astype(int) truncates, so a float product
    # such as 0.57 * 10000 == 5699.999... would be stored as 5699.
    for price_column in ('open', 'high', 'low', 'close'):
        df[price_column] = (df[price_column] * 10000).round().astype('int64')

    # Reorder the columns to match the target format
    df = df[['date', 'open', 'high', 'low', 'close', 'volume']]
    # NOTE(review): this header row ends up in the output file; confirm that
    # LEAN's reader accepts a header line for this data type.
    df.columns = ['DateTime', 'Open', 'High', 'Low', 'Close', 'Volume']

    # Create the output directory if it doesn't exist
    output_dir = f"data/equity/usa/{resolution}"
    os.makedirs(output_dir, exist_ok=True)

    # Save the formatted data to a temporary CSV file next to the archive
    output_csv = f"{output_dir}/{ticker}.csv"
    df.to_csv(output_csv, index=False)

    try:
        # Zip the CSV file
        with zipfile.ZipFile(f"{output_dir}/{ticker}.zip", 'w', zipfile.ZIP_DEFLATED) as zipf:
            zipf.write(output_csv, arcname=f"{ticker}.csv")
    finally:
        # Remove the CSV file even if zipping failed, so no stray file is left
        os.remove(output_csv)

# Example: build the zipped LEAN file for the AAPL export
convert_to_lean_hourly_daily_format(csv_file='AAPL_data.csv', ticker='aapl')