In [7]:
import wrds
import polars as pl

def connect_to_wrds():
    """Open a WRDS session and return the connection object.

    ``wrds.Connection`` is called with no arguments, so credential handling
    is left entirely to the ``wrds`` package.

    Returns:
        The object returned by ``wrds.Connection()``.

    Raises:
        AttributeError: If the imported ``wrds`` module has no ``Connection``
            attribute. The message reports where the module was loaded from
            so the offending file can be located.
    """
    connection_factory = getattr(wrds, "Connection", None)
    if connection_factory is None:
        # Report the module's origin instead of the bare default message.
        # NOTE(review): a likely cause is a local file or folder named "wrds"
        # shadowing the installed package -- confirm via the reported path.
        loaded_from = getattr(wrds, "__file__", None) or "<unknown location>"
        raise AttributeError(
            "module 'wrds' has no attribute 'Connection' "
            f"(module was loaded from {loaded_from}); check that this path is "
            "the installed WRDS package and not a local file or folder named 'wrds'"
        )
    return connection_factory()

In [3]:
def get_ohlcv_data(conn, tickers, start_date, end_date):
    """Fetch daily OHLCV rows from ``crsp.dsf`` for the given tickers.

    The ticker list and date bounds are sent as bound parameters rather than
    being interpolated into the SQL text, so values containing quotes cannot
    break or alter the statement.

    Args:
        conn: Object exposing ``raw_sql(sql, params=...)``; in this notebook
            it is the value returned by ``connect_to_wrds()``.
        tickers: Iterable of ticker symbols. A single string is treated as
            one symbol rather than being split into characters.
        start_date: Inclusive lower bound for ``date``.
        end_date: Inclusive upper bound for ``date``.

    Returns:
        Whatever ``conn.raw_sql`` returns for the query. The selected columns
        are date, permno, cusip, ticker, close, low, high, volume and open,
        ordered by date.

    Raises:
        ValueError: If ``tickers`` is empty (an empty ``IN`` list is not
            valid SQL).
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(tickers, str):
        tickers = [tickers]
    ticker_values = tuple(tickers)
    if not ticker_values:
        raise ValueError("tickers must contain at least one symbol")

    # NOTE(review): confirm that crsp.dsf exposes a `ticker` column in your
    # subscription; if it does not, this needs a join against a names table.
    query = """
        SELECT date, permno, cusip, ticker, prc AS close,
               bidlo AS low, askhi AS high, vol AS volume, openprc AS open
        FROM crsp.dsf
        WHERE ticker IN %(tickers)s
          AND date BETWEEN %(start_date)s AND %(end_date)s
        ORDER BY date
    """
    # The tuple is expanded into the IN list by the database driver.
    params = {
        "tickers": ticker_values,
        "start_date": start_date,
        "end_date": end_date,
    }
    return conn.raw_sql(query, params=params)

def convert_to_polars(data):
    """Build a Polars DataFrame from ``data`` with ``date`` typed as ``pl.Date``.

    Args:
        data: Anything accepted by the ``pl.DataFrame`` constructor that
            yields a ``date`` column.

    Returns:
        A Polars DataFrame whose ``date`` column has dtype ``pl.Date``.

    The ``date`` column is handled by its incoming dtype:
      * strings are parsed with the ``%Y-%m-%d`` format;
      * ``pl.Date`` is left untouched;
      * any other dtype (for example a datetime) is cast to ``pl.Date``.
    """
    frame = pl.DataFrame(data)
    date_dtype = frame["date"].dtype

    if date_dtype == pl.Utf8:
        # Only string columns support .str.strptime; calling it on a column
        # that is already temporal raises instead of being a no-op.
        frame = frame.with_columns(
            pl.col("date").str.strptime(pl.Date, format="%Y-%m-%d")
        )
    elif date_dtype != pl.Date:
        frame = frame.with_columns(pl.col("date").cast(pl.Date))

    return frame


In [4]:
def convert_to_lean_format_polars(df, ticker, frequency='daily'):
    """Write the OHLCV columns of ``df`` to a per-ticker file under ``data/equity/usa``.

    Args:
        df: Polars DataFrame with ``date``, ``open``, ``high``, ``low``,
            ``close`` and ``volume`` columns; ``date`` must be a temporal
            column because it is formatted through the ``.dt`` namespace.
        ticker: Symbol used as the output file name.
        frequency: ``'daily'`` or ``'hourly'`` select the matching folder and
            a ``.csv`` extension; any other value writes to the ``minute``
            folder with a ``.zip`` extension.

    Returns:
        None. The result is written to disk with ``write_csv`` defaults.

    The output directory is created if it does not already exist, so a fresh
    checkout does not fail on the first write.
    """
    import os  # local import: this cell does not control the notebook's import cell

    # Keep only the OHLCV columns, in this order, with date rendered as YYYYMMDD.
    lean_frame = df.select(
        ['date', 'open', 'high', 'low', 'close', 'volume']
    ).with_columns(pl.col('date').dt.strftime('%Y%m%d'))

    if frequency == 'daily':
        output_file = f"data/equity/usa/daily/{ticker}.csv"
    elif frequency == 'hourly':
        output_file = f"data/equity/usa/hourly/{ticker}.csv"
    else:
        # NOTE(review): this path ends in .zip but plain CSV text is written
        # to it (no archive is created) -- confirm what the consumer expects.
        output_file = f"data/equity/usa/minute/{ticker}.zip"

    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    lean_frame.write_csv(output_file)


In [6]:
def wrds_pipeline(tickers, start_date, end_date, frequency='daily'):
    """Download OHLCV data for each ticker and write it to disk.

    For every ticker, in order: query WRDS via ``get_ohlcv_data``, convert the
    result with ``convert_to_polars``, then save it with
    ``convert_to_lean_format_polars``.

    Args:
        tickers: Iterable of ticker symbols; each is queried separately.
        start_date: Lower date bound passed through to ``get_ohlcv_data``.
        end_date: Upper date bound passed through to ``get_ohlcv_data``.
        frequency: Passed through to ``convert_to_lean_format_polars``.

    Returns:
        None.

    The connection is closed even when a step raises, so a failed query or
    write does not leave the session open. The exception still propagates
    and later tickers are not processed.
    """
    conn = connect_to_wrds()
    try:
        for ticker in tickers:
            raw_data = get_ohlcv_data(conn, [ticker], start_date, end_date)
            df_polars = convert_to_polars(raw_data)
            convert_to_lean_format_polars(df_polars, ticker, frequency)
    finally:
        conn.close()

# Example run: export daily bars for AAPL between 2023-01-01 and 2023-09-01.
tickers = ['AAPL']
wrds_pipeline(
    tickers,
    start_date='2023-01-01',
    end_date='2023-09-01',
    frequency='daily',
)


AttributeError: module 'wrds' has no attribute 'Connection'