In [12]:
import yfinance as yf
import pandas as pd
import os
from datetime import datetime

# Make sure the output folder for raw CSVs exists (path is relative to the
# working directory; exist_ok avoids an error on re-runs).
os.makedirs('../data/raw', exist_ok=True)

# Symbols to download and the date window passed to the downloader.
# NOTE(review): the recorded run below ends on 2025-07-30, i.e. the end date
# itself is not present in the data — confirm how yfinance treats `end` and
# move it one day later if 2025-07-31 is meant to be included.
tickers = ['TSLA', 'BND', 'SPY']
start_date = '2015-07-01'
end_date = '2025-07-31'

def fetch_data(ticker, start, end):
    """Download daily price history for one ticker and return it as a tidy frame.

    The history is requested through ``yf.Ticker(ticker).history`` with
    ``auto_adjust=False`` so that both ``Close`` and ``Adj Close`` are kept.

    Parameters
    ----------
    ticker : str
        Symbol to download; also written into the ``Ticker`` column.
    start, end :
        Date bounds forwarded unchanged to ``history``.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``Date, Open, High, Low, Close, Adj Close, Volume, Ticker``
        with price/volume columns coerced to numeric and gaps forward-filled.
        ``None`` is returned when anything goes wrong, including when the
        download comes back with no rows; the reason is printed, not raised.
    """
    try:
        print(f"\nDownloading {ticker} data...")

        data = yf.Ticker(ticker).history(
            start=start,
            end=end,
            auto_adjust=False,
            actions=True
        )

        # Treat an empty download as a failure so the caller never writes a
        # CSV that contains only a header row.
        if data.empty:
            raise ValueError(f"no rows returned between {start} and {end}")

        # Move the date index into a column and normalise header case so the
        # mapping below does not depend on the provider's capitalisation.
        data = data.reset_index()
        data.columns = data.columns.str.lower()

        # Canonical output names; every other column (dividends, splits, ...)
        # is dropped by the selection.
        column_mapping = {
            'date': 'Date',
            'open': 'Open',
            'high': 'High',
            'low': 'Low',
            'close': 'Close',
            'adj close': 'Adj Close',
            'volume': 'Volume'
        }

        data = data.rename(columns=column_mapping)
        data = data[list(column_mapping.values())]

        data['Ticker'] = ticker

        # Coerce dtypes; unparseable numeric values become NaN.
        numeric_cols = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
        data[numeric_cols] = data[numeric_cols].apply(pd.to_numeric, errors='coerce')
        data['Date'] = pd.to_datetime(data['Date'])

        # Rows without a date cannot be placed on the timeline; drop them.
        data = data[data['Date'].notna()]

        # Forward-fill remaining gaps. DataFrame.ffill() replaces the
        # deprecated fillna(method='ffill', inplace=True) and returns a new
        # frame instead of mutating a filtered selection in place.
        data = data.ffill()

        print(f"Successfully downloaded {len(data)} rows for {ticker}")
        print(f"Date range: {data['Date'].min().date()} to {data['Date'].max().date()}")

        return data

    except Exception as e:
        # Best-effort by design: report the problem and let the caller skip
        # this ticker instead of aborting the whole run.
        print(f"Error downloading {ticker}: {str(e)}")
        return None

# Download every configured ticker and persist each successful result as its own CSV.
for ticker in tickers:
    df = fetch_data(ticker, start_date, end_date)

    # fetch_data returns None on failure; report it and move on to the next symbol.
    if df is None:
        print(f"Failed to save {ticker} data\n")
        continue

    # Write all columns, without the row index, with dates formatted as YYYY-MM-DD.
    df.to_csv(f'../data/raw/{ticker}_historical.csv', date_format='%Y-%m-%d', index=False)
    print(f"Saved {ticker} data to CSV\n")

print("Data collection complete!")


Downloading TSLA data...
Successfully downloaded 2535 rows for TSLA
Date range: 2015-07-01 to 2025-07-30
Saved TSLA data to CSV


Downloading BND data...
Successfully downloaded 2535 rows for BND
Date range: 2015-07-01 to 2025-07-30
Saved BND data to CSV


Downloading SPY data...
Successfully downloaded 2535 rows for SPY
Date range: 2015-07-01 to 2025-07-30
Saved SPY data to CSV

Data collection complete!


  data.fillna(method='ffill', inplace=True)
  data.fillna(method='ffill', inplace=True)
  data.fillna(method='ffill', inplace=True)


In [13]:
def verify_data_structure(ticker):
    """Print a structural summary of the CSV saved for ``ticker``.

    Reads ``../data/raw/<ticker>_historical.csv`` with ``Date`` parsed as
    datetimes and prints, in order: the column names, the first and last five
    rows, the dtypes, the per-column null counts, the span of ``Date`` and the
    ``describe()`` table. Any exception is caught and reported on one line
    instead of being raised. Returns ``None``.
    """
    csv_path = f'../data/raw/{ticker}_historical.csv'
    try:
        frame = pd.read_csv(csv_path, parse_dates=['Date'])

        print(f"\n=== Verification for {ticker} ===")
        print("Columns:", frame.columns.tolist())

        # (heading, zero-argument producer) pairs; each report is computed
        # right before it is printed, so output order matches evaluation order.
        reports = (
            ("\nFirst 5 rows:", frame.head),
            ("\nLast 5 rows:", frame.tail),
            ("\nData types:", lambda: frame.dtypes),
            ("\nMissing values:", lambda: frame.isna().sum()),
        )
        for heading, produce in reports:
            print(heading)
            print(produce())

        dates = frame['Date']
        print("\nDate range:", dates.min(), "to", dates.max())

        print("\nBasic statistics:")
        print(frame.describe())
    except Exception as e:
        print(f"Verification failed for {ticker}: {str(e)}")

# Sanity check: re-read each ticker's CSV from ../data/raw and print its
# structure. Failures are reported inside verify_data_structure, so one bad
# file does not stop the loop.
print("\nVerifying downloaded data...")
for ticker in tickers:
    verify_data_structure(ticker)


Verifying downloaded data...

=== Verification for TSLA ===
Columns: ['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Ticker']

First 5 rows:
        Date       Open       High        Low      Close  Adj Close  \
0 2015-07-01  18.073999  18.174667  17.856667  17.943333  17.943333   
1 2015-07-02  18.680000  18.830000  18.220667  18.667999  18.667999   
2 2015-07-06  18.591999  18.779333  18.420000  18.648001  18.648001   
3 2015-07-07  18.333332  18.346666  17.384666  17.858667  17.858667   
4 2015-07-08  17.288000  17.386667  16.954000  16.997334  16.997334   

      Volume Ticker  
0   31518000   TSLA  
1  107458500   TSLA  
2   61828500   TSLA  
3   91576500   TSLA  
4   93316500   TSLA  

Last 5 rows:
           Date        Open        High         Low       Close   Adj Close  \
2530 2025-07-24  310.000000  310.149994  300.410004  305.299988  305.299988   
2531 2025-07-25  308.739990  323.630005  308.010010  316.059998  316.059998   
2532 2025-07-28  318.450012  33