# Data Preparation

In [11]:
import yfinance as yf
import pandas as pd
import os

# Path to raw data folder (relative to your project root)
raw_data_folder = r"C:\Users\haier\AI_Stock_Analyzer\Data\raw"
os.makedirs(raw_data_folder, exist_ok=True)

print(f"Data will be saved in: {raw_data_folder}")

# List of tickers
tickers = ["AAPL", "TSLA", "MSFT", "AMZN", "NVDA"]

# Fetch 8 years of data
start_date = "2017-01-01"
end_date = "2025-01-01"

all_data = {}

for ticker in tickers:
    print(f"\nFetching data for {ticker}...")
    try:
        # Download data
        df = yf.download(ticker, start=start_date, end=end_date, auto_adjust=False)

        if df.empty:
            print(f"Warning: No data found for {ticker}, skipping.")
            continue

        # Flatten MultiIndex columns if any
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = [col[0] for col in df.columns]

        # Drop Adj Close if present
        if "Adj Close" in df.columns:
            df.drop(columns=["Adj Close"], inplace=True)

        # Keep only OHLCV
        df = df[['Open', 'High', 'Low', 'Close', 'Volume']]

        # Ensure Date is index
        df.index.name = "Date"

        # Save to CSV
        save_path = os.path.join(raw_data_folder, f"{ticker}.csv")
        df.to_csv(save_path)

        # Store in dictionary for preview
        all_data[ticker] = df

        # Show preview
        print(f"Preview of {ticker} data:")
        print(df.head())
        print(f"Saved {len(df)} rows for {ticker}\n")

    except Exception as e:
        print(f"Error fetching {ticker}: {e}")

print("Data collection complete.")
print("Cleaned CSV files saved to:", raw_data_folder)


[*********************100%***********************]  1 of 1 completed

Data will be saved in: C:\Users\haier\AI_Stock_Analyzer\Data\raw

Fetching data for AAPL...



[*********************100%***********************]  1 of 1 completed

Preview of AAPL data:
                 Open       High        Low      Close     Volume
Date                                                             
2017-01-03  28.950001  29.082500  28.690001  29.037500  115127600
2017-01-04  28.962500  29.127501  28.937500  29.004999   84472400
2017-01-05  28.980000  29.215000  28.952499  29.152500   88774400
2017-01-06  29.195000  29.540001  29.117500  29.477501  127007600
2017-01-09  29.487499  29.857500  29.485001  29.747499  134247600
Saved 2012 rows for AAPL


Fetching data for TSLA...



[*********************100%***********************]  1 of 1 completed


Preview of TSLA data:
                 Open       High        Low      Close     Volume
Date                                                             
2017-01-03  14.324000  14.688667  14.064000  14.466000   88849500
2017-01-04  14.316667  15.200000  14.287333  15.132667  168202500
2017-01-05  15.094667  15.165333  14.796667  15.116667   88675500
2017-01-06  15.128667  15.354000  15.030000  15.267333   82918500
2017-01-09  15.264667  15.461333  15.200000  15.418667   59692500
Saved 2012 rows for TSLA


Fetching data for MSFT...
Preview of MSFT data:
                 Open       High        Low      Close    Volume
Date                                                            
2017-01-03  62.790001  62.840000  62.130001  62.580002  20694100
2017-01-04  62.480000  62.750000  62.119999  62.299999  21340000
2017-01-05  62.189999  62.660000  62.029999  62.299999  24876000
2017-01-06  62.299999  63.150002  62.040001  62.840000  19922900
2017-01-09  62.759998  63.080002  62.540001  62.639

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Preview of AMZN data:
                 Open       High        Low      Close     Volume
Date                                                             
2017-01-03  37.896000  37.938000  37.384998  37.683498   70422000
2017-01-04  37.919498  37.984001  37.709999  37.859001   50210000
2017-01-05  38.077499  39.119999  38.013000  39.022499  116602000
2017-01-06  39.118000  39.972000  38.924000  39.799500  119724000
2017-01-09  39.900002  40.088501  39.588501  39.846001   68922000
Saved 2012 rows for AMZN


Fetching data for NVDA...
Preview of NVDA data:
               Open     High      Low    Close      Volume
Date                                                      
2017-01-03  2.61000  2.65925  2.48450  2.55025  1501996000
2017-01-04  2.58500  2.63750  2.53825  2.60975  1199220000
2017-01-05  2.61325  2.64550  2.52625  2.54350   984296000
2017-01-06  2.57125  2.60625  2.53000  2.57750   822856000
2017-01-09  2.58750  2.70000  2.58750  2.68200   916248000
Saved 2012 rows for NVDA

Da


