In [4]:
import yfinance as yf
import os
import pandas as pd
from datetime import datetime

# This is the list of selected stock tickers
tickers = [
    "AAPL", "MSFT", "AMD", "INTC", "CRM",   # From Tech
    "JPM", "BAC", "WFC", "GS", "SCHW"       # From Finance
]

output_folder = "stock_data"
os.makedirs(output_folder, exist_ok=True)

# 8 years from today
end_date = datetime.today()
start_date = end_date.replace(year=end_date.year - 8)

for ticker in tickers:
    print(f"Fetching data for {ticker}...")
    data = yf.download(ticker, start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'), interval='1d')
    
    if not data.empty:
        data.reset_index(inplace=True)
        csv_path = os.path.join(output_folder, f"{ticker}.csv")
        data.to_csv(csv_path, index=False)
        print(f"Saved {ticker} data to {csv_path}")
    else:
        print(f"No data found for {ticker}")

print("Done")

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching data for AAPL...
Saved AAPL data to stock_data\AAPL.csv
Fetching data for MSFT...
Saved MSFT data to stock_data\MSFT.csv
Fetching data for AMD...



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Saved AMD data to stock_data\AMD.csv
Fetching data for INTC...
Saved INTC data to stock_data\INTC.csv
Fetching data for CRM...
Saved CRM data to stock_data\CRM.csv
Fetching data for JPM...
Saved JPM data to stock_data\JPM.csv
Fetching data for BAC...



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Saved BAC data to stock_data\BAC.csv
Fetching data for WFC...
Saved WFC data to stock_data\WFC.csv
Fetching data for GS...
Saved GS data to stock_data\GS.csv
Fetching data for SCHW...


[*********************100%***********************]  1 of 1 completed

Saved SCHW data to stock_data\SCHW.csv
Done





In [None]:
# here we are trying to merge all the stock data into one csv file

# we keep only the colunms from yfinance and columns  company and ticker then merger it 

import yfinance as yf
import pandas as pd
from datetime import datetime

stocks = {
    "AAPL": "Apple",
    "MSFT": "Microsoft",
    "AMD": "AMD",
    "INTC": "Intel",
    "CRM": "Salesforce",
    "JPM": "JPMorgan",
    "BAC": "Bank of America",
    "WFC": "Wells Fargo",
    "GS": "Goldman Sachs",
    "SCHW": "Charles Schwab"
}

end_date = datetime.today()
start_date = end_date.replace(year=end_date.year - 8)

all_data = []

for ticker, name in stocks.items():
    print(f"Fetching: {ticker}")
    df = yf.download(ticker, start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'), interval='1d')
    
    if not df.empty:
        df = df[['Open', 'High', 'Low', 'Close', 'Volume']].copy()
        df.reset_index(inplace=True)
        df['company'] = name
        df['ticker'] = ticker
        all_data.append(df)
    else:
        print(f"No data for {ticker}")

merged_df = pd.concat(all_data, ignore_index=True)

merged_df.sort_values(by=['Date', 'ticker'], inplace=True)

merged_df.to_csv("cleaned_all_stocks_8_years.csv", index=False)
print("Done")


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching: AAPL
Fetching: MSFT
Fetching: AMD
Fetching: INTC



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Fetching: CRM
Fetching: JPM
Fetching: BAC
Fetching: WFC


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Fetching: GS
Fetching: SCHW
Done


In [7]:
# there was some inconsistency in the data too many columns were craeted due improper mergering 
# we drop the extra columns

import pandas as pd

df = pd.read_csv("cleaned_all_stocks_8_years.csv")

expected_cols = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'company', 'ticker']
df = df[expected_cols]

df.dropna(inplace=True)

df.to_csv("final_stocks_8y_cleaned.csv", index=False)
print("Done ")


Done 


  df = pd.read_csv("cleaned_all_stocks_8_years.csv")
