In [56]:
import pandas as pd
import numpy as np
import datetime
import yfinance as yf
import string

In [57]:
def preprocess_df(df, prefix, date_col="date", rename_date=False):
    # Create a copy
    df = df.copy()
    
    # Function to add prefix
    add_prefix = lambda x: prefix + x if x != date_col else x
    
    # Fix the column names
    df.columns = (
        df.columns
        .str.strip()
        .str.lower()
        .map(remove_punctuation)
        .map(add_prefix)
    )
    
    # Convert to pandas datetime if needed
    if rename_date:
        date_col = prefix + date_col
        df[date_col] = pd.to_datetime(df[date_col])
    
    return df

In [58]:
symbols = ['^DJI', 'BTC-USD', 'ETH-USD', 'CL=F', 'GC=F', 'SI=F', 'NG=F','DX-Y.NYB', '^BSESN','^NDX', '^GSPC','^NSEI'] 
start = datetime.datetime(2018, 1, 1)
end = datetime.datetime(2024, 1, 31)

# Initialize an empty list to store DataFrames
dfs = []

# Loop through symbols and download data for each
for symbol in symbols:
    df = yf.download(symbol, start, end)
    df.reset_index(inplace=True)
    prefix = symbol
    df = preprocess_df(df, prefix + "_", date_col='Date', rename_date=False)
    dfs.append(df)

# Concatenate the DataFrames
combined_df = pd.concat(dfs, axis=1)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [59]:
# Find all date columns
date_cols = [col for col in combined_df.columns if "date" in col.lower()]

# Merge DataFrames on all date columns
if len(date_cols) > 1:
    combined_df['Date'] = None
    for col in date_cols:
        combined_df['Date'] = combined_df['Date'].combine_first(combined_df[col])
    combined_df.drop(columns=date_cols, inplace=True)
    combined_df.dropna(subset=['Date'], inplace=True)
    print("Combined multiple date columns into a single 'Date' column.")
elif len(date_cols) == 0:
    raise ValueError("No common date column found among the DataFrames")
else:
    common_date_col = date_cols[0]
    combined_df.rename(columns={common_date_col: 'Date'}, inplace=True)
    print("Renamed the common date column to 'Date'.")

# Move the "Date" column to the first position
cols = combined_df.columns.tolist()
cols = ['Date'] + [col for col in cols if col != 'Date']
combined_df = combined_df[cols]

combined_df.head()


Combined multiple date columns into a single 'Date' column.


Unnamed: 0,Date,^DJI_open,^DJI_high,^DJI_low,^DJI_close,^DJI_adj close,^DJI_volume,BTC-USD_open,BTC-USD_high,BTC-USD_low,...,^GSPC_low,^GSPC_close,^GSPC_adj close,^GSPC_volume,^NSEI_open,^NSEI_high,^NSEI_low,^NSEI_close,^NSEI_adj close,^NSEI_volume
0,2018-01-02 00:00:00,24809.349609,24864.189453,24741.699219,24824.009766,24824.009766,341130000.0,14112.200195,14112.200195,13154.700195,...,2682.360107,2695.810059,2695.810059,3397430000.0,10477.549805,10495.200195,10404.650391,10442.200195,10442.200195,153400.0
1,2018-01-03 00:00:00,24850.449219,24941.919922,24825.550781,24922.679688,24922.679688,456790000.0,13625.0,15444.599609,13163.599609,...,2697.77002,2713.060059,2713.060059,3544030000.0,10482.650391,10503.599609,10429.549805,10443.200195,10443.200195,167300.0
2,2018-01-04 00:00:00,24964.859375,25105.960938,24963.269531,25075.130859,25075.130859,403280000.0,14978.200195,15572.799805,14844.5,...,2719.070068,2723.98999,2723.98999,3697340000.0,10469.400391,10513.0,10441.450195,10504.799805,10504.799805,174900.0
3,2018-01-05 00:00:00,25114.919922,25299.789062,25112.009766,25295.869141,25295.869141,358020000.0,15270.700195,15739.700195,14522.200195,...,2727.919922,2743.149902,2743.149902,3239280000.0,10534.25,10566.099609,10520.099609,10558.849609,10558.849609,180900.0
4,2018-01-08 00:00:00,25308.400391,25311.990234,25235.410156,25283.0,25283.0,341390000.0,15477.200195,17705.199219,15202.799805,...,2737.600098,2747.709961,2747.709961,3246160000.0,10591.700195,10631.200195,10588.549805,10623.599609,10623.599609,169000.0


In [60]:
combined_df.to_csv("symbol.csv")