In [1]:
import pandas as pd

In [2]:
# Instruments to analyse. Each name doubles as the directory that the loading
# cell reads the instrument's CSV files from.
datasets_names = [
    "SP500",
    "Gold",
    "EURUSD",
]
# Number of instruments, reused by later cells when iterating over them.
datasets_n = len(datasets_names)

In [3]:
# Column layout every loaded frame is normalised to; any additional CSV
# columns are kept and placed after these.
EXPECTED_COLUMNS = ["Date", "Open", "High", "Low", "Close"]
# Each dataset directory is read as files 1.csv .. 10.csv.
CSV_FILES_PER_DATASET = 10


def load_dataset(dt_name):
    """Read and stack all CSV parts of one dataset.

    Parameters
    ----------
    dt_name : str
        Directory name of the dataset; files ``<dt_name>/1.csv`` through
        ``<dt_name>/<CSV_FILES_PER_DATASET>.csv`` are read.

    Returns
    -------
    pandas.DataFrame
        All parts stacked in file order. The row index of each part is kept
        as read (it is not renumbered), and the columns start with
        ``EXPECTED_COLUMNS`` (filled with NaN if a column is absent from
        every part).

    Raises
    ------
    FileNotFoundError
        Propagated from ``pd.read_csv`` when one of the part files is missing.
    """
    # Read all parts first and concatenate once: growing a frame with
    # pd.concat inside the loop re-copies the accumulated data every pass, and
    # seeding it with an empty frame triggers pandas' deprecation warning
    # about empty entries influencing the result dtypes.
    parts = [
        pd.read_csv(dt_name + "/" + str(csv_no) + ".csv", thousands=",")
        for csv_no in range(1, CSV_FILES_PER_DATASET + 1)
    ]
    stacked = pd.concat(parts)
    # Keep the column order the empty seed frame used to impose.
    extra_columns = [col for col in stacked.columns if col not in EXPECTED_COLUMNS]
    return stacked.reindex(columns=EXPECTED_COLUMNS + extra_columns)


# One frame per instrument, in the same order as `datasets_names`.
datasets = [load_dataset(dt_name) for dt_name in datasets_names]

In [4]:
# Inspect the first loaded dataset (index 0 corresponds to "SP500" in datasets_names).
datasets[0]

Unnamed: 0,Date,Open,High,Low,Close
0,04/01/2013,1569.18,1570.57,1558.47,1562.17
1,03/28/2013,1562.86,1570.28,1561.08,1569.19
2,03/27/2013,1563.75,1564.07,1551.90,1562.85
3,03/26/2013,1551.69,1563.95,1551.69,1563.77
4,03/25/2013,1556.89,1564.91,1546.22,1551.69
...,...,...,...,...,...
248,04/09/2021,4096.11,4129.48,4095.51,4128.80
249,04/08/2021,4089.95,4098.19,4082.54,4097.17
250,04/07/2021,4074.29,4083.13,4068.31,4079.95
251,04/06/2021,4075.57,4086.23,4068.14,4073.94


In [5]:
# Summary statistics for the second dataset (index 1 corresponds to "Gold" in datasets_names).
datasets[1].describe()

Unnamed: 0,Open,High,Low,Close
count,2518.0,2518.0,2518.0,2518.0
mean,1431.339237,1441.190905,1420.711358,1431.089833
std,247.109549,249.286544,244.25742,247.02312
min,1051.5,1062.7,1045.4,1049.6
25%,1243.825,1251.75,1235.8,1244.225
50%,1319.75,1326.95,1310.95,1319.1
75%,1655.975,1666.4,1640.425,1655.125
max,2076.4,2089.2,2049.0,2069.4


In [6]:
# Every calendar day of the analysis window, formatted as MM/DD/YYYY strings so
# the values can be matched against the "Date" column of the loaded CSV data.
# The original note says SP500 has no data before 2013-04-02.
# NOTE(review): the loaded SP500 frame shows a 04/01/2013 row - confirm the intended start.
dates_analyzed = pd.DataFrame(
    {
        "Date": pd.date_range(start='2013-04-02', end='2022-04-01').strftime('%m/%d/%Y'),
    }
)
dates_analyzed.head()

Unnamed: 0,Date
0,04/02/2013
1,04/03/2013
2,04/04/2013
3,04/05/2013
4,04/06/2013


In [7]:
# Inspect the third loaded dataset (index 2 corresponds to "EURUSD" in datasets_names).
datasets[2]

Unnamed: 0,Date,Open,High,Low,Close
0,04/01/2013,1.2820,1.2850,1.2820,1.2850
1,03/29/2013,1.2819,1.2820,1.2819,1.2820
2,03/28/2013,1.2781,1.2819,1.2781,1.2819
3,03/27/2013,1.2862,1.2862,1.2781,1.2781
4,03/26/2013,1.2854,1.2862,1.2854,1.2862
...,...,...,...,...,...
256,04/08/2021,1.1869,1.1928,1.1861,1.1918
257,04/07/2021,1.1873,1.1916,1.1861,1.1873
258,04/06/2021,1.1815,1.1880,1.1796,1.1874
259,04/05/2021,1.1765,1.1822,1.1738,1.1813


In [8]:
# Align each dataset to the calendar in `dates_analyzed`: a left join on "Date"
# keeps every calendar date and leaves the price columns as NaN for dates on
# which a dataset has no row.
aligned = []
for frame in datasets:
    aligned.append(dates_analyzed.merge(frame, how="left", on="Date"))
datasets = aligned
datasets[0]

Unnamed: 0,Date,Open,High,Low,Close
0,04/02/2013,1562.17,1573.66,1562.17,1570.25
1,04/03/2013,1570.25,1571.47,1549.80,1553.69
2,04/04/2013,1553.69,1562.60,1552.52,1559.98
3,04/05/2013,1559.98,1559.98,1539.50,1553.28
4,04/06/2013,,,,
...,...,...,...,...,...
3282,03/28/2022,4541.09,4575.65,4517.69,4575.52
3283,03/29/2022,4602.86,4637.30,4589.66,4631.60
3284,03/30/2022,4624.20,4627.77,4581.32,4602.45
3285,03/31/2022,4599.02,4603.07,4530.41,4530.41


In [9]:
# Build one wide frame: a single "Date" column followed by the OHLC columns of
# every dataset, each suffixed with its dataset name (e.g. "Open_SP500").
column_names = ["Open", "High", "Low", "Close"]
merged_column_names_tmp = [col_name + "_" + datasets_names[i] for i in range(datasets_n) for col_name in column_names]
merged_column_names = ["Date"]
merged_column_names.extend(merged_column_names_tmp)

# All frames must share the same "Date" column, because only the first one is
# kept. Fail loudly instead of silently misaligning rows.
if not all(datasets[i]["Date"].equals(datasets[0]["Date"]) for i in range(datasets_n)):
    raise ValueError("datasets do not share an identical 'Date' column; cannot merge side by side")

# Select and rename the columns explicitly rather than de-duplicating with
# `.T.drop_duplicates().T`: transposing a frame that mixes strings and floats
# turns every column into object dtype, and it would also drop any two price
# columns that happen to hold identical values.
datasets_merged = pd.concat(
    [datasets[0][["Date"]]]
    + [datasets[i][column_names].add_suffix("_" + datasets_names[i]) for i in range(datasets_n)],
    axis=1,
)
assert list(datasets_merged.columns) == merged_column_names

In [10]:
# First rows of the merged wide frame.
datasets_merged.head()

Unnamed: 0,Date,Open_SP500,High_SP500,Low_SP500,Close_SP500,Open_Gold,High_Gold,Low_Gold,Close_Gold,Open_EURUSD,High_EURUSD,Low_EURUSD,Close_EURUSD
0,04/02/2013,1562.17,1573.66,1562.17,1570.25,1600.1,1604.3,1574.0,1575.9,1.285,1.285,1.282,1.282
1,04/03/2013,1570.25,1571.47,1549.8,1553.69,1576.4,1577.3,1549.7,1553.5,1.282,1.285,1.282,1.285
2,04/04/2013,1553.69,1562.6,1552.52,1559.98,1557.7,1559.3,1539.4,1552.4,1.285,1.2937,1.285,1.2937
3,04/05/2013,1559.98,1559.98,1539.5,1553.28,1554.0,1581.8,1549.0,1575.9,1.2937,1.2993,1.2937,1.2993
4,04/06/2013,,,,,,,,,,,,


In [11]:
# Last rows of the merged wide frame.
datasets_merged.tail()

Unnamed: 0,Date,Open_SP500,High_SP500,Low_SP500,Close_SP500,Open_Gold,High_Gold,Low_Gold,Close_Gold,Open_EURUSD,High_EURUSD,Low_EURUSD,Close_EURUSD
3282,03/28/2022,4541.09,4575.65,4517.69,4575.52,1958.7,1959.8,1915.7,1939.8,1.0985,1.1001,1.0945,1.0988
3283,03/29/2022,4602.86,4637.3,4589.66,4631.6,1922.4,1929.4,1888.3,1912.2,1.0984,1.1139,1.0971,1.1088
3284,03/30/2022,4624.2,4627.77,4581.32,4602.45,1924.0,1943.4,1920.1,1939.0,1.1087,1.1172,1.1083,1.1161
3285,03/31/2022,4599.02,4603.07,4530.41,4530.41,1937.3,1955.0,1923.0,1954.0,1.116,1.1186,1.1061,1.1067
3286,04/01/2022,4540.32,4548.7,4507.57,4545.86,1942.4,1944.5,1921.4,1923.7,1.1068,1.1078,1.1028,1.1046
