In [6]:
import pandas as pd

usdjpy = pd.read_csv("usdjpy_raw.csv")
gold = pd.read_csv("gold_raw.csv")
spx = pd.read_csv("spx_raw.csv")

unrate = pd.read_csv("UNRATE.csv")
cpi = pd.read_csv("CPIAUCSL.csv")
dgs2 = pd.read_csv("DGS2.csv")
dgs5 = pd.read_csv("DGS5.csv")
dgs10 = pd.read_csv("DGS10.csv")

usdjpy.rename(columns={"USDJPY=X": "usdjpy"}, inplace=True)
gold.rename(columns={"GC=F": "gold"}, inplace=True)
spx.rename(columns={"^GSPC": "spx"}, inplace=True)

unrate.rename(columns={"observation_date": "date", "UNRATE": "unrate"}, inplace=True)
cpi.rename(columns={"observation_date": "date", "CPIAUCSL": "cpi"}, inplace=True)
dgs2.rename(columns={"observation_date": "date", "DGS2": "dgs2"}, inplace=True)
dgs5.rename(columns={"observation_date": "date", "DGS5": "dgs5"}, inplace=True)
dgs10.rename(columns={"observation_date": "date", "DGS10": "dgs10"}, inplace=True)

for df in [usdjpy, gold, spx, unrate, cpi, dgs2, dgs5, dgs10]:
    df["date"] = pd.to_datetime(df["date"])

for df in [usdjpy, gold, spx, unrate, cpi, dgs2, dgs5, dgs10]:
    df.sort_values("date", inplace=True)

unrate = unrate.set_index("date").resample("D").ffill().reset_index()
cpi = cpi.set_index("date").resample("D").ffill().reset_index()

dgs2["dgs2"] = dgs2["dgs2"].ffill()
dgs5["dgs5"] = dgs5["dgs5"].ffill()
dgs10["dgs10"] = dgs10["dgs10"].ffill()

merged = usdjpy.merge(gold, on="date", how="inner")
merged = merged.merge(spx, on="date", how="inner")

merged = merged.merge(dgs2, on="date", how="left")
merged = merged.merge(dgs5, on="date", how="left")
merged = merged.merge(dgs10, on="date", how="left")

merged[["dgs2","dgs5","dgs10"]] = merged[["dgs2","dgs5","dgs10"]].ffill()

merged = merged.merge(unrate, on="date", how="left")
merged = merged.merge(cpi, on="date", how="left")

merged.reset_index(drop=True, inplace=True)

print("Missing values check:")
print(merged.isna().sum())

print("\nFirst 10 rows:")
print(merged.head(10))


Missing values check:
date       0
usdjpy     0
gold       0
spx        0
dgs2       0
dgs5       0
dgs10      0
unrate    54
cpi       54
dtype: int64

First 10 rows:
        date      usdjpy         gold          spx  dgs2  dgs5  dgs10  unrate  \
0 2017-01-03  117.495003  1160.400024  2257.830078  1.22  1.94   2.45     4.7   
1 2017-01-04  117.658997  1163.800049  2270.750000  1.24  1.94   2.46     4.7   
2 2017-01-05  117.112999  1179.699951  2269.000000  1.17  1.86   2.37     4.7   
3 2017-01-06  115.264999  1171.900024  2276.979980  1.22  1.92   2.42     4.7   
4 2017-01-09  117.150002  1183.500000  2268.899902  1.21  1.89   2.38     4.7   
5 2017-01-10  115.985001  1184.199951  2268.899902  1.19  1.89   2.38     4.7   
6 2017-01-11  115.872002  1195.599976  2275.320068  1.20  1.89   2.38     4.7   
7 2017-01-12  115.098000  1198.900024  2270.439941  1.18  1.87   2.36     4.7   
8 2017-01-13  114.664001  1195.300049  2274.639893  1.21  1.90   2.40     4.7   
9 2017-01-17  114.1520

In [7]:
merged.head(30)


Unnamed: 0,date,usdjpy,gold,spx,dgs2,dgs5,dgs10,unrate,cpi
0,2017-01-03,117.495003,1160.400024,2257.830078,1.22,1.94,2.45,4.7,243.618
1,2017-01-04,117.658997,1163.800049,2270.75,1.24,1.94,2.46,4.7,243.618
2,2017-01-05,117.112999,1179.699951,2269.0,1.17,1.86,2.37,4.7,243.618
3,2017-01-06,115.264999,1171.900024,2276.97998,1.22,1.92,2.42,4.7,243.618
4,2017-01-09,117.150002,1183.5,2268.899902,1.21,1.89,2.38,4.7,243.618
5,2017-01-10,115.985001,1184.199951,2268.899902,1.19,1.89,2.38,4.7,243.618
6,2017-01-11,115.872002,1195.599976,2275.320068,1.2,1.89,2.38,4.7,243.618
7,2017-01-12,115.098,1198.900024,2270.439941,1.18,1.87,2.36,4.7,243.618
8,2017-01-13,114.664001,1195.300049,2274.639893,1.21,1.9,2.4,4.7,243.618
9,2017-01-17,114.152,1212.0,2267.889893,1.17,1.84,2.33,4.7,243.618


In [8]:
merged.duplicated().sum()


np.int64(0)

In [9]:
merged.describe()

Unnamed: 0,date,usdjpy,gold,spx,dgs2,dgs5,dgs10,unrate,cpi
count,2288,2288.0,2288.0,2288.0,2288.0,2288.0,2288.0,2234.0,2234.0
mean,2021-07-25 09:11:19.720279808,124.998092,1935.899473,3998.932189,2.490066,2.581731,2.809069,4.555909,278.997246
min,2017-01-03 00:00:00,102.678001,1160.400024,2237.399902,0.09,0.19,0.52,3.4,243.618
25%,2019-04-16 18:00:00,109.433998,1333.300018,2850.819946,1.29,1.6175,1.78,3.7,254.277
50%,2021-07-26 12:00:00,113.550999,1798.649963,3923.969971,2.5,2.66,2.82,4.0,270.654
75%,2023-11-01 06:00:00,144.788998,2002.299957,4682.812378,3.97,3.83,4.0225,4.4,307.276
max,2026-02-17 00:00:00,161.606995,5318.399902,6978.600098,5.19,4.95,4.98,14.8,326.031
std,,18.5313,747.709417,1266.711981,1.589329,1.334299,1.20099,1.81905,27.520069
