In [35]:
import pandas as pd
from pandas import DataFrame
from typing import Set, Any


def remove_others(df: DataFrame, columns: Set[Any]):
    cols_total: Set[Any] = set(df.columns)
    diff: Set[Any] = cols_total - columns
    df.drop(diff, axis=1, inplace=True)

# Importing the csv file as a dataframe and cleaning the data: removing the percentage signs and unwnated columns

df_SP500 = pd.read_csv('S&P 500 Historical Data.csv')
df_SP500
remove_others(df_SP500, {"Date", "Change %"})
df_SP500['Change %'] = df_SP500['Change %'].str.rstrip('%').astype('float')
df_SP500 = df_SP500.rename(columns={"Change %": "SP_500_Change"})
df_SP500


Unnamed: 0,Date,SP_500_Change
0,25/11/2022,-0.03
1,23/11/2022,0.59
2,22/11/2022,1.36
3,21/11/2022,-0.39
4,18/11/2022,0.48
...,...,...
2514,30/11/2012,0.02
2515,29/11/2012,0.43
2516,28/11/2012,0.79
2517,27/11/2012,-0.52


In [36]:
# Importing the FTSE100 data as a dataframe and cleaning.

df_FTSE100 = pd.read_csv('FTSE 100 Historical Data.csv')
df_FTSE100
remove_others(df_FTSE100, {"Date", "Change %"})
df_FTSE100['Change %'] = df_FTSE100['Change %'].str.rstrip('%').astype('float')
df_FTSE100 = df_FTSE100.rename(columns={"Change %": "FTSE100_Change"})
df_FTSE100


Unnamed: 0,Date,FTSE100_Change
0,25/11/2022,0.27
1,24/11/2022,0.02
2,23/11/2022,0.17
3,22/11/2022,1.03
4,21/11/2022,-0.12
...,...,...
2523,30/11/2012,-0.06
2524,29/11/2012,1.15
2525,28/11/2012,0.06
2526,27/11/2012,0.22


In [37]:
from functools import reduce
# Merging the two together

dfs = [df_SP500, df_FTSE100]
merged_df = reduce(lambda left, right: pd.merge(left, right, on=['Date'], how='outer'), dfs)


In [38]:
merged_df

Unnamed: 0,Date,SP_500_Change,FTSE100_Change
0,25/11/2022,-0.03,0.27
1,23/11/2022,0.59,0.17
2,22/11/2022,1.36,1.03
3,21/11/2022,-0.39,-0.12
4,18/11/2022,0.48,0.53
...,...,...,...
2568,28/11/2013,,0.08
2569,02/09/2013,,1.45
2570,04/07/2013,,3.08
2571,18/02/2013,,-0.16


In [39]:
# Droping rows with NaN

df_clean = merged_df.dropna()


In [40]:
df_clean = df_clean.reset_index(drop=True)
df_clean

Unnamed: 0,Date,SP_500_Change,FTSE100_Change
0,25/11/2022,-0.03,0.27
1,23/11/2022,0.59,0.17
2,22/11/2022,1.36,1.03
3,21/11/2022,-0.39,-0.12
4,18/11/2022,0.48,0.53
...,...,...,...
2469,30/11/2012,0.02,-0.06
2470,29/11/2012,0.43,1.15
2471,28/11/2012,0.79,0.06
2472,27/11/2012,-0.52,0.22


In [41]:
import json
# Exporting dataframe as a json file.

exported_values = list(df_clean.T.to_dict().values())
open('Daily Stock Data.json' , 'w').write(json.dumps(exported_values))


177454