In [1]:
# Importing the necessary package 
import yfinance 

In [2]:
# Ignoring warning messages
import warnings 
warnings.filterwarnings("ignore")

In [3]:
# Using the .download() method to get our data: one request for all four
# indices, returned as a single DataFrame grouped by ticker.
raw_data = yfinance.download(
    tickers = "^GSPC ^FTSE ^N225 ^GDAXI",
    start = "1994-01-07",
    end = "2018-01-30",
    interval = "1d",
    group_by = 'ticker',
    auto_adjust = True,
    threads = True,  # `threads` is the keyword download() documents; it was previously misspelled as `treads`
)

# tickers -> The time series we are interested in - (in our case, these are the S&P, FTSE, NIKKEI and DAX)
# start -> The starting date of our data set
# end -> The ending date of our data set (at the time of upload, this is the current date)
# interval -> The distance in time between two recorded observations. Since we're using daily closing prices, we set it equal to "1d", which indicates 1 day.
# group_by -> The way we want to group the scraped data. Usually we want it to be "ticker", so that we have all the information about a time series in 1 variable.
# auto_adjust -> Automatically adjust the closing prices for each period.
# threads -> Whether to use threads for mass downloading.

[*********************100%***********************]  4 of 4 completed


In [4]:
# Work on a separate copy so the downloaded frame survives any accidental
# removal or alteration in the steps below
df_comp = raw_data.copy(deep=True)

In [5]:
# Add one column per index holding that ticker's closing price
for alias, ticker in (
    ('spx', '^GSPC'),
    ('dax', '^GDAXI'),
    ('ftse', '^FTSE'),
    ('nikkei', '^N225'),
):
    df_comp[alias] = df_comp[ticker]['Close']

In [6]:
# Drop the first row. Per the original author's note, the download starts one
# period before the requested first date because of time zone differences in
# closing prices.
df_comp = df_comp.iloc[1:]

# Remove the original per-ticker column groups; only the closing-price
# columns added earlier (spx, dax, ftse, nikkei) are kept.
del df_comp['^N225']
del df_comp['^GSPC']
del df_comp['^GDAXI']
del df_comp['^FTSE']
# NOTE(review): the rendered header below still shows a second "Unnamed" level,
# so the columns appear to remain two-level here -- confirm whether flattening is wanted.

# Re-index to business-day frequency ('B' is the documented alias for business days).
df_comp = df_comp.asfreq('B')

# Forward-fill any missing values with the last known value.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill') spelling.
df_comp = df_comp.ffill()

In [7]:
# Preview the first 5 rows to make sure the data was scraped correctly
df_comp.head(n=5)

Unnamed: 0_level_0,spx,dax,ftse,nikkei
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1994-01-07,469.899994,2224.949951,3446.0,18124.009766
1994-01-10,475.269989,2225.0,3440.600098,18443.439453
1994-01-11,474.130005,2228.100098,3413.800049,18485.25
1994-01-12,474.170013,2182.060059,3372.0,18793.880859
1994-01-13,472.470001,2142.370117,3360.0,18577.259766


In [8]:
# Check that the last days included in the series are correct
df_comp.tail(n=5)

Unnamed: 0_level_0,spx,dax,ftse,nikkei
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-01-23,2839.129883,13559.599609,7731.799805,24124.150391
2018-01-24,2837.540039,13414.740234,7643.399902,23940.779297
2018-01-25,2839.25,13298.360352,7615.799805,23669.490234
2018-01-26,2872.870117,13340.169922,7665.5,23631.880859
2018-01-29,2853.530029,13324.480469,7671.5,23629.339844


In [9]:
# Turn the date index back into an ordinary column
df_comp = df_comp.reset_index()

In [10]:
# Preview the first 5 rows after resetting the index
df_comp.head(n=5)

Unnamed: 0,Date,spx,dax,ftse,nikkei
,,,,,
0.0,1994-01-07,469.899994,2224.949951,3446.0,18124.009766
1.0,1994-01-10,475.269989,2225.0,3440.600098,18443.439453
2.0,1994-01-11,474.130005,2228.100098,3413.800049,18485.25
3.0,1994-01-12,474.170013,2182.060059,3372.0,18793.880859
4.0,1994-01-13,472.470001,2142.370117,3360.0,18577.259766


In [11]:
# Write the prepared frame to disk without the row index
df_comp.to_csv(path_or_buf="Index2018.csv", index=False)

In [12]:
import pandas as pd

In [13]:
# Read the exported file back in
df = pd.read_csv(filepath_or_buffer='Index2018.csv')

In [14]:
# Discard every row that contains a missing value.
# NOTE(review): the preview that follows starts at index 1, so row 0 is
# presumably an empty row in the CSV -- confirm where it comes from.
df = df.dropna()

In [15]:
# Preview the first 5 rows of the cleaned frame
df.head(n=5)

Unnamed: 0,Date,spx,dax,ftse,nikkei
1,1994-01-07,469.899994,2224.949951,3446.0,18124.009766
2,1994-01-10,475.269989,2225.0,3440.600098,18443.439453
3,1994-01-11,474.130005,2228.100098,3413.800049,18485.25
4,1994-01-12,474.170013,2182.060059,3372.0,18793.880859
5,1994-01-13,472.470001,2142.370117,3360.0,18577.259766


In [17]:
# Overwrite the CSV with the cleaned frame, again without the row index
df.to_csv(path_or_buf="Index2018.csv", index=False)