### Finalizing Finance Data Dataset with Prices

The aim of this script is to combine the finance data set with prices.
The finance data set was created in a previous script; it contains data on the fundamentals and earnings of companies.
The prices data set contains daily prices of companies, set at the end of the day.

After merging the two files I will save the result and move on to the next step of the project.

In [21]:
# importing libraries
import pandas as pd
import numpy as np
import os


In [22]:
# setting out the path for the data: Prices and Finance Data

prices_path = '../Data/Datasets/financial_data_prices.csv'

earnings_path = '../Data/Datasets/financial_data_EarningsCal.csv'

finance_path = '../Data/Datasets/starting_financial_data_combined.csv'


In [23]:
# loading the dataset

prices_data = pd.read_csv(prices_path)

earnings_data = pd.read_csv(earnings_path)

finance_data = pd.read_csv(finance_path)


In [None]:
# viewing the data

# viewing the prices data

prices_data


In [None]:
# viewing the finance data

finance_data.head(100)


In [None]:
# viewing the earnings calendar data

earnings_data.head(100)

In [None]:
# creating additional date columns holding the earnings release date plus N calendar days:
#   date_1  -> N = 1
#   date_7  -> N = 7  (1 week)
#   date_28 -> N = 28 (1 month)
#   date_91 -> N = 91 (3 months)

# the release date has to be a datetime before an offset can be added to it
earnings_data['date'] = pd.to_datetime(earnings_data['date'])

for days_ahead in (1, 7, 28, 91):
    earnings_data[f'date_{days_ahead}'] = earnings_data['date'] + pd.DateOffset(days=days_ahead)


In [None]:
earnings_data


In [None]:
# checking whether any of the date columns fall on a weekend (those dates are moved to the next working day in a later step).
# each date column gets a companion column with its day name: day_of_week for date, day_of_week_1 for date_1, and so on.

for suffix in ('', '_1', '_7', '_28', '_91'):
    earnings_data[f'day_of_week{suffix}'] = earnings_data[f'date{suffix}'].dt.day_name()

# counting the rows per day of the week for one date column, to see how its dates are distributed across the week.
# the 7 day column is used as the example here; any of the other day_of_week columns can be used instead.

earnings_data.groupby('day_of_week_7').size()


In [None]:
# before getting the prices input into the earnings dataset, I will move any weekend dates to the next working day. A Saturday date is moved to the following Monday (+2 days) and a Sunday date is moved to the following Monday (+1 day).

# Adjust weekend dates to the next working day
def adjust_weekend_dates(date):
    """Return ``date`` moved to the following Monday if it is on a weekend.

    Saturdays are shifted by two days and Sundays by one day. Any other
    value is returned unchanged.
    """
    weekday = date.weekday()  # Monday is 0 ... Saturday is 5, Sunday is 6
    if weekday in (5, 6):
        # 7 - weekday is the number of days left until Monday (Sat -> 2, Sun -> 1)
        return date + pd.DateOffset(days=7 - weekday)
    return date


In [None]:
# Move the weekend dates in every date column onto the next working day
date_columns = ['date', 'date_1', 'date_7', 'date_28', 'date_91']
for date_col in date_columns:
    shifted = earnings_data[date_col].apply(adjust_weekend_dates)
    earnings_data[date_col] = shifted

# Display the adjusted earnings_data
earnings_data


In [None]:
# re-checking the days of the week now that the weekend dates have been adjusted.
# the day name columns are recomputed from the adjusted date columns: day_of_week for date, day_of_week_1 for date_1, and so on.

for suffix in ('', '_1', '_7', '_28', '_91'):
    earnings_data[f'day_of_week{suffix}'] = earnings_data[f'date{suffix}'].dt.day_name()

# counting the rows per day of the week for one date column, to see how its dates are distributed across the week.
# the 91 day column is used as the example here; any of the other day_of_week columns can be used instead.

earnings_data.groupby('day_of_week_91').size()

In [None]:
# Adding the close price to the earnings data set based on the date and symbol (across all date columns)

# making sure that the date column in the prices dataset is a datetime column

prices_data['date'] = pd.to_datetime(prices_data['date'])


In [None]:
# # testing looking up the close price for a single date and symbol in the prices data set
#
# filt_test_data = prices_data[(prices_data['symbol'] == 'ABNB') & (prices_data['date'] == '2020-09-30')]
#
# filt_test_data['close'].values[0]


In [None]:
# merging the earnings data with the prices data

# function adds the close price to the earnings data based on the date and symbol

#def get_close_price(row, date_column):
  #  filtered_data = prices_data[(prices_data['symbol'] == row['symbol']) & (prices_data['date'] == row[date_column])]
    #if not filtered_data.empty:
      #  return filtered_data['close'].values[0]
    #else:
      #  return np.nan  # or any other default value you prefer

# Example usage:
# earnings_data['PriceEarningsData'] = earnings_data.apply(lambda row: get_close_price(row, 'date_1'), axis=1)


In [None]:
# defining the function that adds the close price to the earnings data set based on the date and symbol

# def add_price_columns_and_save(earnings_data, prices_data, date_columns, save_filepath):
#     def get_close_price(row, date_column):
#         filtered_data = prices_data[(prices_data['symbol'] == row['symbol']) & (prices_data['date'] == row[date_column])]
#         if not filtered_data.empty:
#             return filtered_data['close'].values[0]
#         else:
#             return np.nan  # or any other default value you prefer
#
#     for date_column in date_columns:
#         price_column = f'price_{date_column}'
#         earnings_data[price_column] = earnings_data.apply(lambda row: get_close_price(row, date_column), axis=1)
#         # Save the DataFrame to a CSV file
#         earnings_data.to_csv(save_filepath, index=False)
#
#     return earnings_data

# trying a modified version of the function to lessen the number of NaN values

def add_price_columns_and_save(earnings_data, prices_data, date_columns, save_filepath):
    """Add a closing-price column for each date column and save the result as CSV.

    For every name in ``date_columns`` a column ``price_<name>`` is added to
    ``earnings_data``. Each value is the ``close`` found in ``prices_data``
    for the row's symbol on the row's date. When there is no price on that
    exact date, the following 1, 2 and 3 calendar days are tried in turn;
    if none of them has a price the value is NaN.

    Args:
        earnings_data: DataFrame with a ``symbol`` column and the columns
            named in ``date_columns``. It is modified in place.
        prices_data: DataFrame with ``symbol``, ``date`` and ``close``
            columns. If a (symbol, date) pair occurs more than once, the
            first occurrence is used.
        date_columns: Names of the date columns in ``earnings_data`` to
            look prices up for.
        save_filepath: Path the updated ``earnings_data`` is written to
            (CSV, without the index).

    Returns:
        The same ``earnings_data`` object, with the price columns added.
    """
    max_offset_days = 3  # look at the date itself and up to 3 days after it

    # Build the (symbol, date) -> close lookup once, instead of filtering the
    # whole prices table again for every row and every offset.
    price_dates = pd.to_datetime(prices_data['date'])
    close_by_symbol_date = {}
    for symbol, price_date, close in zip(prices_data['symbol'], price_dates, prices_data['close']):
        # rows without a symbol or a date can never be matched
        if pd.isna(symbol) or pd.isna(price_date):
            continue
        # setdefault keeps the first occurrence of a duplicated (symbol, date)
        close_by_symbol_date.setdefault((symbol, price_date), close)

    day_offsets = [pd.Timedelta(days=offset) for offset in range(max_offset_days + 1)]

    def get_close_price(symbol, start_date):
        # a missing date has no price to look up
        if pd.isna(start_date):
            return np.nan
        for day_offset in day_offsets:
            key = (symbol, start_date + day_offset)
            if key in close_by_symbol_date:
                return close_by_symbol_date[key]
        return np.nan  # no price on the date or on the 3 days after it

    for date_column in date_columns:
        start_dates = pd.to_datetime(earnings_data[date_column])
        # building a list (rather than a row-wise apply) also works when
        # earnings_data has no rows
        earnings_data[f'price_{date_column}'] = [
            get_close_price(symbol, start_date)
            for symbol, start_date in zip(earnings_data['symbol'], start_dates)
        ]

    # Save the DataFrame to a CSV file once, after all price columns are added
    earnings_data.to_csv(save_filepath, index=False)

    return earnings_data


In [None]:
# testing out the function on a smaller earnings data set: the first 5 symbols and a single year of earnings dates

# the first 5 distinct symbols, in order of first appearance

symbols = earnings_data['symbol'].unique()[:5]

# rows belonging to one of those symbols

in_symbols = earnings_data['symbol'].isin(symbols)

# rows with an earnings date between 2023-01-01 and 2023-12-31 (both ends included)

in_date_range = (earnings_data['date'] >= '2023-01-01') & (earnings_data['date'] <= '2023-12-31')

# taking a copy of the filtered rows as the test set. The explicit copy keeps the test set
# independent of earnings_data, so adding the price columns to it later does not raise
# SettingWithCopyWarning and never touches the full data set

earnings_data_test = earnings_data.loc[in_symbols & in_date_range].copy()

earnings_data_test


In [None]:
# defining the inputs for the test function
save_filepath_test = '../Data/Datasets/updated_earnings_data_test.csv'

earnings_data_test = add_price_columns_and_save(earnings_data_test, prices_data, date_columns, save_filepath_test)

earnings_data_test

In [None]:
# function has worked correctly, now to apply it to the full earnings data set
date_columns = ['date', 'date_1', 'date_7', 'date_28', 'date_91']
save_filepath = '../Data/Datasets/updated_earnings_data.csv'

earnings_data = add_price_columns_and_save(earnings_data, prices_data, date_columns, save_filepath)


In [24]:
# calling read_csv to check the data has been saved correctly as 'updated_earnings_data.csv'

updated_earnings_data = pd.read_csv('../Data/Datasets/updated_earnings_data.csv')

updated_earnings_data.head(10)


Unnamed: 0,date,symbol,eps,epsEstimated,time,revenue,revenueEstimated,updatedFromDate,fiscalDateEnding,date_1,...,day_of_week,day_of_week_1,day_of_week_7,day_of_week_28,day_of_week_91,price_date,price_date_1,price_date_7,price_date_28,price_date_91
0,2025-10-29,AAPL,,,amc,,,2024-12-09,2025-09-28,2025-10-30,...,Wednesday,Thursday,Wednesday,Wednesday,Wednesday,,,,,
1,2025-07-30,AAPL,,,amc,,,2024-12-09,2025-06-28,2025-07-31,...,Wednesday,Thursday,Wednesday,Wednesday,Wednesday,,,,,
2,2025-04-30,AAPL,,,amc,,,2024-12-09,2025-03-28,2025-05-01,...,Wednesday,Thursday,Wednesday,Wednesday,Wednesday,,,,,
3,2025-02-06,AAPL,,2.36,amc,,,2024-12-09,2024-12-28,2025-02-07,...,Thursday,Friday,Thursday,Thursday,Thursday,,,,,
4,2024-10-31,AAPL,1.64,1.6,amc,94930000000.0,94511950000.0,2024-12-09,2024-09-28,2024-11-01,...,Thursday,Friday,Thursday,Thursday,Thursday,225.91,222.91,227.48,237.33,
5,2024-08-01,AAPL,1.4,1.35,amc,85777000000.0,84432540000.0,2024-11-25,2024-06-29,2024-08-02,...,Thursday,Friday,Thursday,Thursday,Thursday,218.36,219.86,213.31,229.79,225.91
6,2024-05-02,AAPL,1.53,1.5,amc,90753000000.0,90366110000.0,2024-11-25,2024-03-30,2024-05-03,...,Thursday,Friday,Thursday,Thursday,Thursday,173.03,183.38,184.57,191.29,218.36
7,2024-02-01,AAPL,2.18,2.1,amc,119575000000.0,117986600000.0,2024-11-25,2023-12-30,2024-02-02,...,Thursday,Friday,Thursday,Thursday,Thursday,186.86,185.85,188.32,180.75,173.03
8,2023-11-02,AAPL,1.46,1.39,amc,89498000000.0,84175800000.0,2024-11-25,2023-09-30,2023-11-03,...,Thursday,Friday,Thursday,Thursday,Thursday,177.57,176.65,182.41,189.95,186.86
9,2023-08-03,AAPL,1.26,1.19,amc,81797000000.0,81685700000.0,2024-11-25,2023-07-01,2023-08-04,...,Thursday,Friday,Thursday,Thursday,Thursday,191.17,181.99,177.97,187.87,177.57


In [25]:
# removing rows where the earnings 'date' is after 2024-12-01, so that these rows and their NaN price values are not included
# 'date' was read back from the CSV as text, so it is parsed before comparing instead of relying on string ordering
# (string ordering is only correct while every value is a zero-padded YYYY-MM-DD date). The stored column is left unchanged.

cutoff_date = pd.Timestamp('2024-12-01')

updated_earnings_data = updated_earnings_data[pd.to_datetime(updated_earnings_data['date']) <= cutoff_date]



In [None]:
# checking where the nan values are in the updated_earnings_data by company

price_columns = ['price_date', 'price_date_1', 'price_date_7', 'price_date_28', 'price_date_91']

# creating a pivot table where the rows are the symbols and the columns are the count of nan values in each of the price_date columns.
# 'symbol' is used only as the grouping key: counting its own NaN values would just add a column of zeros.
nan_pivot = updated_earnings_data[price_columns].isna().groupby(updated_earnings_data['symbol']).sum()

# Display the pivot table
nan_pivot

In [26]:
# inspecting the missing prices of a single company

# symbol to view

symbol = 'BABA'

# columns to show: the earnings date and symbol, the day of the week of each date column, and the price columns

view_columns = ['date', 'symbol',
                'day_of_week', 'day_of_week_1', 'day_of_week_7', 'day_of_week_28', 'day_of_week_91',
                'price_date', 'price_date_1', 'price_date_7', 'price_date_28', 'price_date_91']

# rows of the chosen symbol, restricted to the columns above

is_symbol = updated_earnings_data['symbol'] == symbol

symbol_data = updated_earnings_data.loc[is_symbol, view_columns]

# keeping only the rows that contain at least one nan value

has_nan = symbol_data.isna().any(axis=1)

symbol_data = symbol_data[has_nan]

# displaying the data

symbol_data

Unnamed: 0,date,symbol,day_of_week,day_of_week_1,day_of_week_7,day_of_week_28,day_of_week_91,price_date,price_date_1,price_date_7,price_date_28,price_date_91
1510,2024-11-15,BABA,Friday,Monday,Friday,Friday,Friday,88.59,89.35,83.13,,
1552,2014-03-31,BABA,Monday,Tuesday,Monday,Monday,Monday,,,,,
1553,2013-12-31,BABA,Tuesday,Wednesday,Tuesday,Tuesday,Tuesday,,,,,
1554,2013-09-30,BABA,Monday,Tuesday,Monday,Monday,Monday,,,,,
1555,2013-07-01,BABA,Monday,Monday,Monday,Monday,Monday,,,,,


In [27]:
# preparing the updated earnings data for the merge with the finance data: only selected columns are kept, and the selection is made first.

# selecting the columns to keep in the updated earnings data

columns_to_keep = [
    'date', 'symbol', 'eps', 'epsEstimated',
    'revenueEstimated', 'updatedFromDate', 'fiscalDateEnding',
    'date_1', 'date_7', 'date_28', 'date_91',
    'day_of_week', 'day_of_week_1', 'day_of_week_7', 'day_of_week_28', 'day_of_week_91',
    'price_date', 'price_date_1', 'price_date_7', 'price_date_28', 'price_date_91',
]

# .copy() makes the selection an independent frame, so assigning to one of its columns later
# (the datetime conversion of fiscalDateEnding) does not raise SettingWithCopyWarning

updated_earnings_data = updated_earnings_data[columns_to_keep].copy()

# viewing the data

updated_earnings_data.head(10)


Unnamed: 0,date,symbol,eps,epsEstimated,revenueEstimated,updatedFromDate,fiscalDateEnding,date_1,date_7,date_28,...,day_of_week,day_of_week_1,day_of_week_7,day_of_week_28,day_of_week_91,price_date,price_date_1,price_date_7,price_date_28,price_date_91
4,2024-10-31,AAPL,1.64,1.6,94511950000.0,2024-12-09,2024-09-28,2024-11-01,2024-11-07,2024-11-28,...,Thursday,Friday,Thursday,Thursday,Thursday,225.91,222.91,227.48,237.33,
5,2024-08-01,AAPL,1.4,1.35,84432540000.0,2024-11-25,2024-06-29,2024-08-02,2024-08-08,2024-08-29,...,Thursday,Friday,Thursday,Thursday,Thursday,218.36,219.86,213.31,229.79,225.91
6,2024-05-02,AAPL,1.53,1.5,90366110000.0,2024-11-25,2024-03-30,2024-05-03,2024-05-09,2024-05-30,...,Thursday,Friday,Thursday,Thursday,Thursday,173.03,183.38,184.57,191.29,218.36
7,2024-02-01,AAPL,2.18,2.1,117986600000.0,2024-11-25,2023-12-30,2024-02-02,2024-02-08,2024-02-29,...,Thursday,Friday,Thursday,Thursday,Thursday,186.86,185.85,188.32,180.75,173.03
8,2023-11-02,AAPL,1.46,1.39,84175800000.0,2024-11-25,2023-09-30,2023-11-03,2023-11-09,2023-11-30,...,Thursday,Friday,Thursday,Thursday,Thursday,177.57,176.65,182.41,189.95,186.86
9,2023-08-03,AAPL,1.26,1.19,81685700000.0,2024-11-25,2023-07-01,2023-08-04,2023-08-10,2023-08-31,...,Thursday,Friday,Thursday,Thursday,Thursday,191.17,181.99,177.97,187.87,177.57
10,2023-05-04,AAPL,1.52,1.43,92960200000.0,2024-11-25,2023-04-01,2023-05-05,2023-05-11,2023-06-01,...,Thursday,Friday,Thursday,Thursday,Thursday,165.79,173.57,173.75,180.09,191.17
11,2023-02-02,AAPL,1.88,1.94,121333300000.0,2024-11-25,2022-12-31,2023-02-03,2023-02-09,2023-03-02,...,Thursday,Friday,Thursday,Thursday,Thursday,150.82,154.5,150.87,145.91,165.79
12,2022-10-27,AAPL,1.29,1.27,88739280000.0,2024-11-25,2022-09-24,2022-10-28,2022-11-03,2022-11-24,...,Thursday,Friday,Thursday,Thursday,Thursday,144.8,155.74,138.88,148.11,143.96
13,2022-07-28,AAPL,1.2,1.16,82791240000.0,2024-11-25,2022-06-25,2022-07-29,2022-08-04,2022-08-25,...,Thursday,Friday,Thursday,Thursday,Thursday,157.35,162.51,165.81,170.03,144.8


In [28]:
# preparing to merge the finance data with the updated earnings data, keeping all data from the finance data set and only the data from the updated earnings data set that matches it. The match is on 'date_x' (finance data set) = 'fiscalDateEnding' (updated earnings data set) and on symbol

# making sure 'date_x' and 'fiscalDateEnding' are both datetime columns, so that the two merge keys have the same dtype

finance_data['date_x'] = pd.to_datetime(finance_data['date_x'])

updated_earnings_data['fiscalDateEnding'] = pd.to_datetime(updated_earnings_data['fiscalDateEnding'])


In [29]:
# viewing the data for the symbol 'AAPL' in the finance data set. Sorted by date_x

finance_data[finance_data['symbol'] == 'AAPL'].sort_values('date_x', ascending=False)


Unnamed: 0,addTotalDebt,assetGrowth,assetTurnover,averageInventory,averagePayables,averageReceivables,beta,bookValuePerShare,bookValueperShareGrowth,calendarYear,...,threeYDividendperShareGrowthPerShare,threeYNetIncomeGrowthPerShare,threeYOperatingCFGrowthPerShare,threeYRevenueGrowthPerShare,threeYShareholdersEquityGrowthPerShare,totalDebtToCapitalization,weightedAverageSharesDilutedGrowth,weightedAverageSharesGrowth,workingCapital,zip
0,106629000000,0.100624,0.260096,6725500000.0,58267000000.0,54707500000.0,1.24,3.753628,-0.137951,2024,...,0.135642,-0.2208,0.442328,0.237508,-0.019076,0.65185,-0.006862,-0.009661,-23405000000.0,95014
1,101304000000,-0.017187,0.258667,6198500000.0,46663500000.0,42161000000.0,1.24,4.354308,-0.095859,2024,...,0.122352,0.070692,0.484993,0.143358,0.126469,0.602957,-0.007535,-0.005573,-6189000000.0,95014
2,104590000000,-0.045551,0.268969,6371500000.0,51949500000.0,45626000000.0,1.24,4.815961,0.008022,2024,...,0.170447,0.087751,0.028931,0.101665,0.166326,0.585008,-0.007186,-0.006699,4594000000.0,95014
3,108040000000,0.002641,0.338247,6421000000.0,60378500000.0,55543500000.0,1.24,4.777636,0.199247,2024,...,0.15597,0.287877,0.123787,0.171619,0.22176,0.59317,-0.00611,-0.005748,9719000000.0,95014
4,123930000000,0.052367,0.253835,6841000000.0,54655000000.0,50085500000.0,1.24,3.983862,0.037547,2023,...,0.170403,0.980735,0.147789,0.512628,0.040041,0.64126,-0.006505,-0.006254,-1742000000.0,95014
5,109280000000,0.008664,0.244142,7416500000.0,44822000000.0,37542500000.0,1.24,3.839692,-0.024779,2023,...,0.156923,0.941479,0.781654,0.506035,-0.083647,0.644514,-0.004545,-0.005672,-2304000000.0,95014
6,109615000000,-0.042068,0.285513,7151000000.0,50431500000.0,45039500000.0,1.24,3.937252,0.103066,2023,...,0.194736,1.372661,1.370283,0.796638,-0.124421,0.638139,-0.006811,-0.006643,-7162000000.0,95014
7,111110000000,-0.017032,0.337866,5883000000.0,61016500000.0,57556000000.0,1.24,3.569369,0.129191,2023,...,0.183114,0.499105,0.238259,0.417819,-0.295935,0.662011,-0.010097,-0.008587,-8509000000.0,95014
8,120069000000,0.048901,0.255548,5189500000.0,56229000000.0,51587000000.0,1.24,3.160998,-0.120742,2022,...,0.192725,0.696583,0.357916,0.577379,-0.372494,0.703223,-0.008839,-0.008202,-18577000000.0,95014
9,119691000000,-0.040931,0.246675,5446500000.0,50512500000.0,43821000000.0,1.24,3.595075,-0.131686,2022,...,0.187867,1.189526,1.225337,0.743911,-0.31858,0.673185,-0.008603,-0.007117,-17581000000.0,95014


In [30]:
# viewing the data for the symbol 'AAPL' in the earnings data set. Sorted by fiscalDateEnding

updated_earnings_data[updated_earnings_data['symbol'] == 'AAPL'].sort_values('fiscalDateEnding', ascending=False)



Unnamed: 0,date,symbol,eps,epsEstimated,revenueEstimated,updatedFromDate,fiscalDateEnding,date_1,date_7,date_28,...,day_of_week,day_of_week_1,day_of_week_7,day_of_week_28,day_of_week_91,price_date,price_date_1,price_date_7,price_date_28,price_date_91
4,2024-10-31,AAPL,1.64,1.6,94511950000.0,2024-12-09,2024-09-28,2024-11-01,2024-11-07,2024-11-28,...,Thursday,Friday,Thursday,Thursday,Thursday,225.91,222.91,227.48,237.33,
5,2024-08-01,AAPL,1.4,1.35,84432540000.0,2024-11-25,2024-06-29,2024-08-02,2024-08-08,2024-08-29,...,Thursday,Friday,Thursday,Thursday,Thursday,218.36,219.86,213.31,229.79,225.91
6,2024-05-02,AAPL,1.53,1.5,90366110000.0,2024-11-25,2024-03-30,2024-05-03,2024-05-09,2024-05-30,...,Thursday,Friday,Thursday,Thursday,Thursday,173.03,183.38,184.57,191.29,218.36
7,2024-02-01,AAPL,2.18,2.1,117986600000.0,2024-11-25,2023-12-30,2024-02-02,2024-02-08,2024-02-29,...,Thursday,Friday,Thursday,Thursday,Thursday,186.86,185.85,188.32,180.75,173.03
8,2023-11-02,AAPL,1.46,1.39,84175800000.0,2024-11-25,2023-09-30,2023-11-03,2023-11-09,2023-11-30,...,Thursday,Friday,Thursday,Thursday,Thursday,177.57,176.65,182.41,189.95,186.86
9,2023-08-03,AAPL,1.26,1.19,81685700000.0,2024-11-25,2023-07-01,2023-08-04,2023-08-10,2023-08-31,...,Thursday,Friday,Thursday,Thursday,Thursday,191.17,181.99,177.97,187.87,177.57
10,2023-05-04,AAPL,1.52,1.43,92960200000.0,2024-11-25,2023-04-01,2023-05-05,2023-05-11,2023-06-01,...,Thursday,Friday,Thursday,Thursday,Thursday,165.79,173.57,173.75,180.09,191.17
11,2023-02-02,AAPL,1.88,1.94,121333300000.0,2024-11-25,2022-12-31,2023-02-03,2023-02-09,2023-03-02,...,Thursday,Friday,Thursday,Thursday,Thursday,150.82,154.5,150.87,145.91,165.79
12,2022-10-27,AAPL,1.29,1.27,88739280000.0,2024-11-25,2022-09-24,2022-10-28,2022-11-03,2022-11-24,...,Thursday,Friday,Thursday,Thursday,Thursday,144.8,155.74,138.88,148.11,143.96
13,2022-07-28,AAPL,1.2,1.16,82791240000.0,2024-11-25,2022-06-25,2022-07-29,2022-08-04,2022-08-25,...,Thursday,Friday,Thursday,Thursday,Thursday,157.35,162.51,165.81,170.03,144.8


In [31]:
# merging the finance data with the updated earnings data
# left join: every finance row is kept; the earnings columns are filled in where
# (date_x, symbol) equals (fiscalDateEnding, symbol) and are NaN where there is no match
# NOTE(review): if the earnings data holds more than one row for the same (fiscalDateEnding, symbol), the matching finance row is repeated in the result - TODO confirm the keys are unique

merg_finance_data = pd.merge(finance_data, updated_earnings_data, how='left', left_on=['date_x', 'symbol'], right_on=['fiscalDateEnding', 'symbol'])


In [32]:
# viewing the merged data for the symbol 'AAPL' in the finance data set. Sorted by date_x

merg_finance_data[merg_finance_data['symbol'] == 'AAPL'].sort_values('date_x', ascending=False)

Unnamed: 0,addTotalDebt,assetGrowth,assetTurnover,averageInventory,averagePayables,averageReceivables,beta,bookValuePerShare,bookValueperShareGrowth,calendarYear,...,day_of_week,day_of_week_1,day_of_week_7,day_of_week_28,day_of_week_91,price_date,price_date_1,price_date_7,price_date_28,price_date_91
0,106629000000,0.100624,0.260096,6725500000.0,58267000000.0,54707500000.0,1.24,3.753628,-0.137951,2024,...,Thursday,Friday,Thursday,Thursday,Thursday,225.91,222.91,227.48,237.33,
1,101304000000,-0.017187,0.258667,6198500000.0,46663500000.0,42161000000.0,1.24,4.354308,-0.095859,2024,...,Thursday,Friday,Thursday,Thursday,Thursday,218.36,219.86,213.31,229.79,225.91
2,104590000000,-0.045551,0.268969,6371500000.0,51949500000.0,45626000000.0,1.24,4.815961,0.008022,2024,...,Thursday,Friday,Thursday,Thursday,Thursday,173.03,183.38,184.57,191.29,218.36
3,108040000000,0.002641,0.338247,6421000000.0,60378500000.0,55543500000.0,1.24,4.777636,0.199247,2024,...,Thursday,Friday,Thursday,Thursday,Thursday,186.86,185.85,188.32,180.75,173.03
4,123930000000,0.052367,0.253835,6841000000.0,54655000000.0,50085500000.0,1.24,3.983862,0.037547,2023,...,Thursday,Friday,Thursday,Thursday,Thursday,177.57,176.65,182.41,189.95,186.86
5,109280000000,0.008664,0.244142,7416500000.0,44822000000.0,37542500000.0,1.24,3.839692,-0.024779,2023,...,Thursday,Friday,Thursday,Thursday,Thursday,191.17,181.99,177.97,187.87,177.57
6,109615000000,-0.042068,0.285513,7151000000.0,50431500000.0,45039500000.0,1.24,3.937252,0.103066,2023,...,Thursday,Friday,Thursday,Thursday,Thursday,165.79,173.57,173.75,180.09,191.17
7,111110000000,-0.017032,0.337866,5883000000.0,61016500000.0,57556000000.0,1.24,3.569369,0.129191,2023,...,Thursday,Friday,Thursday,Thursday,Thursday,150.82,154.5,150.87,145.91,165.79
8,120069000000,0.048901,0.255548,5189500000.0,56229000000.0,51587000000.0,1.24,3.160998,-0.120742,2022,...,Thursday,Friday,Thursday,Thursday,Thursday,144.8,155.74,138.88,148.11,143.96
9,119691000000,-0.040931,0.246675,5446500000.0,50512500000.0,43821000000.0,1.24,3.595075,-0.131686,2022,...,Thursday,Friday,Thursday,Thursday,Thursday,157.35,162.51,165.81,170.03,144.8


In [33]:

# saving the finance data as "updated_finance_data.csv"

merg_finance_data.to_csv('../Data/Datasets/updated_finance_data.csv', index=False)
