In [1]:
import pandas as pd

# Read every raw CSV in one pass. No date parsing is requested here, so the
# Date columns are whatever read_csv infers from the files.
(
    historical_prices,
    crude_oil_prices,
    gdp_growth_rates,
    inflation_rates,
    unemployment_rates,
) = (
    pd.read_csv(csv_name)
    for csv_name in (
        'cleaned_historical_prices_with_date.csv',
        'crude_oil_prices.csv',
        'gdp_growth_rates.csv',
        'inflation_rates.csv',
        'unemployment_rates.csv',
    )
)

# Preview the first ten rows of each frame, in the same order they were loaded.
print(
    historical_prices.head(10),
    crude_oil_prices.head(10),
    gdp_growth_rates.head(10),
    inflation_rates.head(10),
    unemployment_rates.head(10),
    sep='\n',
)


         Date    Low   High   Open  DailyChange  Price_MA_7
0  02/01/2020  24.90  24.90  24.90    -2.352941   24.961429
1  03/01/2020  25.34  25.37  25.34     3.071948   24.927143
2  06/01/2020  24.70  24.96  24.95    -1.276935   24.765714
3  07/01/2020  24.59  24.85  24.59     1.869919   24.808571
4  08/01/2020  24.57  24.88  24.88    -2.380952   24.852857
5  09/01/2020  24.84  25.20  24.84     1.900526   25.048571
6  10/01/2020  24.71  24.82  24.82     0.283861   25.120000
7  13/01/2020  24.55  24.73  24.55     1.189988   25.222857
8  14/01/2020  24.20  24.33  24.28    -2.675719   25.350000
9  15/01/2020  24.82  25.08  24.95    -1.300749   25.470000
         Date  Price_EUR(Quartly Crude Oil Prices)
0  01/01/2020                              45.2430
1  01/04/2020                              27.6117
2  01/07/2020                              36.9026
3  01/10/2020                              36.3342
4  01/01/2021                              50.0446
5  01/04/2021                     

In [2]:
import pandas as pd

# Load datasets again for clarity
historical_prices = pd.read_csv('cleaned_historical_prices_with_date.csv')
crude_oil_prices = pd.read_csv('crude_oil_prices.csv')

# Convert Date columns to datetime format (the files use day-first dates)
historical_prices['Date'] = pd.to_datetime(historical_prices['Date'], dayfirst=True)
crude_oil_prices['Date'] = pd.to_datetime(crude_oil_prices['Date'], dayfirst=True)

# Expand the quarterly crude oil prices to one row per calendar day.
# A plain resample('D').ffill() stops at the last timestamp present in
# crude_oil_prices, so every trading day after the final quarterly date would
# come out of the left merge below as NaN. Build the daily index explicitly so
# it also reaches the last trading day, then forward fill over that range.
crude_oil_prices = crude_oil_prices.set_index('Date').sort_index()
daily_index = pd.date_range(
    start=crude_oil_prices.index.min(),
    end=max(crude_oil_prices.index.max(), historical_prices['Date'].max()),
    freq='D',
    name='Date',
)
# method='ffill' only fills the newly created days; values already missing in
# the source file are not overwritten.
crude_oil_prices = crude_oil_prices.reindex(daily_index, method='ffill').reset_index()

# Merge datasets on Date; how='left' keeps every row of historical_prices
merged_data = pd.merge(historical_prices, crude_oil_prices, on='Date', how='left')

# Save the merged data to inspect
merged_data.to_csv('merged_historical_and_crude_oil_prices.csv', index=False)

# Display the first few rows to verify
print(merged_data.head(10))


        Date    Low   High   Open  DailyChange  Price_MA_7  \
0 2020-01-02  24.90  24.90  24.90    -2.352941   24.961429   
1 2020-01-03  25.34  25.37  25.34     3.071948   24.927143   
2 2020-01-06  24.70  24.96  24.95    -1.276935   24.765714   
3 2020-01-07  24.59  24.85  24.59     1.869919   24.808571   
4 2020-01-08  24.57  24.88  24.88    -2.380952   24.852857   
5 2020-01-09  24.84  25.20  24.84     1.900526   25.048571   
6 2020-01-10  24.71  24.82  24.82     0.283861   25.120000   
7 2020-01-13  24.55  24.73  24.55     1.189988   25.222857   
8 2020-01-14  24.20  24.33  24.28    -2.675719   25.350000   
9 2020-01-15  24.82  25.08  24.95    -1.300749   25.470000   

   Price_EUR(Quartly Crude Oil Prices)  
0                               45.243  
1                               45.243  
2                               45.243  
3                               45.243  
4                               45.243  
5                               45.243  
6                             

In [3]:
# Load the inflation rates dataset
inflation_rates = pd.read_csv('inflation_rates.csv')

# Convert the Date column to datetime format (day-first dates)
inflation_rates['Date'] = pd.to_datetime(inflation_rates['Date'], dayfirst=True)

# Expand the inflation rates to one row per calendar day.
# A plain resample('D').ffill() stops at the last timestamp present in
# inflation_rates, so any trading day after that date would come out of the
# left merge below as NaN. Build the daily index explicitly so it also reaches
# the last date in merged_data, then forward fill over that range.
inflation_rates = inflation_rates.set_index('Date').sort_index()
daily_index = pd.date_range(
    start=inflation_rates.index.min(),
    end=max(inflation_rates.index.max(), merged_data['Date'].max()),
    freq='D',
    name='Date',
)
# method='ffill' only fills the newly created days; values already missing in
# the source file are not overwritten.
inflation_rates = inflation_rates.reindex(daily_index, method='ffill').reset_index()

# Merge with the previously merged dataset; how='left' keeps every price row
merged_historical_crude_inflation = pd.merge(merged_data, inflation_rates, on='Date', how='left')

# Save the merged data to inspect
merged_historical_crude_inflation.to_csv('merged_historical_crude_inflation.csv', index=False)

# Display the first few rows to verify
print(merged_historical_crude_inflation.head(10))

        Date    Low   High   Open  DailyChange  Price_MA_7  \
0 2020-01-02  24.90  24.90  24.90    -2.352941   24.961429   
1 2020-01-03  25.34  25.37  25.34     3.071948   24.927143   
2 2020-01-06  24.70  24.96  24.95    -1.276935   24.765714   
3 2020-01-07  24.59  24.85  24.59     1.869919   24.808571   
4 2020-01-08  24.57  24.88  24.88    -2.380952   24.852857   
5 2020-01-09  24.84  25.20  24.84     1.900526   25.048571   
6 2020-01-10  24.71  24.82  24.82     0.283861   25.120000   
7 2020-01-13  24.55  24.73  24.55     1.189988   25.222857   
8 2020-01-14  24.20  24.33  24.28    -2.675719   25.350000   
9 2020-01-15  24.82  25.08  24.95    -1.300749   25.470000   

   Price_EUR(Quartly Crude Oil Prices)  Inflation_Rates(%)  
0                               45.243                0.99  
1                               45.243                0.99  
2                               45.243                0.99  
3                               45.243                0.99  
4           

In [4]:
# Load the GDP growth rates dataset
gdp_growth_rates = pd.read_csv('gdp_growth_rates.csv')

# Convert the Date column to datetime format (day-first dates)
gdp_growth_rates['Date'] = pd.to_datetime(gdp_growth_rates['Date'], dayfirst=True)

# Expand the GDP growth rates to one row per calendar day.
# A plain resample('D').ffill() stops at the last timestamp present in
# gdp_growth_rates, so any trading day after that date would come out of the
# left merge below as NaN. Build the daily index explicitly so it also reaches
# the last date in the frame being merged into, then forward fill over it.
gdp_growth_rates = gdp_growth_rates.set_index('Date').sort_index()
daily_index = pd.date_range(
    start=gdp_growth_rates.index.min(),
    end=max(gdp_growth_rates.index.max(), merged_historical_crude_inflation['Date'].max()),
    freq='D',
    name='Date',
)
# method='ffill' only fills the newly created days; values already missing in
# the source file are not overwritten.
gdp_growth_rates = gdp_growth_rates.reindex(daily_index, method='ffill').reset_index()

# Merge with the previously merged dataset; how='left' keeps every price row
merged_historical_crude_inflation_gdp = pd.merge(merged_historical_crude_inflation, gdp_growth_rates, on='Date', how='left')

# Save the merged data to inspect
merged_historical_crude_inflation_gdp.to_csv('merged_historical_crude_inflation_gdp.csv', index=False)

# Display the first few rows to verify
print(merged_historical_crude_inflation_gdp.head(10))

        Date    Low   High   Open  DailyChange  Price_MA_7  \
0 2020-01-02  24.90  24.90  24.90    -2.352941   24.961429   
1 2020-01-03  25.34  25.37  25.34     3.071948   24.927143   
2 2020-01-06  24.70  24.96  24.95    -1.276935   24.765714   
3 2020-01-07  24.59  24.85  24.59     1.869919   24.808571   
4 2020-01-08  24.57  24.88  24.88    -2.380952   24.852857   
5 2020-01-09  24.84  25.20  24.84     1.900526   25.048571   
6 2020-01-10  24.71  24.82  24.82     0.283861   25.120000   
7 2020-01-13  24.55  24.73  24.55     1.189988   25.222857   
8 2020-01-14  24.20  24.33  24.28    -2.675719   25.350000   
9 2020-01-15  24.82  25.08  24.95    -1.300749   25.470000   

   Price_EUR(Quartly Crude Oil Prices)  Inflation_Rates(%)  \
0                               45.243                0.99   
1                               45.243                0.99   
2                               45.243                0.99   
3                               45.243                0.99   
4      

In [5]:
# Load the Unemployment Rates dataset
unemployment_rates = pd.read_csv('unemployment_rates.csv')

# Convert the Date column to datetime format (day-first dates)
unemployment_rates['Date'] = pd.to_datetime(unemployment_rates['Date'], dayfirst=True)

# Expand the unemployment rates to one row per calendar day.
# A plain resample('D').ffill() stops at the last timestamp present in
# unemployment_rates, so any trading day after that date would come out of the
# left merge below as NaN. Build the daily index explicitly so it also reaches
# the last date in the frame being merged into, then forward fill over it.
unemployment_rates = unemployment_rates.set_index('Date').sort_index()
daily_index = pd.date_range(
    start=unemployment_rates.index.min(),
    end=max(unemployment_rates.index.max(), merged_historical_crude_inflation_gdp['Date'].max()),
    freq='D',
    name='Date',
)
# method='ffill' only fills the newly created days; values already missing in
# the source file are not overwritten.
unemployment_rates = unemployment_rates.reindex(daily_index, method='ffill').reset_index()

# Merge with the previously merged dataset; how='left' keeps every price row
final_merged_data = pd.merge(merged_historical_crude_inflation_gdp, unemployment_rates, on='Date', how='left')

# Save the merged data to inspect
final_merged_data.to_csv('final_merged_data.csv', index=False)

# Display the first few rows to verify
print(final_merged_data.head(10))

        Date    Low   High   Open  DailyChange  Price_MA_7  \
0 2020-01-02  24.90  24.90  24.90    -2.352941   24.961429   
1 2020-01-03  25.34  25.37  25.34     3.071948   24.927143   
2 2020-01-06  24.70  24.96  24.95    -1.276935   24.765714   
3 2020-01-07  24.59  24.85  24.59     1.869919   24.808571   
4 2020-01-08  24.57  24.88  24.88    -2.380952   24.852857   
5 2020-01-09  24.84  25.20  24.84     1.900526   25.048571   
6 2020-01-10  24.71  24.82  24.82     0.283861   25.120000   
7 2020-01-13  24.55  24.73  24.55     1.189988   25.222857   
8 2020-01-14  24.20  24.33  24.28    -2.675719   25.350000   
9 2020-01-15  24.82  25.08  24.95    -1.300749   25.470000   

   Price_EUR(Quartly Crude Oil Prices)  Inflation_Rates(%)  \
0                               45.243                0.99   
1                               45.243                0.99   
2                               45.243                0.99   
3                               45.243                0.99   
4      

In [6]:
import pandas as pd

# Read the merged output back from disk as a round-trip check.
# NOTE: no parse_dates is passed, so this read does not convert Date to datetime.
final_data = pd.read_csv('final_merged_data.csv')

# Show both ends of the frame: first ten rows, then last ten rows.
print(final_data.head(10), final_data.tail(10), sep='\n')

         Date    Low   High   Open  DailyChange  Price_MA_7  \
0  2020-01-02  24.90  24.90  24.90    -2.352941   24.961429   
1  2020-01-03  25.34  25.37  25.34     3.071948   24.927143   
2  2020-01-06  24.70  24.96  24.95    -1.276935   24.765714   
3  2020-01-07  24.59  24.85  24.59     1.869919   24.808571   
4  2020-01-08  24.57  24.88  24.88    -2.380952   24.852857   
5  2020-01-09  24.84  25.20  24.84     1.900526   25.048571   
6  2020-01-10  24.71  24.82  24.82     0.283861   25.120000   
7  2020-01-13  24.55  24.73  24.55     1.189988   25.222857   
8  2020-01-14  24.20  24.33  24.28    -2.675719   25.350000   
9  2020-01-15  24.82  25.08  24.95    -1.300749   25.470000   

   Price_EUR(Quartly Crude Oil Prices)  Inflation_Rates(%)  \
0                               45.243                0.99   
1                               45.243                0.99   
2                               45.243                0.99   
3                               45.243                0.99