# Part IV: Data Cleaning

In [1]:
import os
import json
import requests
import pandas as pd
import openpyxl

# Load the raw US and Germany workbooks, in that order, into DataFrames
us_data, germany_data = (
    pd.read_excel(workbook_path)
    for workbook_path in (
        'data/US_EC1B1_Dataset.xlsx',
        'data/Germany_EC1B1_Dataset.xlsx',
    )
)

def reindex(df):
    """Promote the first row of ``df`` to column headers.

    The first row supplies the column labels, except that the first two
    columns are always labelled 'Country' and 'Month-Year'. The remaining
    rows are returned with a fresh 0-based index.

    The input frame is left untouched: a new DataFrame is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose first row (by position) holds the real column labels.

    Returns
    -------
    pandas.DataFrame
        The rows after the first one, relabelled and re-indexed from 0.

    Raises
    ------
    IndexError
        If ``df`` has no rows, so there is no header row to promote.
    """
    # Read the header by position so the result does not depend on the
    # incoming index labels.
    header = list(df.iloc[0])

    # The first two columns get fixed names; every other column keeps the
    # label found in the header row.
    labels = [
        'Country' if position == 0
        else 'Month-Year' if position == 1
        else label
        for position, label in enumerate(header)
    ]

    # Drop the header row by position as well (matching how it was read) and
    # renumber the remaining rows; reset_index returns a new frame, so the
    # caller's DataFrame is never modified.
    body = df.iloc[1:].reset_index(drop=True)
    body.columns = labels

    return body

# Promote the embedded header row in each raw frame (US first, then Germany)
us_df, germany_df = map(reindex, (us_data, germany_data))

## Merging the Datasets (Append)

In [2]:
# Union of the column labels from both frames, in first-seen order (Germany's
# columns, then any US-only ones). dict.fromkeys de-duplicates the same way a
# set does, but the resulting order is identical on every kernel restart,
# whereas iterating a set of strings is not.
all_columns = list(dict.fromkeys([*germany_df.columns, *us_df.columns]))

# Give both frames the same columns; a column absent from one frame is added
# to it and filled with missing values.
# NOTE(review): these assignments rebind the names that held the raw
# read_excel frames in the previous cell, so re-running that cell's reindex()
# calls without re-reading the files would act on these frames instead.
# Consider distinct names if nothing later relies on these ones.
germany_data = germany_df.reindex(columns=all_columns, fill_value=None)
us_data = us_df.reindex(columns=all_columns, fill_value=None)

# Append the datasets (Germany's rows above the US rows), renumbering from 0
combined_data = pd.concat([germany_data, us_data], ignore_index=True)

# Keep only the two identifier columns and the four indicator series
selected_columns = [
    'Month-Year',
    'Country',
    'International Reserves and Liquidity, Reserves, Official Reserve Assets, US Dollar',
    'Prices, Consumer Price Index, All items, Index',
    'Economic Activity, Industrial Production, Index',
    'Exchange Rates, National Currency Per U.S. Dollar, Period Average, Rate',
]
combined_data = combined_data[selected_columns]

# Check the result
combined_data

Unnamed: 0,Month-Year,Country,"International Reserves and Liquidity, Reserves, Official Reserve Assets, US Dollar","Prices, Consumer Price Index, All items, Index","Economic Activity, Industrial Production, Index","Exchange Rates, National Currency Per U.S. Dollar, Period Average, Rate"
0,Dec 1959,Germany,4811.474341,24.616929,32.500305,4.2
1,Jan 1960,Germany,4724.155785,24.616929,31.193881,4.2
2,Feb 1960,Germany,4806.36283,24.477068,31.041599,4.2
3,Mar 1960,Germany,4966.456016,24.477068,32.203755,4.2
4,Apr 1960,Germany,5236.120624,24.616929,34.287622,4.2
...,...,...,...,...,...,...
741,Aug 1990,United States,78908.838357,60.351608,,
742,Sep 1990,United States,80024.166133,60.856066,,
743,Oct 1990,United States,82852.196532,61.222946,,
744,Nov 1990,United States,83059.402774,61.360525,,
