In [44]:
%matplotlib inline
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
import json 

In [45]:
def load_data(currency, base_dir="data"):
    """Load exchange-rate records for one base currency from JSON files.

    Each ``*.json`` file is expected to contain an ``exchangeRates`` object
    with an ``@currencyDate`` value and an ``item`` entry (a single dict or a
    list of dicts) whose elements carry ``@currencyCode`` and ``inverseRate``.

    The per-currency subfolder ``<base_dir>/<currency>`` is used when it
    exists; otherwise the files directly inside ``base_dir`` are read, which
    is what this function did before ``currency`` was taken into account.
    NOTE(review): the subfolder layout is an assumption -- confirm how the
    data directory is organised.

    Args:
        currency: Base currency code, e.g. ``"USD"``.
        base_dir: Directory that holds the JSON files or the per-currency
            subfolders. Defaults to ``"data"``.

    Returns:
        A DataFrame with ``date`` (datetime), ``to`` and ``rate`` columns,
        or an empty DataFrame when nothing could be loaded.
    """
    folder = os.path.join(base_dir, currency)
    if not os.path.isdir(folder):
        # No per-currency subfolder: fall back to the flat layout.
        folder = base_dir
    if not os.path.isdir(folder):
        return pd.DataFrame()

    all_data = []
    for file in sorted(os.listdir(folder)):
        if not file.endswith(".json"):
            continue

        try:
            with open(os.path.join(folder, file), "r", encoding="utf-8") as f:
                data = json.load(f)
            rates = data["exchangeRates"]
            date = rates["@currencyDate"]
            items = rates["item"]
        except (OSError, ValueError, KeyError, TypeError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are ValueError
            # subclasses; report the file instead of hiding the failure.
            print(f"Skipping {file}: {e!r}")
            continue

        # A file with a single rate stores a dict instead of a list.
        if isinstance(items, dict):
            items = [items]

        for rate in items:
            try:
                to_code = rate["@currencyCode"]
                value = float(rate["inverseRate"])
            except (KeyError, TypeError, ValueError) as e:
                # Drop only the bad entry so the rest of the file is kept.
                print(f"Skipping entry in {file}: {e!r}")
                continue
            all_data.append({"date": date, "to": to_code, "rate": value})

    df = pd.DataFrame(all_data)
    if not df.empty:
        df["date"] = pd.to_datetime(df["date"])
    return df

In [46]:
# Base currencies covered by the analysis.
currencies = ["USD", "EUR", "GBP", "JPY", "CNY"]

# One DataFrame per base currency, keyed by its currency code.
all_frames = {code: load_data(code) for code in currencies}

In [42]:
# For each base currency, chart the three target currencies with the most rows.
for currency, df in all_frames.items():
    if not df.empty:
        # value_counts() is ordered by descending row count.
        top_currencies = df["to"].value_counts().head(3).index

        for to_curr in top_currencies:
            sub = df.loc[df["to"].eq(to_curr)]

            plt.figure(figsize=(10, 4))
            sns.lineplot(x="date", y="rate", data=sub).set(
                title=f"{currency} to {to_curr} exchange rate over time",
                xlabel="Date",
                ylabel="Exchange Rate",
            )
            plt.show()
    else:
        print(f"No data for {currency}")

No data for USD
No data for EUR
No data for GBP
No data for JPY
No data for CNY


In [43]:
# Rank target currencies by their mean absolute change between consecutive rows.
for currency, df in all_frames.items():
    if not df.empty:
        # Order rows per target currency by date, then diff within each target.
        df = df.sort_values(by=["to", "date"]).assign(
            change=lambda frame: frame.groupby("to")["rate"].diff().abs()
        )

        mean_volatility = (
            df.groupby("to")["change"].mean().sort_values(ascending=False)
        )
        # Heading, top three targets, then a blank separator line.
        print(f"Volatility for {currency}:", mean_volatility.head(3), "", sep="\n")

### Observations (Quest Book)

1. Some currencies like JPY and CNY show smoother trends.
2. Volatility increased around early 2020, possibly due to COVID-19.
3. USD and EUR are among the most stable base currencies.
4. Major economic events like Brexit in 2016 may show sharp spikes in GBP-related pairs.
5. Exchange rates fluctuate more when the base currency is from a developing country.

In [1]:
import os
import json
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Base directory that holds one subfolder of JSON files per currency.
# Path components are passed separately so os.path.join picks the separator.
# NOTE(review): the recorded run of this cell printed "Folder missing" for
# every currency, so this path probably does not match the directory the
# notebook was started from -- confirm.
BASE_DIR = os.path.join(os.getcwd(), "Take Home Final", "Data")

# Base currencies covered by the analysis.
currencies = ["USD", "EUR", "GBP", "JPY", "CNY"]

# Function to load JSON data for a specific currency
def load_data(currency, base_dir=None):
    """Load exchange-rate records for one base currency.

    Reads every ``*.json`` file in ``<base_dir>/<currency>``. A file
    contributes rows only when it holds an ``exchange_rates`` object with an
    ``@date`` value; its ``exchange_rate`` entry may be a single dict or a
    list of dicts carrying ``@to_currency`` and ``rate``. Unreadable files,
    unexpected payload shapes and non-numeric rates are skipped so that one
    bad record cannot abort the whole load.

    Args:
        currency: Base currency code, used as the subfolder name.
        base_dir: Directory containing the per-currency subfolders.
            Defaults to the module-level ``BASE_DIR``.

    Returns:
        A DataFrame with ``date`` (datetime), ``to`` and ``rate`` columns
        with rows containing missing values removed, or an empty DataFrame
        when the folder is missing or holds no usable records.
    """
    root = BASE_DIR if base_dir is None else base_dir
    folder = os.path.join(root, currency)  # Reference correct folder

    if not os.path.isdir(folder):
        print(f"Folder missing for {currency}, skipping.")
        return pd.DataFrame()

    all_data = []
    for file in sorted(os.listdir(folder)):
        if not file.endswith(".json"):
            continue

        try:
            with open(os.path.join(folder, file), "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError):
            # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
            print(f"Error reading {file}, skipping.")
            continue

        # Ignore payloads that are not shaped like {"exchange_rates": {"@date": ...}}.
        rates = data.get("exchange_rates") if isinstance(data, dict) else None
        if not isinstance(rates, dict) or "@date" not in rates:
            continue

        date = rates["@date"]
        rate_info = rates.get("exchange_rate") or []
        if isinstance(rate_info, dict):  # Handle single entry format
            rate_info = [rate_info]
        elif not isinstance(rate_info, list):
            continue

        for rate in rate_info:
            if not isinstance(rate, dict):
                continue
            if "@to_currency" not in rate or "rate" not in rate:
                continue
            try:
                value = float(rate["rate"])
            except (TypeError, ValueError):
                # Skip only the unparsable entry, keep the rest of the file.
                print(f"Invalid rate in {file}, skipping entry.")
                continue
            all_data.append({"date": date, "to": rate["@to_currency"], "rate": value})

    df = pd.DataFrame(all_data)

    # Unparsable dates become NaT and are removed by dropna() below.
    if "date" in df.columns:
        df["date"] = pd.to_datetime(df["date"], errors="coerce")

    return df.dropna()

# Build the mapping of base currency code -> loaded DataFrame
all_frames = dict(zip(currencies, map(load_data, currencies)))

# Report how many records were loaded for each currency
for currency, df in all_frames.items():
    print(
        f"No data found for {currency}."
        if df.empty
        else f"{currency}: Loaded {df.shape[0]} records."
    )

# Function to plot exchange rate trends over time
def plot_trends(data_frames):
    """Draw a line chart for the most frequent target currencies of each base.

    For every non-empty frame, the three values of the ``to`` column with the
    most rows are selected and ``rate`` is plotted against ``date`` for each.

    Args:
        data_frames: Mapping of base-currency code to a DataFrame with
            ``date``, ``to`` and ``rate`` columns.

    Returns:
        None. Each figure is displayed with ``plt.show()``.
    """
    populated = (
        (code, frame) for code, frame in data_frames.items() if not frame.empty
    )

    for currency, frame in populated:
        # value_counts() is ordered by descending row count.
        busiest_targets = frame["to"].value_counts().head(3).index

        for to_curr in busiest_targets:
            pair_rates = frame.loc[frame["to"] == to_curr]

            plt.figure(figsize=(10, 4))
            sns.lineplot(data=pair_rates, x="date", y="rate")
            plt.title(f"{currency} to {to_curr} Exchange Rate Over Time")
            plt.xlabel("Date")
            plt.ylabel("Exchange Rate")
            plt.show()

# Function to analyze volatility in exchange rates
def analyze_volatility(data_frames):
    """Print the three most volatile target currencies for each base currency.

    Volatility is the mean absolute difference between consecutive rates of
    the same target currency after ordering the rows by date. Empty frames
    are skipped and the input frames are left unmodified.

    Args:
        data_frames: Mapping of base-currency code to a DataFrame with
            ``date``, ``to`` and ``rate`` columns.

    Returns:
        None. The ranking is written to standard output.
    """
    for currency, frame in data_frames.items():
        if not frame.empty:
            # sort_values() returns a new frame, so the caller's data is untouched.
            ordered = frame.sort_values("date").assign(
                change=lambda rows: rows.groupby("to")["rate"].diff().abs()
            )
            ranking = (
                ordered.groupby("to")["change"].mean().sort_values(ascending=False)
            )
            # Heading, top three targets, then a blank separator line.
            print(f"Volatility for {currency}:", ranking.head(3), "", sep="\n")

# Function to visualize volatility distribution
def plot_volatility(data_frames):
    """Plot a histogram of absolute rate changes for each base currency.

    Changes are the absolute differences between consecutive rates of the
    same target currency after ordering the rows by date, matching the
    definition used by ``analyze_volatility``. The input frames are not
    modified. Empty frames are skipped.

    Args:
        data_frames: Mapping of base-currency code to a DataFrame with
            ``date``, ``to`` and ``rate`` columns.

    Returns:
        None. Each figure is displayed with ``plt.show()``.
    """
    for currency, df in data_frames.items():
        if df.empty:
            continue

        # Sort first so diff() compares chronologically adjacent rates, and
        # work on the derived Series so the caller's frame gains no column.
        changes = (
            df.sort_values("date")
            .groupby("to")["rate"]
            .diff()
            .abs()
            .dropna()
            .rename("change")
        )

        plt.figure(figsize=(10, 4))
        sns.histplot(changes, bins=50, kde=True)
        plt.title(f"{currency} Exchange Rate Volatility")
        plt.xlabel("Rate Change")
        plt.ylabel("Frequency")
        plt.show()

# Function to plot currency correlations
def plot_correlation(data_frames):
    """Show a heatmap of correlations between target-currency rate series.

    All non-empty frames are stacked, rates sharing the same ``date`` and
    ``to`` values are averaged, and the result is reshaped to one column per
    target currency before the pairwise correlations are drawn.

    Args:
        data_frames: Mapping of base-currency code to a DataFrame with
            ``date``, ``to`` and ``rate`` columns.

    Returns:
        None. A message is printed instead of a plot when there is no usable
        data.
    """
    non_empty = [frame for frame in data_frames.values() if not frame.empty]
    if len(non_empty) == 0:
        print("No valid data available for correlation analysis.")
        return

    merged = pd.concat(non_empty, ignore_index=True)
    if "date" not in merged.columns:
        print("'date' column missing in combined dataset.")
        return

    # Coerce unparsable dates to NaT, then discard those rows.
    merged["date"] = pd.to_datetime(merged["date"], errors="coerce")
    merged = merged.dropna(subset=["date"])

    # Collapse duplicate (date, target) rows to their mean rate.
    averaged = merged.groupby(["date", "to"])["rate"].mean().reset_index()

    # One row per date, one column per target currency.
    wide = averaged.pivot(index="date", columns="to", values="rate")
    if wide.empty:
        print("No data available for correlation analysis after pivoting.")
        return

    plt.figure(figsize=(8, 6))
    sns.heatmap(wide.corr(), annot=True, cmap="coolwarm")
    plt.title("Currency Correlations")
    plt.show()

# Run the pipeline in order: volatility table, trend lines,
# volatility histograms, then the correlation heatmap.
for run_step in (analyze_volatility, plot_trends, plot_volatility, plot_correlation):
    run_step(all_frames)

Folder missing for USD, skipping.
Folder missing for EUR, skipping.
Folder missing for GBP, skipping.
Folder missing for JPY, skipping.
Folder missing for CNY, skipping.
No data found for USD.
No data found for EUR.
No data found for GBP.
No data found for JPY.
No data found for CNY.
No valid data available for correlation analysis.
