In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

In [2]:
# Load Nikkei data
# Read the 'Data' sheet of the workbook, skipping the first four rows of the sheet.
nikkei_data_path = "nikkei_data.xlsx"
nikkei_data = pd.read_excel(nikkei_data_path, sheet_name='Data', skiprows=4)
# Discard the 'Unnamed: 0' column, then give the two remaining columns
# the names used by the rest of the notebook.
nikkei_data_cleaned = (
    nikkei_data
    .drop(columns=['Unnamed: 0'])
    .set_axis(['Year', 'Nikkei_Index'], axis='columns')
)

In [3]:
# Load Japan GDP data
japan_gdp_data_path = 'japan_gdp_data.xlsx'
# The openpyxl engine is used to handle .xlsx files.
japan_gdp_data = pd.read_excel(japan_gdp_data_path, engine='openpyxl')

# Label of the identifier column that stays fixed while the other columns are melted.
gdp_id_column = 'GDP, current prices (Billions of U.S. dollars)'

# Reshape from wide form (one column per year) to long form (one row per year):
# every non-identifier column name goes into 'Year' and its cell value into 'GDP'.
# Rows containing NaN/null values are then discarded.
japan_gdp_data_long = (
    japan_gdp_data
    .melt(id_vars=[gdp_id_column], var_name='Year', value_name='GDP')
    .dropna()
)

# After melting, the identifier column carries no further information, so drop it and
# keep only 'Year' and 'GDP'. 'Year' is cast to int so that it is consistent with the
# merge key used later and convenient for analysis and plotting.
japan_gdp_data_cleaned = (
    japan_gdp_data_long
    .drop(columns=[gdp_id_column])
    .astype({'Year': int})
)



In [4]:
# Merge two datasets
# Combine the cleaned Nikkei and GDP frames on their shared 'Year' column.
# how='inner' keeps only the years that appear in BOTH frames; a year missing
# from either side is left out of the result.
# The result is then ordered by 'Year' (ascending) so the rows are chronological,
# which the frame-by-frame animation below relies on. sort_values returns the
# sorted frame, so the whole step is a single expression.
merged_data = pd.merge(
    nikkei_data_cleaned,
    japan_gdp_data_cleaned,
    on='Year',
    how='inner',
).sort_values('Year')

In [19]:
# Create animated line graph
# Build a 10 x 6 inch figure; ax1 carries the Nikkei Index on the left-hand y-axis.
fig, ax1 = plt.subplots(figsize=(10, 6))
# NOTE(review): the year range in the title is hard-coded, while the plotted range is
# whatever years are present in merged_data -- confirm the two agree.
ax1.set_title('Japan GDP vs Nikkei Index from 1980-2023', fontsize=14)
ax1.set_xlabel('Year', fontsize=12)
# ax2 shares ax1's x-axis but has its own y-axis, so GDP can be drawn on a
# different scale in the same graph.
ax2 = ax1.twinx()
# Each y-axis label is coloured like the line drawn against that axis.
ax1.set_ylabel('Nikkei Index', color='tab:blue', fontsize=12)
ax2.set_ylabel('GDP (Billions of USD)', color='tab:red', fontsize=12)
# Start with two empty lines; the animation's update function fills them in frame by
# frame. The lines use the same 'tab:blue' / 'tab:red' colours as the axis labels:
# the short format codes 'b' / 'r' select different shades of blue and red, so the
# lines would not actually match their labels.
line1, = ax1.plot([], [], '-', color='tab:blue', label='Nikkei Index')
line2, = ax2.plot([], [], '-', color='tab:red', label='GDP')
# One legend per axis: Nikkei in the upper left, GDP in the upper right.
ax1.legend(loc="upper left")
ax2.legend(loc="upper right")

# Text artist that shows the current year. The position is given in axes-fraction
# coordinates (transform=ax1.transAxes) and can be adjusted; the text itself is
# filled in by the update function.
year_text = ax1.text(0.95, 0.9, '', transform=ax1.transAxes, ha='right')
def update(frame):
    """Draw one animation frame.

    Shows both series from the first row of ``merged_data`` up to and including
    the row at position ``frame``, and sets the year label to that row's year.

    Parameters
    ----------
    frame : int
        Zero-based frame number; used as a positional (``iloc``) row index
        into ``merged_data``.

    Returns
    -------
    tuple
        ``(line1, line2, year_text)`` -- the artists modified by this call.
    """
    # Rows visible in this frame. iloc selects by position, so this works
    # regardless of the labels in the DataFrame's index.
    visible = merged_data.iloc[:frame + 1]
    years = visible['Year']
    line1.set_data(years, visible['Nikkei_Index'])
    line2.set_data(years, visible['GDP'])

    # Axis limits are taken from the FULL dataset, so the axes stay fixed while
    # the lines grow. Series.min()/.max() are vectorised and skip missing values,
    # whereas the builtin min()/max() can return NaN, which set_xlim/set_ylim reject.
    all_years = merged_data['Year']
    # ax2 was created with ax1.twinx() and shares its x-axis, so setting the
    # x-limits on ax1 is sufficient.
    ax1.set_xlim(all_years.min(), all_years.max())
    # Leave 10% headroom above the maxima so the lines do not touch the top edge.
    ax1.set_ylim(0, merged_data['Nikkei_Index'].max() * 1.1)
    ax2.set_ylim(0, merged_data['GDP'].max() * 1.1)

    # Year label for the current frame.
    current_year = merged_data['Year'].iloc[frame]
    text_color = 'black'  # or any color that stands out on your background
    year_text.set_text(f'Year: {current_year}')
    year_text.set_color(text_color)
    year_text.set_fontsize(14)  # Increase font size as needed
    return line1, line2, year_text

# Destination of the rendered video.
# NOTE(review): an earlier comment referred to this file as 'japan_gdp_vs_nikkei.mp4';
# the literal below is what is actually written -- confirm which name is intended.
video_path = 'japan.gdpvs_nikkei.mp4'

# Build the animation: FuncAnimation calls update() once per frame.
#   frames -- one frame per row of merged_data
#   blit   -- only redraw the artists that update() returns
#   repeat -- do not loop once the last frame has been shown
ani = FuncAnimation(
    fig,
    update,
    frames=merged_data.shape[0],
    blit=True,
    repeat=False,
)

# Encode the animation with ffmpeg. At fps=1.5 each frame stays on screen for
# about two thirds of a second.
ani.save(video_path, writer='ffmpeg', fps=1.5)

# Close the figure to prevent it from displaying.
plt.close(fig)
