In [11]:
import calendar

import pandas as pd
# Route DataFrame.plot() / Series.plot() through plotly for the rest of the notebook.
pd.options.plotting.backend = "plotly"

In [12]:
filepath = "records_world.json"

# Load the JSON records and key each one by its "name" field.
records = pd.read_json(filepath, orient="records").set_index("name")

# Spread every record's "data" entry across its own columns, replacing the packed column.
expanded = records["data"].apply(pd.Series)
df = records.drop(columns=["data"]).join(expanded)

# Discard the final three records (presumably non-year summary rows — TODO confirm),
# transpose so the record names become the columns, then remove the "1981" column.
df = df.iloc[:-3].T.drop("1981", axis=1)

In [13]:
# Put every year on the same day-of-year axis as a leap year: for each non-leap
# column, open a NaN gap at the 29 Feb slot and move the rest of the year one
# slot later. The value in the column's last slot is discarded by the shift
# (assumes that slot is empty padding for non-leap years — TODO confirm the
# frame has 366 rows).
#
# The write goes through a single df.iloc[...] call. The previous chained form
# (df[col][60:] = ...) assigns into a temporary Series, which pandas warns
# about and which leaves df untouched when Copy-on-Write is enabled.
#
# NOTE: this cell is not idempotent — running it twice shifts the non-leap
# columns a second time. Re-run the loading cell first.
FEB_29 = 31 + 28  # zero-based position of 29 Feb within a leap year

for position, col in enumerate(df.columns):
    # Column labels are year strings; calendar.isleap applies the full
    # Gregorian rule (century years included) rather than a bare "% 4" test.
    if calendar.isleap(int(col)):
        continue
    # shift(1) on the tail leaves NaN at FEB_29 and moves every later value
    # down by one position.
    df.iloc[FEB_29:, position] = df.iloc[FEB_29:, position].shift(1).values

In [14]:
# Draw every column of df as its own series against the row index,
# using the plotting backend configured for pandas.
df.plot()

# We begin to look at the deltas

In [15]:
# Number of most recent rows (years) retained for the plots that follow.
n_years_back = 24

# After the transpose each row is one column of df, so diff() subtracts the
# preceding row from each row, element by element.
df_diff = df.T.diff()

# A rolling window of 1 averages each value with itself only, so no smoothing
# is applied yet; widen the window to smooth across consecutive rows.
# Only the last n_years_back rows are kept.
df_diff_rolling = df_diff.rolling(window=1).mean().iloc[-n_years_back:]

In [16]:
import pandas as pd
import plotly.graph_objects as go

# Summarise each row (year) of df_diff_rolling across its columns:
# the mean and the variance of that year's differences.
mean_values = df_diff_rolling.mean(axis=1)
variance_values = df_diff_rolling.var(axis=1)

fig = go.Figure()

# Bars: per-year mean, drawn against the primary (left) y-axis.
fig.add_trace(go.Bar(
    x=mean_values.index,
    y=mean_values,
    name='Mean'
))

# Line: per-year variance, bound to the secondary axis 'y2' so its scale is
# independent of the bars.
fig.add_trace(go.Scatter(
    x=variance_values.index,
    y=variance_values,
    mode='lines',
    name='Variance',
    yaxis='y2'
))

fig.update_layout(
    title='Mean and Variance Overlay with Different Scales',
    # The x values are the row labels of df_diff_rolling, i.e. the years;
    # this replaces the leftover placeholder label 'X Axis Title'.
    xaxis_title='Year',
    yaxis_title='Mean',
    yaxis2=dict(
        title='Variance',
        overlaying='y',  # share the plotting area with the primary y-axis
        side='right'     # tick labels on the right-hand side
    )
)

fig.show()


In [17]:
import plotly.graph_objects as go
from plotly.colors import n_colors
import numpy as np
# Nothing in this cell draws random numbers; the seed is kept only so the
# global NumPy RNG state stays the same for whatever runs afterwards.
np.random.seed(1)

# One colour per plotted row, interpolated between the two RGB endpoints and
# then reversed so the first trace gets the 'rgb(200, 10, 10)' end.
colors = n_colors('rgb(5, 200, 200)', 'rgb(200, 10, 10)', n_years_back, colortype='rgb')[::-1]

# Reverse the rows so the most recent year is drawn first.
# NOTE(review): plotly appears to place the first trace at the lowest y
# position, which would put the most recent year at the bottom — confirm
# against the rendered figure.
df_diff_rolling_plot = df_diff_rolling.iloc[::-1]

# One horizontal half-violin per row, each with its own colour.
fig = go.Figure()
for row_values, row_color in zip(df_diff_rolling_plot.values, colors):
    fig.add_trace(go.Violin(x=row_values, line_color=row_color))
fig.update_traces(orientation='h', side='positive', width=3, points=False)

# Dotted vertical reference line at a difference of zero.
fig.add_shape(
    type="line",
    x0=0, y0=0, x1=0, y1=n_years_back,
    line=dict(color="black", width=1, dash="dot"),
)

fig.update_xaxes(title_text="Temperature difference (°C) from previous year")
fig.update_layout(
    title="Distributions of change in Daily Sea Surface Temperature differences, World",
    xaxis=dict(showgrid=False, zeroline=False),
    # Relabel the trace positions 0..n_years_back-1 with the row labels.
    yaxis=dict(
        tickmode='array',
        tickvals=np.arange(n_years_back),
        ticktext=df_diff_rolling_plot.index,
    ),
    showlegend=False,
    height=1200,
    width=800,
)
fig.show()