In [1]:
import numpy as np
import pandas as pd

In [2]:
# Toy series of five observations in a single column named 'data'.
df = pd.DataFrame({'data': [0, 11, 23, 41, 61]})
df.head()  # the frame has shape (5, 1), so head() shows every row

Unnamed: 0,data
0,0
1,11
2,23
3,41
4,61


# Let's assume the series has a seasonal pattern with period m = 2

In [3]:
# First-order (lag-1) differencing using np.diff(): out[i] = x[i+1] - x[i].
# The input column df['data'] has len = 5, so the result has len = 5 - 1 = 4.
df_diff = np.diff(df['data'], n=1)
df_diff
# numpy.ndarray
# len = 4

array([11, 12, 18, 20])

# Seasonal differencing with NumPy array slicing (np.diff() has no lag parameter) - NumPy array

In [4]:
# First-order seasonal differencing with lag 2: y[i] = x[i+2] - x[i].
# NOTE: np.diff(df['data'], n=2) is NOT the same thing. n=2 only repeats the
# lag-1 difference twice; np.diff() applies no shift or lag.
df_np_seasonal_diff_np = df['data'].values[2:] - df['data'].values[:-2]
df_np_seasonal_diff_np
# type: numpy.ndarray, len = 3
# output: array([23, 30, 38])
# i.e. 23-0, 41-11, 61-23

array([23, 30, 38])

In [6]:
# Second-order seasonal differencing: apply the lag-2 difference once more
# to the first-order seasonal differences.
df_np_seasonal_diff_np_2 = (
    df_np_seasonal_diff_np[2:]      # element at position i + 2
    - df_np_seasonal_diff_np[:-2]   # element at position i
)
df_np_seasonal_diff_np_2
# type: numpy.ndarray, len = 1
# output: array([15])
# i.e. 38-23

array([15])

# Seasonal differencing with pandas Series slicing (np.diff() has no lag parameter) - pandas Series

In [7]:
# First-order seasonal differencing (lag 2) on a pandas Series: y[i] = x[i+2] - x[i].
# NOTE: np.diff(df['data'], n=2) would be wrong here. n=2 only implies
# first-order differencing twice; no shift or lag is applied.
#
# Series arithmetic aligns the operands on their index LABELS, not on position,
# so both slices are re-indexed from 0 before subtracting. Re-indexing only one
# side gives the right answer only when the source index already is 0..n-1.
# .iloc makes the slicing explicitly positional.
df_np_seasonal_diff_pd = (
    df['data'].iloc[2:].reset_index(drop=True)
    - df['data'].iloc[:-2].reset_index(drop=True)
)
df_np_seasonal_diff_pd
# pandas.core.series.Series
# output:
#       0    23
#       1    30
#       2    38
#       Name: data, dtype: int64
# 23-0, 41-11, 61-23
# len = 3
# shape (3,)

0    23
1    30
2    38
Name: data, dtype: int64

In [9]:
# Second-order seasonal differencing: apply the lag-2 difference once more
# to the first-order seasonal differences.
#
# Series arithmetic aligns the operands on their index LABELS, not on position,
# so both slices are re-indexed from 0 before subtracting. Re-indexing only one
# side gives the right answer only when the input index already is 0..n-1.
# .iloc makes the slicing explicitly positional.
df_np_seasonal_diff_pd_2 = (
    df_np_seasonal_diff_pd.iloc[2:].reset_index(drop=True)
    - df_np_seasonal_diff_pd.iloc[:-2].reset_index(drop=True)
)
df_np_seasonal_diff_pd_2
# pandas.core.series.Series
# output:
#       0    15
#       Name: data, dtype: int64
# 38-23
# len = 1

0    15
Name: data, dtype: int64

# Seasonal differencing using pandas Series.diff() - pandas Series

In [17]:
# First-order seasonal differencing with Series.diff().
# The argument is `periods`: periods=2 means lag 2, i.e. each value minus the
# value two positions earlier, which is what seasonal differencing with m=2 needs.
df_pd_seasonal_diff = df['data'].diff(2)
df_pd_seasonal_diff
# type: pandas.core.series.Series
# The first two positions have no value two steps back, so they are NaN and
# the dtype becomes float64. The length is unchanged.
# output:
#       0     NaN
#       1     NaN
#       2    23.0
#       3    30.0
#       4    38.0
#       Name: data, dtype: float64
# i.e. 23-0, 41-11, 61-23
# len = 5, shape (5,)

0     NaN
1     NaN
2    23.0
3    30.0
4    38.0
Name: data, dtype: float64

In [None]:
# Second-order seasonal differencing: take the lag-2 difference (periods=2)
# of the first-order seasonal differences.
df_pd_seasonal_diff_2 = df_pd_seasonal_diff.diff(2)
df_pd_seasonal_diff_2
# type: pandas.core.series.Series
# The two leading NaN values of the input propagate, so only the last
# position holds a number. The length is unchanged.
# output:
#       0     NaN
#       1     NaN
#       2     NaN
#       3     NaN
#       4    15.0
#       Name: data, dtype: float64
# i.e. 38-23
# len = 5, shape (5,)

0     NaN
1     NaN
2     NaN
3     NaN
4    15.0
Name: data, dtype: float64