In [12]:
from flowgen import stock_5y, synthesis
import plotly.express as px
from statsmodels.tsa.seasonal import seasonal_decompose
import pandas as pd
import numpy as np

In [13]:
# Load the AAPL price history through the project helper.
# The ticker is spelled in canonical upper case ('AAPL'); the previous
# spelling ended in a lowercase "l".
aapl = stock_5y('AAPL')

In [3]:
# Closing prices as a Series keyed by the 'date' column.
ts_data = aapl.set_index('date').loc[:, 'close']

In [14]:
# Closing price over time. The title lets the figure stand on its own when
# the notebook is skimmed; the figure is the cell's displayed value.
px.line(aapl, x='date', y='close', title='AAPL closing price')

### Long Range Dependence
#### Check the stationarity of the series

In [36]:
from statsmodels.tsa.stattools import adfuller, kpss

def kpss_test(timeseries):
    """Run the KPSS test on ``timeseries`` and print a labelled summary.

    ``kpss`` is called with a constant-only regression (``regression="c"``)
    and automatic lag selection (``nlags="auto"``). The printed Series holds
    the test statistic, the p-value and the number of lags, followed by one
    ``Critical Value (<level>)`` row per entry of the critical-value mapping.

    Nothing is returned; the summary is only printed.
    """
    print("Results of KPSS Test:")
    result = kpss(timeseries, regression="c", nlags="auto")

    # The first three entries are the headline numbers; the fourth is the
    # mapping of significance level -> critical value.
    statistic, p_value, lags = result[:3]
    summary = {
        "Test Statistic": statistic,
        "p-value": p_value,
        "Lags Used": lags,
    }
    summary.update(
        ("Critical Value (%s)" % level, threshold)
        for level, threshold in result[3].items()
    )
    print(pd.Series(summary))


def adf_test(timeseries):
    """Run the augmented Dickey-Fuller test and print a labelled summary.

    ``adfuller`` is called with ``autolag="AIC"``. The printed Series holds
    the test statistic, the p-value, the number of lags used and the number
    of observations used, followed by one ``Critical Value (<level>)`` row
    per entry of the critical-value mapping.

    Nothing is returned; the summary is only printed.
    """
    print("Results of Dickey-Fuller Test:")
    result = adfuller(timeseries, autolag="AIC")

    headline_labels = (
        "Test Statistic",
        "p-value",
        "#Lags Used",
        "Number of Observations Used",
    )
    # Pair the first four result entries with their labels, then append the
    # critical values stored in the fifth entry.
    report = dict(zip(headline_labels, result[:4]))
    for level, threshold in result[4].items():
        report["Critical Value (%s)" % level] = threshold
    print(pd.Series(report))

In [35]:
# Augmented Dickey-Fuller test on the closing prices; adf_test prints the
# summary table and returns nothing.
adf_test(aapl['close'])

Results of Dickey-Fuller Test:
Test Statistic                   -0.737704
p-value                           0.836740
#Lags Used                        1.000000
Number of Observations Used    1254.000000
Critical Value (1%)              -3.435575
Critical Value (5%)              -2.863848
Critical Value (10%)             -2.567999
dtype: float64


In [37]:
# KPSS test on the same closing prices; kpss_test prints the summary table
# and returns nothing.
kpss_test(aapl['close'])

Results of KPSS Test:
Test Statistic            4.964163
p-value                   0.010000
Lags Used                20.000000
Critical Value (10%)      0.347000
Critical Value (5%)       0.463000
Critical Value (2.5%)     0.574000
Critical Value (1%)       0.739000
dtype: float64


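#### Both tests indicate that the series is not stationary: the ADF test fails to reject its unit-root null (p ≈ 0.84), while the KPSS test rejects its stationarity null (p ≤ 0.01)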

In [15]:
# Window (segment) sizes over which DFA computes the local fluctuations.
import math


def dfa_window_sizes(min_size=4, max_size=314, count=10):
    """Return ``count`` geometrically spaced integer window sizes.

    The sizes run from ``min_size`` to ``max_size``. Intermediate values are
    truncated to integers with ``int()``; the last value is set to
    ``max_size`` explicitly, because truncating the floating-point product
    could otherwise land one below it.

    Parameters
    ----------
    min_size : int
        Smallest window size (first element of the result).
    max_size : int
        Largest window size (last element of the result).
    count : int
        Number of window sizes to generate; must be at least 2.

    Returns
    -------
    numpy.ndarray
        Integer array of length ``count``.

    Raises
    ------
    ValueError
        If ``count`` is smaller than 2 or the size bounds are not
        ``0 < min_size <= max_size``.
    """
    if count < 2:
        raise ValueError("count must be at least 2")
    if not 0 < min_size <= max_size:
        raise ValueError("expected 0 < min_size <= max_size")

    # Common ratio of the geometric progression min_size -> max_size.
    ratio = math.pow(max_size / min_size, 1 / (count - 1))
    sizes = [int(min_size * math.pow(ratio, step)) for step in range(count - 1)]
    sizes.append(int(max_size))
    return np.array(sizes)


# NOTE(review): 314 looks like a quarter of the series length (the ADF output
# implies 1256 rows) - confirm, and pass max_size explicitly if the data changes.
T = dfa_window_sizes()

### Detrended Fluctuation Analysis (DFA) of the Real Time Series

##### Interpretation of the scaling exponent alpha:
##### alpha = 1 : 1/f (pink) noise, perfect self-similarity
##### alpha = 1/2 : uncorrelated, white noise
##### 1/2 < alpha < 1 : positive correlation
##### alpha < 1/2 : inversely correlated (anti-correlated)
##### alpha > 1 : non-stationary; correlations exist, but they cannot be described by a power-law relationship
##### alpha = 3/2 : Brown noise

In [17]:
import fathon

# Build the DFA object directly from the closing-price column.
# NOTE(review): fathon's published examples pass the series through
# fathonUtils.toAggregated before constructing DFA; here the prices are used
# as-is - confirm that this is the intended input.
dfa = fathon.DFA(aapl['close'])
# Compute the fluctuation values for the window sizes in T. The result is
# wrapped in np.array and unpacked into two rows (presumably window sizes and
# fluctuations - verify against the fathon docs).
n, local_fluctuation = np.array(dfa.computeFlucVec(T))
# Fit the fluctuation vector computed above; the first returned value is kept
# as the scaling exponent of the real series.
alpha_real, intercept = dfa.fitFlucVec()
# Last expression of the cell: display the exponent.
alpha_real

0.5163605730049681

### Synthetic Time Series

In [18]:
# Generate the synthetic counterpart of the AAPL frame with the project helper.
aapl_synth = synthesis(aapl)

# Put both 'close' columns side by side (pandas aligns them on their index).
synth_v_real = pd.DataFrame(
    dict(Synthetic=aapl_synth['close'], Actual=aapl['close'])
)

[########################################] | 100% Completed | 101.21 ms
[########################################] | 100% Completed | 101.39 ms
[########################################] | 100% Completed | 100.99 ms
INFO: 2025-12-08 17:08:38,747 [SYNTHESIZER] - Initializing Time Series SYNTHESIZER.
Validate the segmentation strategy.
INFO: 2025-12-08 17:08:38,748 [SYNTHESIZER] - Number columns considered for synth: 2
INFO: 2025-12-08 17:08:39,022 [SYNTHESIZER] - Starting the synthetic data modeling process over 1x1 blocks.
INFO: 2025-12-08 17:08:39,024 [SYNTHESIZER] - Preprocess segment
INFO: 2025-12-08 17:08:39,026 [SYNTHESIZER] - Synthesizer init.
INFO: 2025-12-08 17:08:39,027 [SYNTHESIZER] - Processing the data prior fitting the synthesizer.
INFO: 2025-12-08 17:08:39,848 Pandas backend loaded 2.2.3
INFO: 2025-12-08 17:08:39,852 Numpy backend loaded 1.26.4
INFO: 2025-12-08 17:08:39,853 Pyspark backend NOT loaded
INFO: 2025-12-08 17:08:39,853 Python backend loaded
INFO: 2025-12-08 17:

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.

  return getattr(__obj, self.method)(*args, **kwargs)
Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [19]:
# Overlay the actual and synthetic close series in a single figure.
fig = px.line(
    synth_v_real,
    y=synth_v_real.columns,
    title='Actual vs Synthetic',
    # Wide-form input: replace plotly's generic 'value' / 'variable' names on
    # the y-axis and legend with meaningful ones.
    labels={'value': 'Close', 'variable': 'Series'},
)
fig.show()

### DFA on Synthetic Time Series

In [20]:
# Same DFA steps as for the real series, now on the synthetic close column.
# Note that dfa, n, local_fluctuation and intercept are rebound here, so the
# values computed for the real series are no longer available afterwards.
dfa = fathon.DFA(aapl_synth['close'])
# Fluctuation values for the same window sizes T used for the real series.
n, local_fluctuation = np.array(dfa.computeFlucVec(T))
# Fit the fluctuation vector; the first returned value is kept as the scaling
# exponent of the synthetic series.
alpha_synth, intercept = dfa.fitFlucVec()
# Last expression of the cell: display the exponent.
alpha_synth

0.40343453467447316