In [1]:
import json

import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA

## Data Generation

In [2]:
# Build the daily DatetimeIndex for January 2020; both endpoints are included.
date_rng = pd.date_range(
    start=pd.Timestamp('2020-01-01'),
    end=pd.Timestamp('2020-01-31'),
    freq='D',
)
date_rng

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05', '2020-01-06', '2020-01-07', '2020-01-08',
               '2020-01-09', '2020-01-10', '2020-01-11', '2020-01-12',
               '2020-01-13', '2020-01-14', '2020-01-15', '2020-01-16',
               '2020-01-17', '2020-01-18', '2020-01-19', '2020-01-20',
               '2020-01-21', '2020-01-22', '2020-01-23', '2020-01-24',
               '2020-01-25', '2020-01-26', '2020-01-27', '2020-01-28',
               '2020-01-29', '2020-01-30', '2020-01-31'],
              dtype='datetime64[ns]', freq='D')

In [3]:
# Synthetic series: a linear trend rising from 0 to 10 across the date range,
# plus standard-normal noise. The generator is seeded so that a fresh
# Restart & Run All reproduces exactly the same values.
SEED = 42
rng = np.random.default_rng(SEED)

n_obs = len(date_rng)
df = pd.DataFrame({
    'date': date_rng,
    'data': rng.standard_normal(n_obs) + np.linspace(0, 10, n_obs),
})
df

Unnamed: 0,date,data
0,2020-01-01,-1.094316
1,2020-01-02,1.204323
2,2020-01-03,0.074715
3,2020-01-04,1.666345
4,2020-01-05,1.029067
5,2020-01-06,1.136642
6,2020-01-07,2.858989
7,2020-01-08,1.803627
8,2020-01-09,2.986036
9,2020-01-10,2.800211


In [4]:
# Persist the frame to disk as a JSON array with one object per row.
df.to_json(path_or_buf='data.json', orient='records')

## Model

In [9]:
# Reload the saved records and index the frame by its 'date' column.
df = pd.read_json('data.json').set_index('date')
df

Unnamed: 0_level_0,data
date,Unnamed: 1_level_1
2020-01-01,0.757789
2020-01-02,-0.571961
2020-01-03,-0.424605
2020-01-04,-2.851935
2020-01-05,0.688226
...,...
2020-12-27,11.510349
2020-12-28,10.630386
2020-12-29,8.870571
2020-12-30,10.513049


In [10]:
# Fit an ARIMA model to the daily series and forecast the days that follow it.
ARIMA_ORDER = (5, 1, 0)  # (p, d, q) parameters
FORECAST_STEPS = 10      # forecast horizon, in days

# The cell above already moved 'date' into the index, so an unconditional
# set_index('date') here raises KeyError on a fresh Restart & Run All.
# Accept either layout instead, and leave `df` itself untouched.
series = df.set_index('date')['data'] if 'date' in df.columns else df['data']

# Define and fit the model
model = ARIMA(series, order=ARIMA_ORDER)
model_fit = model.fit()

# Print the model summary
print(model_fit.summary())

# Forecast the next FORECAST_STEPS days
forecast = model_fit.forecast(steps=FORECAST_STEPS)

# Create a date range for the forecast, starting the day after the last observation
forecast_dates = pd.date_range(
    start=series.index[-1] + pd.Timedelta(days=1),
    periods=FORECAST_STEPS,
    freq='D',
)

# `forecast` is a Series named 'predicted_mean'. Passing it to pd.DataFrame
# together with columns=['forecast'] selects a column that does not exist and
# yields only NaN, so build the frame from the raw values instead.
forecast_df = pd.DataFrame({'forecast': forecast.to_numpy()}, index=forecast_dates)

                               SARIMAX Results                                
Dep. Variable:                   data   No. Observations:                  366
Model:                 ARIMA(5, 1, 0)   Log Likelihood                -536.597
Date:                Tue, 28 May 2024   AIC                           1085.194
Time:                        11:49:23   BIC                           1108.594
Sample:                    01-01-2020   HQIC                          1094.494
                         - 12-31-2020                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.8706      0.053    -16.327      0.000      -0.975      -0.766
ar.L2         -0.6418      0.068     -9.408      0.000      -0.775      -0.508
ar.L3         -0.4790      0.072     -6.666      0.0

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [11]:
# Display the forecast Series returned by model_fit.forecast()
forecast

2021-01-01    10.394883
2021-01-02    10.455489
2021-01-03    10.285088
2021-01-04    10.377151
2021-01-05    10.517201
2021-01-06    10.476574
2021-01-07    10.421214
2021-01-08    10.420216
2021-01-09    10.424887
2021-01-10    10.444439
Freq: D, Name: predicted_mean, dtype: float64

In [12]:
# Single-column frame built from the forecast Series (the column keeps the
# Series' own name), with rows aligned to forecast_dates.
forecast_df = forecast.to_frame().reindex(forecast_dates)
forecast_df

Unnamed: 0,predicted_mean
2021-01-01,10.394883
2021-01-02,10.455489
2021-01-03,10.285088
2021-01-04,10.377151
2021-01-05,10.517201
2021-01-06,10.476574
2021-01-07,10.421214
2021-01-08,10.420216
2021-01-09,10.424887
2021-01-10,10.444439


In [69]:
# Flatten the forecast into a two-column frame: the predicted values under
# 'forecast' and their timestamps under 'date', on a fresh 0..n-1 index.
forecast_df = pd.DataFrame({
    'forecast': forecast.to_numpy(),
    'date': forecast.index,
})
forecast_df

Unnamed: 0,forecast,date
0,10.394883,2021-01-01
1,10.455489,2021-01-02
2,10.285088,2021-01-03
3,10.377151,2021-01-04
4,10.517201,2021-01-05
5,10.476574,2021-01-06
6,10.421214,2021-01-07
7,10.420216,2021-01-08
8,10.424887,2021-01-09
9,10.444439,2021-01-10


In [60]:
# List the column labels of forecast_df.
# NOTE(review): the saved output below shows only 'forecast', yet the cell just
# above builds the frame with both 'forecast' and 'date'. The output looks stale
# (execution counts are out of order); re-run to confirm.
forecast_df.columns

Index(['forecast'], dtype='object')

In [15]:
# Export the current contents of `df` to a CSV file.
df.to_csv(path_or_buf='data.csv')

## Decoding a Binary JSON Payload

In [49]:
# Step 1: Assume we have a binary string that was encoded from a JSON string.
# The payload is a JSON array of {"date": <epoch milliseconds>, "data": <float>}
# records. It is written one record per line; Python concatenates adjacent
# bytes literals, so the resulting value is a single unbroken byte string.
binary_string = (
    b'[{"date":1577836800000,"data":1.0056120457},'
    b'{"date":1577923200000,"data":0.4713256745},'
    b'{"date":1578009600000,"data":2.3427238803},'
    b'{"date":1578096000000,"data":1.4601122216},'
    b'{"date":1578182400000,"data":2.4416574572},'
    b'{"date":1578268800000,"data":4.3285273087},'
    b'{"date":1578355200000,"data":4.8192839998},'
    b'{"date":1578441600000,"data":4.1141419147},'
    b'{"date":1578528000000,"data":6.3720429471},'
    b'{"date":1578614400000,"data":6.4064233625},'
    b'{"date":1578700800000,"data":7.2499034329},'
    b'{"date":1578787200000,"data":7.6240510419},'
    b'{"date":1578873600000,"data":6.4497432266},'
    b'{"date":1578960000000,"data":8.1803434826},'
    b'{"date":1579046400000,"data":12.6877268228}]'
)

# Step 2: Decode the binary string to a regular string
json_string = binary_string.decode('utf-8')

json_string

'[{"date":1577836800000,"data":1.0056120457},{"date":1577923200000,"data":0.4713256745},{"date":1578009600000,"data":2.3427238803},{"date":1578096000000,"data":1.4601122216},{"date":1578182400000,"data":2.4416574572},{"date":1578268800000,"data":4.3285273087},{"date":1578355200000,"data":4.8192839998},{"date":1578441600000,"data":4.1141419147},{"date":1578528000000,"data":6.3720429471},{"date":1578614400000,"data":6.4064233625},{"date":1578700800000,"data":7.2499034329},{"date":1578787200000,"data":7.6240510419},{"date":1578873600000,"data":6.4497432266},{"date":1578960000000,"data":8.1803434826},{"date":1579046400000,"data":12.6877268228}]'

In [50]:
# Step 3: Parse the JSON string into Python objects. The payload is a JSON
# array of objects, so json.loads returns a list of dicts here (not a single
# dict, despite the variable name).
dictionary = json.loads(json_string)

dictionary

[{'date': 1577836800000, 'data': 1.0056120457},
 {'date': 1577923200000, 'data': 0.4713256745},
 {'date': 1578009600000, 'data': 2.3427238803},
 {'date': 1578096000000, 'data': 1.4601122216},
 {'date': 1578182400000, 'data': 2.4416574572},
 {'date': 1578268800000, 'data': 4.3285273087},
 {'date': 1578355200000, 'data': 4.8192839998},
 {'date': 1578441600000, 'data': 4.1141419147},
 {'date': 1578528000000, 'data': 6.3720429471},
 {'date': 1578614400000, 'data': 6.4064233625},
 {'date': 1578700800000, 'data': 7.2499034329},
 {'date': 1578787200000, 'data': 7.6240510419},
 {'date': 1578873600000, 'data': 6.4497432266},
 {'date': 1578960000000, 'data': 8.1803434826},
 {'date': 1579046400000, 'data': 12.6877268228}]

In [53]:
# Turn the parsed records into a frame: one row per record, one column per key.
# NOTE(review): this rebinds `df`, replacing the frame used in the Model section.
df = pd.DataFrame.from_records(dictionary)
df

Unnamed: 0,date,data
0,1577836800000,1.005612
1,1577923200000,0.471326
2,1578009600000,2.342724
3,1578096000000,1.460112
4,1578182400000,2.441657
5,1578268800000,4.328527
6,1578355200000,4.819284
7,1578441600000,4.114142
8,1578528000000,6.372043
9,1578614400000,6.406423


In [55]:
# Check the column dtypes; in the saved output 'date' is still int64 at this
# point, i.e. not yet converted to a datetime.
df.dtypes

date      int64
data    float64
dtype: object

In [57]:
# Interpret the integer 'date' values as milliseconds since the Unix epoch.
df = df.assign(date=pd.to_datetime(df['date'], unit='ms'))
df

Unnamed: 0,date,data
0,2020-01-01,1.005612
1,2020-01-02,0.471326
2,2020-01-03,2.342724
3,2020-01-04,1.460112
4,2020-01-05,2.441657
5,2020-01-06,4.328527
6,2020-01-07,4.819284
7,2020-01-08,4.114142
8,2020-01-09,6.372043
9,2020-01-10,6.406423
