# Linear Trend Time Series

## D_r (real data)

In [None]:
import numpy as np
import plotly.express as px

# Time index 0..3649 as a single-column 2-D array (one row per time step).
X = np.arange(3650)[:, np.newaxis]
# Target: a straight line with slope 1 and offset 1000.
y = 1000 + X

In [None]:
# Plot the real series to confirm the linear trend visually.
fig = px.line(data_frame=y, title='Linear Trend')
fig.show()

#### Split D_r

In [None]:
# Chronological split: the first 2920 rows train, the remaining rows test.
X_train, X_test = np.split(X, [2920])
y_train, y_test = np.split(y, [2920])

In [None]:
# Total number of elements in X_train (displayed as the cell output).
X_train.size

## Synthetic Data Generation D_g

In [None]:
import pandas as pd
from ydata.synthesizers.timeseries.model import TimeSeriesSynthesizer
from ydata.dataset import Dataset
from ydata.metadata import Metadata
from ydata.utils.data_types import VariableType


In [None]:
import os

# NOTE(review): a license key is committed in this notebook. Consider rotating
# it and supplying YDATA_LICENSE_KEY from the environment instead.
# setdefault keeps a key that is already configured outside the notebook
# rather than overwriting it; the literal below is only a fallback.
os.environ.setdefault('YDATA_LICENSE_KEY', 'b6580aad-809b-4ff4-be31-af0159c28b99')

# Real series as a two-column frame: the positional index becomes 'time_idx'
# and the values of y become 'f0'.
f0_df = (
    pd.DataFrame(y, columns=['f0'])
    .reset_index()
    .rename(columns={'index': 'time_idx'})
)

# Wrap the frame in a YData Dataset.
dataset = Dataset(f0_df)

# Declare the time column as an integer variable.
dataset.astype("time_idx", VariableType.INT)

# Minimal metadata: only the sort key ('time_idx') is specified.
metadata = Metadata(dataset, dataset_attrs={"sortbykey": "time_idx"})

In [None]:
# Fit the time-series synthesizer on the real dataset and its metadata.
synth = TimeSeriesSynthesizer()
synth.fit(dataset, metadata=metadata)

# Draw one synthetic entity and convert it to a pandas DataFrame.
f0_synth = synth.sample(n_entities=1).to_pandas()

# Side-by-side frame for plotting. The real values live in f0_df['f0'];
# the bare name `f0` was never defined and raised NameError.
f0_synth_v_real = pd.DataFrame({'Synthetic': f0_synth['f0'], 'Actual': f0_df['f0']})

In [None]:
# Display the sampled synthetic DataFrame as the cell output.
f0_synth

In [None]:
# Overlay every column of the comparison frame on a single line chart.
fig = px.line(
    data_frame=f0_synth_v_real,
    y=f0_synth_v_real.columns,
    title='Linear Trend, Actual vs Synthetic',
)
fig.show()

## Task [T]: Regression
## Algorithm [A]: Linear Regression

#### Linear Regression model on D_r

In [None]:
from sklearn.linear_model import LinearRegression

# Train the reference model on the real training split; fit() returns the
# fitted estimator, so construction and fitting can be chained.
model_r = LinearRegression().fit(X_train, y_train)
# Score on the same training split (displayed as the cell output).
model_r.score(X_train, y_train)

#### Evaluation of model_r on the D_r test set

In [None]:
# Predict on the real test split and convert to integers.
# Round to the nearest integer before casting: a bare astype(int) truncates
# toward zero, so a prediction such as 3919.9999999 would become 3919 and
# spuriously disagree in the later equality-based comparison.
y_r_pred = np.rint(model_r.predict(X_test)).astype(int)
fig = px.line(y_r_pred, title='Linear Trend, Actual predictions')
fig.show()

#### Linear Regression model on D_g

In [None]:
# Build single-column feature/target arrays from the synthetic frame.
X_g = np.asarray(f0_synth['time_idx']).reshape(-1, 1)
y_g = np.asarray(f0_synth['f0']).reshape(-1, 1)

# Train the second model on the synthetic data only.
model_g = LinearRegression().fit(X_g, y_g)
# Score on the synthetic data it was trained on (displayed as the cell output).
model_g.score(X_g, y_g)

#### Evaluation of model_g on the D_r test set

In [None]:
# Predict on the real test split with the synthetic-trained model.
# Round to the nearest integer before casting: a bare astype(int) truncates
# toward zero, which shifts near-integer predictions down by one and distorts
# the equality-based agreement rate.
y_g_pred = np.rint(model_g.predict(X_test)).astype(int)
fig = px.line(y_g_pred, title='Predictions, trained using the generated model')
fig.show()

In [None]:
# Total number of elements in X_test (displayed as the cell output).
X_test.size

### Agreement Rate (Measures the usability of the synthetic data)

In [None]:
# Agreement rate: count of positions where the two models' integer
# predictions are equal, divided by the number of test elements.
r_agree = (y_g_pred == y_r_pred).sum() / X_test.size
r_agree

# Seasonal Time Series

In [98]:
# Signed offset of each time step from day 182 within a 365-step cycle
# (values fall in [-182, 182]).
dfd182 = np.mod(X, 365) - 182
# Rescale so that an offset of +/-182 maps to +/-pi.
normalized_dfd182 = dfd182 * np.pi / 182

fig = px.line(
    data_frame=normalized_dfd182,
    title='Distance from Day 182, normalized',
)
fig.show()