> Solve Kuramoto–Sivashinsky with OpInf

<center>$\large \frac{\partial u}{\partial t}=-v\frac{\partial^4 u}{\partial x^4}-\frac{\partial^2 u}{\partial x^2}-u\frac{\partial u}{\partial x}$</center>

In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import plotly.graph_objects as go
import plotly.express as px
import progressbar
from sklearn.metrics import mean_absolute_error
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

%matplotlib inline

In [16]:
# Load the Kuramoto–Sivashinsky data set from a parquet file (path is relative
# to the notebook's working directory).
data = pd.read_parquet('Data/KS_data.parquet')

Scale the data first; otherwise PCA won't work.

In [50]:
# Standardize every column with StandardScaler, then shift and halve the result
# so each column ends up centred on 0.5 instead of 0.
# NOTE: this does NOT bound the values to [0, 1] - standardized values outside
# [-1, 1] still map outside [0, 1].
scaler = StandardScaler()
data = pd.DataFrame(scaler.fit_transform(data))
# Vectorized arithmetic gives the same numbers as the former element-wise
# DataFrame.applymap(lambda n: (n + 1) / 2), without calling a Python lambda per
# cell and without relying on the deprecated applymap API.
data = (data + 1) / 2

In [51]:
# Display the scaled frame (pandas truncates the rendering of large frames).
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,502,503,504,505,506,507,508,509,510,511
0,1.086332,1.008951,0.907741,0.785517,0.646871,0.497906,0.345701,0.197604,0.060495,-0.059843,...,0.817203,0.890230,0.959843,1.024059,1.080416,1.126043,1.157792,1.172447,1.166977,1.138841
1,1.102401,1.016629,0.905838,0.773563,0.625210,0.467698,0.308820,0.156440,0.017699,-0.101629,...,0.808442,0.890840,0.969035,1.040722,1.103125,1.153067,1.187132,1.201894,1.194207,1.161550
2,1.115235,1.022688,0.904017,0.763395,0.606954,0.442339,0.277969,0.122162,-0.017719,-0.135917,...,0.791687,0.882939,0.969408,1.048471,1.117039,1.171645,1.208608,1.224298,1.215454,1.179555
3,1.123548,1.026011,0.901386,0.754386,0.591750,0.421725,0.253252,0.095019,-0.045433,-0.162370,...,0.767628,0.866851,0.960939,1.046950,1.121501,1.180848,1.221073,1.238359,1.229335,1.191479
4,1.126147,1.025440,0.896885,0.745615,0.578839,0.405267,0.234235,0.074703,-0.065662,-0.181153,...,0.737347,0.843317,0.944042,1.036263,1.116316,1.180209,1.223815,1.243159,1.234780,1.196156
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199995,0.435963,0.503350,0.576707,0.654439,0.734518,0.814456,0.891318,0.961761,1.022111,1.068484,...,0.234105,0.216394,0.205251,0.201930,0.207361,0.222132,0.246507,0.280458,0.323709,0.375774
199996,0.451071,0.519879,0.594218,0.672369,0.752169,0.830990,0.905756,0.973009,1.029001,1.069837,...,0.227525,0.211700,0.202655,0.201592,0.209381,0.226554,0.253319,0.289592,0.335041,0.389121
199997,0.468162,0.538240,0.613345,0.691633,0.770798,0.848067,0.920231,0.983722,1.034738,1.069406,...,0.221732,0.208116,0.201456,0.202891,0.213233,0.232954,0.262206,0.300851,0.348501,0.404549
199998,0.487219,0.558379,0.634000,0.712101,0.790231,0.865468,0.934474,0.993589,1.038978,1.066827,...,0.216996,0.205891,0.201873,0.206018,0.219075,0.241460,0.273266,0.314303,0.364128,0.422070


In [52]:
# Render the rows labelled 0 through 1000 of the scaled data as a contour map
# and save it as a static image, which the next markdown cell embeds.
snapshot_window = data.loc[:1000].to_numpy()
contour_fig = go.Figure(data=go.Contour(z=snapshot_window))
contour_fig.write_image('images/problem_3_data_example.png')

![](images/problem_3_data_example.png)

In [94]:
def test_n_components_acc(n, data):
    """Return the mean absolute reconstruction error of an ``n``-component PCA.

    A new ``PCA(n_components=n)`` is fitted on ``data``; the data is projected
    into the reduced space, mapped back with ``inverse_transform`` and compared
    with the original using ``mean_absolute_error``.

    Despite the "acc" in the name, the returned value is an error: lower is
    better.
    """
    model = PCA(n_components=n)
    reconstruction = model.inverse_transform(model.fit_transform(data))
    return mean_absolute_error(data, reconstruction)

In [97]:
pca_n_acc = []

# Two sweeps over n_components, each with its own progress bar: first every
# value from 2 to 49, then - because the error was still too high at 50 - a
# coarser sweep from 50 to 190 in steps of 10 to save time. The change of step
# size is why the x axis of the resulting chart is unevenly spaced.
for component_grid in (range(2, 50), range(50, 200, 10)):
    for i in progressbar.progressbar(component_grid):
        pca_n_acc.append(test_n_components_acc(i, data))

100% (15 of 15) |########################| Elapsed Time: 0:01:44 Time:  0:01:44


In [100]:
# The x values must mirror the two sweeps that filled pca_n_acc:
# 2..49 in steps of 1, then 50..190 in steps of 10 (hence the uneven spacing).
component_counts = [*range(2, 50), *range(50, 200, 10)]
px.line(
    x=component_counts,
    y=pca_n_acc,
    title='Reduced data difference to real',
    # Without explicit labels the axes are only titled "x" and "y".
    labels={'x': 'PCA n_components', 'y': 'Mean absolute reconstruction error'},
).write_image('images/PCA_components_test.png')

![](images/PCA_components_test.png)

I'll use 90 components: it is the first value at which the reconstruction error (the difference from the original data after the inverse transform) is negligible.

In [101]:
# 90 components: the value picked from the reconstruction-error sweep above.
pca = PCA(n_components=90)

In [102]:
%%time
# Fit the PCA on the scaled data and keep the reduced (projected) coordinates.
reduced_data = pca.fit_transform(data)

CPU times: user 20.9 s, sys: 4.37 s, total: 25.3 s
Wall time: 3.86 s


In [107]:
# Sanity check: mean absolute error between the scaled data and its
# reconstruction from the reduced representation.
mean_absolute_error(data, pca.inverse_transform(reduced_data))

0.0006814402173735035

In [108]:
# Fraction of the rows assigned to the first partition of the split.
split = 0.8
# Row index at which reduced_data is cut into two partitions.
split_point = int(split * len(reduced_data))
split_point

160000

In [111]:
# Wrap the PCA output in a DataFrame so it can be sliced by row afterwards.
# NOTE: this rebinds the existing name reduced_data to a DataFrame.
reduced_data = pd.DataFrame(reduced_data)

In [113]:
# Split by row position. .iloc slices are end-exclusive, so the row at
# split_point goes only to y. The previous label-based .loc slices are
# inclusive on both ends and therefore put that row in both X and y.
X, y = reduced_data.iloc[:split_point], reduced_data.iloc[split_point:]