In [85]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as  plt
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Reshape

In [87]:
input_df = pd.read_csv("../../data/csv_files/df_ph1.csv")

In [89]:
input_df.shape

(7748, 10)

In [91]:
stations_name = list(input_df.columns)
stations_name.remove("dt")

index_name = {}
for i, name in enumerate(stations_name):
    index_name[i] = name
index_name

{0: 'FEX_004',
 1: 'FSB_095DR',
 2: 'FSB_078',
 3: 'FSB_126',
 4: 'FSB_130D',
 5: 'FPZ_06A',
 6: 'FSB_079',
 7: 'FPZ_04A',
 8: 'FSB_097D'}

In [93]:
input_df = input_df.interpolate(method="linear")

In [95]:
input_df.isna().sum()

dt           0
FEX_004      0
FSB_095DR    0
FSB_078      0
FSB_126      0
FSB_130D     0
FPZ_06A      0
FSB_079      0
FPZ_04A      0
FSB_097D     0
dtype: int64

In [97]:
datetime = input_df['dt']
df = input_df[8:]
datetime = df['dt']
df = df.drop(columns = ['dt'])
df.shape

(7740, 9)

In [99]:
arr = df.values

In [101]:
reshaped_arr = arr.reshape((7740, 9, 1))
reshaped_arr.shape

(7740, 9, 1)

In [103]:
def create_4d_array(array, window_size, overlap):
    step = window_size - overlap
    num_rows, num_columns, depth = array.shape
    num_windows = (num_rows - window_size) // step + 1

    array_4d = np.zeros((num_windows, window_size, num_columns, depth))

    count = 0
    for i in range(0, num_windows, step):
        array_4d[count] = array[i:i+window_size]
        count += 1

    return array_4d

In [105]:
reshaped_arr[:2]

array([[[3.98233366],
        [4.2424221 ],
        [4.17438126],
        [4.17998578],
        [4.13214827],
        [5.25426054],
        [7.71799278],
        [6.41627455],
        [3.7679441 ]],

       [[3.98229194],
        [4.24108791],
        [4.17507362],
        [4.18014199],
        [4.13327551],
        [5.25284767],
        [7.71863556],
        [6.41649199],
        [3.76802993]]])

In [107]:
X_train = create_4d_array(reshaped_arr, 10, 6)
X_train.shape

(1933, 10, 9, 1)

In [109]:
model = Sequential([
    Conv2D(32, kernel_size=(3,1), activation='relu', input_shape=(10, 9, 1)),
    MaxPooling2D(pool_size=(2,1)),
    
    Conv2D(64, kernel_size=(3,1), activation='relu'),
    MaxPooling2D(pool_size=(2,1)),
    
    Flatten(),    
    Dense(64, activation='relu'),
    Dense(10 * 9 * 1 , activation='linear'),
    Reshape((10, 9, 1))
])

model.compile(optimizer='adam', loss='mean_squared_error') 

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [111]:
model.fit(X_train, X_train, epochs = 100, batch_size = 64)

Epoch 1/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5.7742  
Epoch 2/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.3938
Epoch 3/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0097
Epoch 4/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0014 
Epoch 5/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 9.5526e-04
Epoch 6/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 9.2654e-04
Epoch 7/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 9.5117e-04
Epoch 8/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 9.0694e-04
Epoch 9/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 9.7155e-04
Epoch 10/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

<keras.src.callbacks.history.History at 0x30db47210>

In [113]:
X_test = create_4d_array(reshaped_arr, 10, 0)

In [115]:
X_test.shape

(774, 10, 9, 1)

In [117]:
model.evaluate(X_test, X_test)

[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 551us/step - loss: 5.5182e-04


0.0001784201303962618

In [119]:
y_pred = model.predict(X_test)

[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


In [121]:
y_pred_reshaped = y_pred.reshape((-1, 9))
y_pred_reshaped.shape

(7740, 9)

In [123]:
predicted_df = pd.DataFrame(y_pred_reshaped)

In [127]:
predicted_df.rename(columns = index_name, inplace = True)

In [129]:
predicted_df

Unnamed: 0,FEX_004,FSB_095DR,FSB_078,FSB_126,FSB_130D,FPZ_06A,FSB_079,FPZ_04A,FSB_097D
0,3.979327,4.204924,4.146307e+00,4.193398,4.123796,5.298664,7.879143e+00,6.411238,3.766538
1,3.974967,4.216809,4.138014e+00,4.190757,4.161385,5.258179,7.862474e+00,6.389676,3.780444
2,3.946427,4.209148,4.123145e+00,4.147642,4.138660,5.291647,7.857821e+00,6.381494,3.747722
3,3.951205,4.200116,4.125548e+00,4.174520,4.172224,5.295893,7.865590e+00,6.401342,3.733856
4,3.961201,4.218462,4.131424e+00,4.162150,4.133429,5.285905,7.838560e+00,6.389393,3.741643
...,...,...,...,...,...,...,...,...,...
7735,-0.000024,0.000032,1.189858e-05,0.000050,0.000028,0.000075,1.005828e-07,-0.000022,0.000067
7736,0.000013,0.000071,-5.811453e-07,0.000035,0.000058,0.000122,-4.318729e-05,0.000013,-0.000006
7737,0.000060,0.000043,-2.456643e-05,0.000045,-0.000018,0.000058,-6.837025e-05,-0.000016,0.000020
7738,-0.000036,0.000036,-8.716807e-05,0.000021,0.000044,0.000076,5.983654e-05,0.000045,0.000046


In [131]:
df

Unnamed: 0,FEX_004,FSB_095DR,FSB_078,FSB_126,FSB_130D,FPZ_06A,FSB_079,FPZ_04A,FSB_097D
8,3.982334,4.242422,4.174381,4.179986,4.132148,5.254261,7.717993,6.416275,3.767944
9,3.982292,4.241088,4.175074,4.180142,4.133276,5.252848,7.718636,6.416492,3.768030
10,3.981953,4.242620,4.177405,4.180298,4.133048,5.250815,7.717875,6.416061,3.767769
11,3.983563,4.243669,4.176972,4.180454,4.133577,5.248752,7.716632,6.415847,3.768118
12,3.983425,4.243669,4.173831,4.180611,4.133913,5.245027,7.716830,6.415980,3.768550
...,...,...,...,...,...,...,...,...,...
7743,4.373425,4.483142,4.300413,4.495069,4.439222,5.585535,7.878193,7.592293,4.167729
7744,4.374073,4.482854,4.301687,4.494333,4.439609,5.582193,7.877029,7.593216,4.167895
7745,4.375431,4.483341,4.304706,4.495460,4.439438,5.579660,7.880521,7.605413,4.167374
7746,4.373557,4.482985,4.309968,4.494287,4.439528,5.576629,7.880730,7.593132,4.168333


In [None]:
plt.figure(figsize=(10, 5))
# for index in index_name.values():
index = index_name[0]
plt.plot(datetime, df[index], marker='.', color="red", label="actual-data")
plt.plot(datetime, predicted_df[index], marker='.', color="blue", label = "predicted-data")


plt.xlabel('Date Time')
plt.ylabel('ph')
plt.title(f'Plot actual pH and predicted pH for {index}')
plt.legend()
plt.show()