In [28]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

In [29]:
df = pd.read_csv("dataport-export_gas_oct2015-mar2016.csv")

In [30]:
df.head()

Unnamed: 0,localminute,dataid,meter_value
0,2015-10-01 00:00:10-05,739,88858
1,2015-10-01 00:00:13-05,8890,197164
2,2015-10-01 00:00:20-05,6910,179118
3,2015-10-01 00:00:22-05,3635,151318
4,2015-10-01 00:00:22-05,1507,390354


In [31]:
df.dtypes

localminute    object
dataid          int64
meter_value     int64
dtype: object

In [32]:
df.groupby('dataid')['meter_value'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
dataid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
35,11872.0,98799.541779,3585.696685,93470.0,95402.0,98397.0,102309.0,104692.0
44,1549.0,168857.252421,1621.907607,165674.0,167868.0,168188.0,169672.0,175070.0
77,10683.0,61162.735748,1655.197037,58832.0,59790.0,60742.0,62590.0,64652.0
94,36335.0,126452.122086,8260.652894,116642.0,117894.0,124482.0,134538.0,140358.0
114,2597.0,136067.471698,6187.644319,128294.0,129930.0,134774.0,143072.0,145298.0
...,...,...,...,...,...,...,...,...
9729,12361.0,129427.883019,5794.697171,122186.0,123380.0,128152.0,135684.0,138146.0
9766,2282.0,171690.504820,7682.217141,158512.0,161656.0,176406.0,177428.0,179902.0
9849,2741.0,35566.648668,1675.226417,33632.0,34070.0,35172.0,36632.0,39292.0
9956,1292.0,99377.100619,2762.759712,95860.0,96164.0,100438.0,101822.5,107042.0


In [33]:
df['localminute'].head()

0    2015-10-01 00:00:10-05
1    2015-10-01 00:00:13-05
2    2015-10-01 00:00:20-05
3    2015-10-01 00:00:22-05
4    2015-10-01 00:00:22-05
Name: localminute, dtype: object

As we can see, the localminute column containing the timestamp for each meter reading is:
1. Incorrectly formatted - the the "-05" or "-06" at the end is timezone data, indicating when Daylight Savings Time begins and ends. We can extract this to create a new feature
2. Not in the datetime datatype

In [34]:
df['DST'] = pd.to_numeric(df['localminute'].str[-2:]) == 5
df

Unnamed: 0,localminute,dataid,meter_value,DST
0,2015-10-01 00:00:10-05,739,88858,True
1,2015-10-01 00:00:13-05,8890,197164,True
2,2015-10-01 00:00:20-05,6910,179118,True
3,2015-10-01 00:00:22-05,3635,151318,True
4,2015-10-01 00:00:22-05,1507,390354,True
...,...,...,...,...
1584818,2016-03-31 23:59:14.336743-05,2129,201726,True
1584819,2016-03-31 23:59:17.427165-05,2945,161232,True
1584820,2016-03-31 23:59:35.370782-05,9729,138146,True
1584821,2016-03-31 23:59:47.816286-05,5129,166488,True


In [35]:
df['localminute'] = df['localminute'].str[:-3]
df['localminute'] = pd.to_datetime(df['localminute'], format='mixed').dt.strftime('%Y-%m-%d %H:%M:%S')
df['localminute'] = pd.to_datetime(df['localminute'])

In [36]:
df

Unnamed: 0,localminute,dataid,meter_value,DST
0,2015-10-01 00:00:10,739,88858,True
1,2015-10-01 00:00:13,8890,197164,True
2,2015-10-01 00:00:20,6910,179118,True
3,2015-10-01 00:00:22,3635,151318,True
4,2015-10-01 00:00:22,1507,390354,True
...,...,...,...,...
1584818,2016-03-31 23:59:14,2129,201726,True
1584819,2016-03-31 23:59:17,2945,161232,True
1584820,2016-03-31 23:59:35,9729,138146,True
1584821,2016-03-31 23:59:47,5129,166488,True


In [37]:
df.dtypes

localminute    datetime64[ns]
dataid                  int64
meter_value             int64
DST                      bool
dtype: object

In [38]:
df['dataid'].value_counts()

dataid
2034    75991
6910    69349
484     44034
5814    42424
5810    42234
        ...  
7566       32
9620       23
4671       21
6101        3
4874        2
Name: count, Length: 157, dtype: int64

Let's plot a time series graph for each dataid and try to see if there are any outliers, anomalies, or any patterns in the data. The meter values we have currently measure cumulative consumption and not marginal consumption.

In [39]:
df = df.set_index(df['dataid'])
df = df.drop(columns='dataid')

In [40]:
df.index.value_counts()

dataid
2034    75991
6910    69349
484     44034
5814    42424
5810    42234
        ...  
7566       32
9620       23
4671       21
6101        3
4874        2
Name: count, Length: 157, dtype: int64

In [41]:
df = df.drop([7566, 9620, 4671, 6101, 4874, 9160, 2755, 2946, 2814, 5545, 2645, 6685, 1403])
df.index.value_counts()

dataid
2034    75991
6910    69349
484     44034
5814    42424
5810    42234
        ...  
8059      529
5658      493
3036      336
9600      330
8703      258
Name: count, Length: 144, dtype: int64

I've gone ahead and dropped all the dataids that have less than 250 readings. I've seen later that these end up being quite problematic and irregular, so I've decided that they don't have enough data to be included in our investigation

In [42]:
df = df.reset_index()
df

Unnamed: 0,dataid,localminute,meter_value,DST
0,739,2015-10-01 00:00:10,88858,True
1,8890,2015-10-01 00:00:13,197164,True
2,6910,2015-10-01 00:00:20,179118,True
3,3635,2015-10-01 00:00:22,151318,True
4,1507,2015-10-01 00:00:22,390354,True
...,...,...,...,...
1584128,2129,2016-03-31 23:59:14,201726,True
1584129,2945,2016-03-31 23:59:17,161232,True
1584130,9729,2016-03-31 23:59:35,138146,True
1584131,5129,2016-03-31 23:59:47,166488,True


In [43]:
df = df.sort_values(by=['dataid', 'localminute'])

As we can see, the data is irregularly sampled, which is inconvenient to handle and creates a lot of uneccesary noise, especially as there is barely any difference at small intervals. Therefore we will extract the marginal consumption per hour from the cumulative readings using resampling.

In [44]:
# for dataid, group in df.groupby('dataid'):
#     fig, ax = plt.subplots(figsize=(10,5))
#     plt.plot(df[df['dataid'] == dataid].set_index("localminute")['meter_value'].diff())
#     ax.set_title(f'meter value by time for dataid {dataid}')
#     plt.show()

In [45]:
marginal_consumption = []
for dataid, group in df.groupby('dataid'):
    meter_value_diff = group['meter_value'].diff()
    marginal_consumption.append(meter_value_diff)


In [46]:
marginal_consumption = pd.concat(marginal_consumption)
df['marginal_consumption'] = marginal_consumption
df['marginal_consumption']

83          NaN
244         0.0
723         0.0
790         0.0
829         0.0
           ... 
1560987     0.0
1561582     2.0
1566451    48.0
1576619    68.0
1580897    72.0
Name: marginal_consumption, Length: 1584133, dtype: float64

In [47]:
df['marginal_consumption'] = df['marginal_consumption'].fillna(0)

In [48]:
df

Unnamed: 0,dataid,localminute,meter_value,DST,marginal_consumption
83,35,2015-10-01 00:14:44,93470,True,0.0
244,35,2015-10-01 00:42:34,93470,True,0.0
723,35,2015-10-01 02:02:37,93470,True,0.0
790,35,2015-10-01 02:12:38,93470,True,0.0
829,35,2015-10-01 02:20:36,93470,True,0.0
...,...,...,...,...,...
1560987,9982,2016-03-28 09:25:54,119734,True,0.0
1561582,9982,2016-03-28 11:29:41,119736,True,2.0
1566451,9982,2016-03-29 04:38:38,119784,True,48.0
1576619,9982,2016-03-30 17:46:42,119852,True,68.0


In [49]:
resampled_consumption = []
dataid_column = []
table = []

for dataid, group in df.groupby('dataid'):
    resample = group.resample('h', on='localminute')['marginal_consumption'].sum()
    resampled_consumption.append(resample)
    for i in range(len(resample)):
        dataid_column.append(dataid)

In [50]:
consumption_series = pd.concat(resampled_consumption)
consumption_series

localminute
2015-10-01 00:00:00     0.0
2015-10-01 01:00:00     0.0
2015-10-01 02:00:00     0.0
2015-10-01 03:00:00     0.0
2015-10-01 04:00:00     0.0
                       ... 
2016-03-31 07:00:00     0.0
2016-03-31 08:00:00     0.0
2016-03-31 09:00:00     0.0
2016-03-31 10:00:00     0.0
2016-03-31 11:00:00    72.0
Name: marginal_consumption, Length: 591478, dtype: float64

In [51]:
df = pd.DataFrame(consumption_series)

In [52]:
len(dataid_column)

591478

In [53]:
df['dataid'] = dataid_column
df

Unnamed: 0_level_0,marginal_consumption,dataid
localminute,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-10-01 00:00:00,0.0,35
2015-10-01 01:00:00,0.0,35
2015-10-01 02:00:00,0.0,35
2015-10-01 03:00:00,0.0,35
2015-10-01 04:00:00,0.0,35
...,...,...
2016-03-31 07:00:00,0.0,9982
2016-03-31 08:00:00,0.0,9982
2016-03-31 09:00:00,0.0,9982
2016-03-31 10:00:00,0.0,9982


In [54]:
df['DST'] = df.index.map( lambda x: 1 if x < pd.to_datetime('2015-11-01 01:00:00', format='mixed') or x > pd.to_datetime('2016-03-13 02:00:00', format='mixed') else 0 )

In [55]:
df

Unnamed: 0_level_0,marginal_consumption,dataid,DST
localminute,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-10-01 00:00:00,0.0,35,1
2015-10-01 01:00:00,0.0,35,1
2015-10-01 02:00:00,0.0,35,1
2015-10-01 03:00:00,0.0,35,1
2015-10-01 04:00:00,0.0,35,1
...,...,...,...
2016-03-31 07:00:00,0.0,9982,1
2016-03-31 08:00:00,0.0,9982,1
2016-03-31 09:00:00,0.0,9982,1
2016-03-31 10:00:00,0.0,9982,1


Now we have our new dataset resampled by the hour, with marginal consumption extracted. Let's have a look at the time series graphs now.

It seems there are outliers present that could hamper our model performance. Let us try and remove them. Firstly, let's start with the ones with a large negative marginal consumption. Since our meter value data is cumulative, the marginal consumption cannot be negative

In [56]:
df = df[~df['dataid'].isin(df[df['marginal_consumption'] < -50]['dataid'].unique())]   

In [57]:
len(df[df['marginal_consumption'] < -50]['dataid'].unique())

0

In [58]:
len(df['dataid'].unique())

129

In [59]:
extreme = df[df['marginal_consumption'] > 5000]['dataid'].unique()

In [60]:
len(df[df['dataid'] == 4874])

0

In [61]:
df['marginal_consumption'].min()

0.0

In [62]:
len(df[df['marginal_consumption'] > 1000]['dataid'].unique())

22

In [63]:
df['marginal_consumption'].max()

13982.0

In [64]:
removed = []
for dataid, group in df.groupby('dataid'): 
    ub = group['marginal_consumption'].quantile(0.99)
    # outliers = group['marginal_consumption'][group['marginal_consumption'] > ub]
    group.loc[group['marginal_consumption'] > ub] = np.nan
    group = group.interpolate()
    removed.append(group['marginal_consumption'])

In [65]:
df['marginal_consumption'] = pd.concat(removed)
df['marginal_consumption'].isna().sum()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['marginal_consumption'] = pd.concat(removed)


0

In [66]:
# for dataid, group in df.groupby('dataid'): 
#     plt.hist(group['marginal_consumption'])
#     plt.show()

In [67]:
from scipy import stats

In [68]:
df.reset_index(inplace=True)

In [69]:
df["marginal_diff"] = df["marginal_consumption"].diff()
df.fillna(0, inplace=True)
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["marginal_diff"] = df["marginal_consumption"].diff()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.fillna(0, inplace=True)


Unnamed: 0,localminute,marginal_consumption,dataid,DST,marginal_diff
0,2015-10-01 00:00:00,0.0,35,1,0.0
1,2015-10-01 01:00:00,0.0,35,1,0.0
2,2015-10-01 02:00:00,0.0,35,1,0.0
3,2015-10-01 03:00:00,0.0,35,1,0.0
4,2015-10-01 04:00:00,0.0,35,1,0.0
...,...,...,...,...,...
525616,2016-02-08 12:00:00,0.0,9956,0,0.0
525617,2016-02-08 13:00:00,0.0,9956,0,0.0
525618,2016-02-08 14:00:00,0.0,9956,0,0.0
525619,2016-02-08 15:00:00,0.0,9956,0,0.0


In [70]:
df['localminute'].dt.year

0         2015
1         2015
2         2015
3         2015
4         2015
          ... 
525616    2016
525617    2016
525618    2016
525619    2016
525620    2016
Name: localminute, Length: 525621, dtype: int32

In [71]:
df['week'] = df['localminute'].dt.isocalendar().week
df['week']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['week'] = df['localminute'].dt.isocalendar().week


0         40
1         40
2         40
3         40
4         40
          ..
525616     6
525617     6
525618     6
525619     6
525620     6
Name: week, Length: 525621, dtype: UInt32

In [72]:
standardized = []
for dataid, group in df.groupby('dataid'):
    weekly_variance = group.groupby(group['localminute'].dt.isocalendar().week)['marginal_diff'].std()
    mapped_weekly_variance = group['localminute'].map(lambda x: weekly_variance.loc[x.week])
    group['marginal_diff'] = np.where(mapped_weekly_variance != 0, group['marginal_diff'] / mapped_weekly_variance, 0)
    standardized.append(group['marginal_diff'])
    # group.set_index('localminute')['marginal_diff'].plot(figsize=(15,5))
    # plt.xlabel('Date')
    # plt.ylabel('Standardized beer production rate')
    # plt.grid()
    # plt.show()

In [73]:
df['marginal_diff'] = pd.concat(standardized)
df['marginal_diff']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['marginal_diff'] = pd.concat(standardized)


0         0.0
1         0.0
2         0.0
3         0.0
4         0.0
         ... 
525616    0.0
525617    0.0
525618    0.0
525619    0.0
525620    0.0
Name: marginal_diff, Length: 525621, dtype: float64

In [74]:
df.isna().sum()

localminute             0
marginal_consumption    0
dataid                  0
DST                     0
marginal_diff           0
week                    0
dtype: int64

In [75]:
from statsmodels.tsa.stattools import adfuller

for dataid, group in df.groupby('dataid'):
    result = adfuller(group["marginal_diff"].values)
    if result[1] <= 0.05:
        print("Stationary")
    else:
        print(f"Not stationary: {dataid}")

Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary

In [76]:
import warnings
from statsmodels.tools.sm_exceptions import InterpolationWarning
warnings.simplefilter('ignore', InterpolationWarning)


In [77]:
from statsmodels.tsa.stattools import kpss
for dataid, group in df.groupby('dataid'):
    result = kpss(group["marginal_diff"].values, regression = "ct")
    if result[1] > 0.05:
        print("Stationary")
    else:
        print(f"Not stationary: {dataid}")

Not stationary: 35
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Not stationary: 661
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Not stationary: 1800
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Not stationary: 3723
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Not stationary: 5275
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Stationary
Station

In [78]:
df['marginal_diff'].skew()

0.09241104017679351

In [79]:
df['year'] = df['localminute'].dt.year
df['month'] = df['localminute'].dt.month
df['day'] = df['localminute'].dt.day
df['hour'] = df['localminute'].dt.hour
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['year'] = df['localminute'].dt.year
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['month'] = df['localminute'].dt.month
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['day'] = df['localminute'].dt.day
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_in

Unnamed: 0,localminute,marginal_consumption,dataid,DST,marginal_diff,week,year,month,day,hour
0,2015-10-01 00:00:00,0.0,35,1,0.0,40,2015,10,1,0
1,2015-10-01 01:00:00,0.0,35,1,0.0,40,2015,10,1,1
2,2015-10-01 02:00:00,0.0,35,1,0.0,40,2015,10,1,2
3,2015-10-01 03:00:00,0.0,35,1,0.0,40,2015,10,1,3
4,2015-10-01 04:00:00,0.0,35,1,0.0,40,2015,10,1,4
...,...,...,...,...,...,...,...,...,...,...
525616,2016-02-08 12:00:00,0.0,9956,0,0.0,6,2016,2,8,12
525617,2016-02-08 13:00:00,0.0,9956,0,0.0,6,2016,2,8,13
525618,2016-02-08 14:00:00,0.0,9956,0,0.0,6,2016,2,8,14
525619,2016-02-08 15:00:00,0.0,9956,0,0.0,6,2016,2,8,15


In [80]:
missing_percentage = 0.1  # Introduce 10% missing values
num_missing = int(len(df) * missing_percentage)

# Randomly select indices to introduce missing values
np.random.seed(42)  # For reproducibility
missing_indices = np.random.choice(df.index, size=num_missing, replace=False)


In [81]:
df_with_nans = df.copy()

# Introduce NaNs into the 'marginal_diff' column
df_with_nans.loc[missing_indices, 'marginal_diff'] = np.nan

# Check for NaNs
print("Number of NaNs introduced in 'marginal_diff':", df_with_nans['marginal_diff'].isnull().sum())

Number of NaNs introduced in 'marginal_diff': 52562


In [82]:
features = ['marginal_diff', 'DST', 'week', 'year', 'month', 'day', 'hour', 'dataid']

In [83]:
from sklearn.model_selection import train_test_split

In [84]:
train_data, test_data = train_test_split(df_with_nans, test_size=0.2, random_state=42, shuffle=False)

In [85]:
from sklearn.preprocessing import MinMaxScaler
train_features = train_data[features].dropna()
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(train_features)

In [86]:
# Prepare the data for LSTM
def create_dataset(data, time_step=1):
    X, y = [], []
    for i in range(len(data) - time_step):
        if np.isnan(data.iloc[i + time_step]['marginal_diff']):  # Check if target is NaN
            continue
        X.append(data.iloc[i:(i + time_step)].values)  # Include all features
        y.append(data.iloc[i + time_step]['marginal_diff'])  # Target is 'marginal_diff'
    return np.array(X), np.array(y)

In [87]:
time_step = 24
X_train, y_train = create_dataset(pd.DataFrame(X_train_scaled, columns=features, index=train_features.index), time_step)

In [88]:
X_train, y_train

(array([[[0.54085386, 1.        , 0.75      , ..., 0.        ,
          0.        , 0.        ],
         [0.54085386, 1.        , 0.75      , ..., 0.        ,
          0.04347826, 0.        ],
         [0.54085386, 1.        , 0.75      , ..., 0.        ,
          0.13043478, 0.        ],
         ...,
         [0.54085386, 1.        , 0.75      , ..., 0.03333333,
          0.        , 0.        ],
         [0.54085386, 1.        , 0.75      , ..., 0.03333333,
          0.04347826, 0.        ],
         [0.54085386, 1.        , 0.75      , ..., 0.03333333,
          0.08695652, 0.        ]],
 
        [[0.54085386, 1.        , 0.75      , ..., 0.        ,
          0.04347826, 0.        ],
         [0.54085386, 1.        , 0.75      , ..., 0.        ,
          0.13043478, 0.        ],
         [0.54085386, 1.        , 0.75      , ..., 0.        ,
          0.17391304, 0.        ],
         ...,
         [0.54085386, 1.        , 0.75      , ..., 0.03333333,
          0.04347826, 0.

In [89]:
# Reshape X for LSTM input
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], len(features))

In [90]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Masking
from tensorflow.keras.losses import MeanSquaredError
import tensorflow as tf

In [91]:
print(f"TensorFlow has access to the following devices:\n{tf.config.list_physical_devices()}")

# See TensorFlow version
print(f"TensorFlow version: {tf.__version__}")

TensorFlow has access to the following devices:
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
TensorFlow version: 2.16.2


In [94]:
with tf.device('/job:localhost/replica:0/task:0/device:CPU:0'):
    print("Hello")



Hello


In [95]:
with tf.device('/job:localhost/replica:0/task:0/device:CPU:0'):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], len(features))))
    model.add(tf.keras.layers.LSTM(50))
    model.add(tf.keras.layers.Dense(1))  # Output layer for predicting 'marginal_diff'

  super().__init__(**kwargs)


In [None]:
with tf.device('/job:localhost/replica:0/task:0/device:CPU:0'):
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=100, batch_size=32)

Epoch 1/100
