In [34]:
import pandas as pd


In [35]:
df = pd.read_csv(r'E:\IITGN_Project\autoencoder\data\anomaly.csv')
print(df.head())

       Date  Power  Detector Quality
0  1-Jan-16     96         8    Good
1  2-Jan-16     96        10    Good
2  3-Jan-16     91         8    Good
3  4-Jan-16     97         9    Good
4  5-Jan-16     91        11    Good


In [36]:
print(df.groupby('Quality')['Quality'].count())

Quality
Bad      407
Good    1054
Name: Quality, dtype: int64


In [37]:
df.drop(['Date'], axis=1, inplace=True)

In [38]:
df.dropna(inplace=True,axis=1)

In [39]:
df.Quality[df.Quality == 'Good'] = 1
df.Quality[df.Quality == 'Bad'] = 2

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df.Quality[df.Quality == 'Good'] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.Quality[df.Quality == '

In [40]:
df.head()

Unnamed: 0,Power,Detector,Quality
0,96,8,1
1,96,10,1
2,91,8,1
3,97,9,1
4,91,11,1


In [41]:
good_mask = df['Quality']== 1 #All good to be True for good data points
bad_mask = df['Quality']== 2 #All values False for good data points
#print(good_mask.head())

In [42]:
df.drop('Quality',axis=1,inplace=True)

df_good = df[good_mask]
df_bad = df[bad_mask]

In [43]:
print(f"Good count: {len(df_good)}")
print(f"Bad count: {len(df_bad)}")

Good count: 1054
Bad count: 407


In [44]:
x_good = df_good.values
x_bad = df_bad.values

In [25]:
from sklearn.model_selection import train_test_split

In [45]:
x_good_train, x_good_test = train_test_split(
        x_good, test_size=0.25, random_state=42)

In [46]:
print(f"Good train count: {len(x_good_train)}")
print(f"Good test count: {len(x_good_test)}")

Good train count: 790
Good test count: 264


In [47]:
from sklearn import metrics
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [48]:
model = Sequential()
model.add(Dense(10, input_dim=x_good.shape[1], activation='relu'))
model.add(Dense(3, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(x_good.shape[1])) 
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [49]:
model.fit(x_good_train,x_good_train,verbose=1,epochs=100)

Epoch 1/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 4645.8027
Epoch 2/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 4555.3350
Epoch 3/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 4568.5957
Epoch 4/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 4560.4844
Epoch 5/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 4555.2217
Epoch 6/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 4546.7949  
Epoch 7/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 4546.5474
Epoch 8/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 4553.4653
Epoch 9/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 4540.5435
Epoch 10/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0

<keras.src.callbacks.history.History at 0x1be71fcd990>

In [50]:
pred = model.predict(x_good_test)
score1 = np.sqrt(metrics.mean_squared_error(pred, x_good_test))
print(f"Reconstruction error for good data: {score1}")

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
Reconstruction error for good data: 65.76821860942651


In [51]:
pred = model.predict(x_good)
score2 = np.sqrt(metrics.mean_squared_error(pred, x_good))
print(f"Reconstruction error for all good data: {score2}")

[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Reconstruction error for all good data: 65.68865275055452


In [52]:
pre = model.predict(x_bad)
score3 = np.sqrt(metrics.mean_squared_error(pre, x_bad))
print(f"Reconstruction error for bad data: {score3}")

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Reconstruction error for bad data: 68.64858343851239


In [53]:
print(f"Insample Good Score (RMSE): {score1}".format(score1))
print(f"Out of Sample Good Score (RMSE): {score2}")
print(f"Bad Score (RMSE): {score3}")

Insample Good Score (RMSE): 65.76821860942651
Out of Sample Good Score (RMSE): 65.68865275055452
Bad Score (RMSE): 68.64858343851239
