In [11]:
import pandas as pd
import numpy as np

In [12]:
path="../outputs/module1_results.csv"
df=pd.read_csv(path)
df

Unnamed: 0,image_name,vehicle_count,Pedestrian_count,congestion
0,7d06fefd-f7be05a6.jpg,9,0,LOW
1,7d128593-0ccfea4c.jpg,3,0,LOW
2,7d15b18b-1e0d6e3f.jpg,6,0,LOW
3,7d209219-ccdc1a09.jpg,5,0,LOW
4,7d22891c-224788c0.jpg,3,0,LOW
...,...,...,...,...
995,abc6b442-311adbdf.jpg,2,0,LOW
996,abf6f644-ade3cfa2.jpg,5,0,LOW
997,abfbfcb1-0e220001.jpg,17,0,Medium
998,ac033a32-493865fc.jpg,10,1,Medium


In [13]:
feature=df[["vehicle_count","Pedestrian_count"]].values
feature

array([[ 9,  0],
       [ 3,  0],
       [ 6,  0],
       ...,
       [17,  0],
       [10,  1],
       [10,  3]], shape=(1000, 2))

In [14]:
# Normalize features
feature=(feature-feature.mean(axis=0))/feature.std(axis=0)
feature

array([[ 0.56070817, -0.39291135],
       [-0.88690328, -0.39291135],
       [-0.16309756, -0.39291135],
       ...,
       [ 2.49085675, -0.39291135],
       [ 0.80197674,  0.35549123],
       [ 0.80197674,  1.85229638]], shape=(1000, 2))

In [15]:
import tensorflow as tf
from tensorflow.keras import layers,Model

In [21]:
class VAE(Model):
    def __init__(self,input_dim,latent_dim=2):
        super(VAE,self).__init__()

        #Encoder
        self.encoder=tf.keras.Sequential([
            layers.Dense(16,activation="relu"),
            layers.Dense(8,activation="relu")
        ])
        self.z_mean=layers.Dense(latent_dim)
        self.z_log_var=layers.Dense(latent_dim)

        #Decoder
        self.decoder=tf.keras.Sequential([
            layers.Dense(16,activation="relu"),
            layers.Dense(8,activation="relu"),
            layers.Dense(input_dim)
        ])
    def sample(self,z_mean,z_log_var):
        eps=tf.random.normal(shape=tf.shape(z_mean))
        return z_mean+tf.exp(0.5*z_log_var)*eps
    
    def call(self,inputs):
        encode=self.encoder(inputs)
        zmean=self.z_mean(encode)
        zlog_var=self.z_log_var(encode)
        z=self.sample(zmean,zlog_var)
        recontrucion=self.decoder(z)

        kl_loss=-0.5*tf.reduce_mean(1+zlog_var-tf.square(zmean)-tf.exp(zlog_var))
        self.add_loss(kl_loss)

        return recontrucion





In [22]:
vae=VAE(input_dim=feature.shape[1])
vae.compile(optimizer="adam",loss="mse")

In [23]:
vae.fit(feature,feature,epochs=30,batch_size=32)

Epoch 1/30
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 1.0896
Epoch 2/30
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.0312
Epoch 3/30
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.0043
Epoch 4/30
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.9965
Epoch 5/30
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.9833
Epoch 6/30
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.9963
Epoch 7/30
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.9859
Epoch 8/30
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.9857
Epoch 9/30
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.9657
Epoch 10/30
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.9536
Epoch 11/

<keras.src.callbacks.history.History at 0x1f4d8b16960>

In [24]:
reconstructed=vae.predict(feature)

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


In [25]:
reconstructed_error=np.mean(np.square(feature-reconstructed),axis=1)
reconstructed_error

array([1.16312588e-01, 4.72679726e-01, 6.15468172e-02, 5.19774911e-02,
       1.67718698e-01, 1.31781076e-01, 3.73679421e-01, 4.55806561e-01,
       2.53981719e-02, 5.94850064e-01, 5.45086164e-01, 1.00399711e-01,
       5.87296577e-01, 8.23805943e-01, 1.82317641e-01, 7.48492795e-01,
       4.41495510e-01, 5.85041265e-01, 2.06730928e-01, 2.14521871e-01,
       5.80597166e-02, 1.68988554e-01, 5.69280852e-01, 1.02708869e-01,
       1.84877033e-02, 2.17436303e-02, 2.01371625e-01, 4.36009088e-01,
       3.93875134e-01, 6.01291299e-01, 1.14677129e-01, 1.12450068e-02,
       1.00915563e-01, 1.43417766e-01, 2.86204568e-01, 4.95353448e-02,
       2.81726030e-02, 2.00493782e-01, 1.89970464e-01, 5.48224453e-02,
       3.71490973e-01, 2.09796106e-01, 1.07512021e-01, 6.75740119e-01,
       3.86781644e-02, 2.55339651e-01, 2.26895696e-01, 6.65730103e-02,
       4.40967810e-02, 3.66359627e-01, 7.76001820e-01, 1.38381252e-02,
       2.05479937e+00, 2.58377315e-01, 3.28164116e-01, 2.91796511e-01,
      

In [26]:
threshold=np.percentile(reconstructed_error,95)
threshold

np.float64(1.3507052620791484)

In [30]:
numpy_arr=np.array(reconstructed_error)
for i in numpy_arr:
    print(i)

0.11631258833839364
0.47267972585306534
0.061546817159109854
0.051977491105835376
0.1677186982352425
0.13178107550948498
0.37367942109442387
0.45580656071588044
0.02539817193427941
0.5948500639735261
0.5450861641481254
0.10039971078596728
0.5872965769369111
0.8238059433369891
0.18231764132758727
0.7484927947885056
0.44149551023111966
0.5850412649902665
0.20673092825440387
0.2145218709222342
0.05805971662861786
0.16898855376443492
0.5692808522342548
0.10270886900239466
0.018487703275445494
0.02174363027627209
0.20137162491637456
0.4360090878251037
0.3938751343852349
0.6012912990638065
0.11467712857344209
0.011245006781419258
0.10091556322640968
0.14341776556825975
0.28620456812745027
0.049535344833276054
0.028172602959953642
0.20049378202704138
0.18997046368844767
0.05482244527822783
0.3714909729206145
0.2097961060019605
0.1075120205938681
0.6757401194587574
0.03867816438196456
0.25533965134133874
0.2268956956085743
0.06657301027565597
0.044096781024438016
0.3663596272069534
0.776001819

In [38]:
anomaly=reconstructed_error > threshold
anomaly.sum()

np.int64(50)