In [None]:
!git clone https://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@github.com/Meguazy/project_CSD.git

In [None]:
# Move into the cloned repository so that the git commands and the relative
# data paths used in later cells resolve inside it.
%cd project_CSD/

In [None]:
# Run this every time you start working, to make sure there are no
# unsynchronized changes.

!git pull

In [None]:
# Authenticate the Colab user with Google.
from google.colab import auth
auth.authenticate_user()

# Look up the e-mail address of the authenticated account: the gcloud access
# token is sent to Google's tokeninfo endpoint and the 'email' field of the
# JSON response is kept.
import requests
gcloud_token = !gcloud auth print-access-token
gcloud_tokeninfo = requests.get('https://www.googleapis.com/oauth2/v3/tokeninfo?access_token=' + gcloud_token[0]).json()
EMAIL = str(gcloud_tokeninfo['email'])

# Show the address that will be used as the git author e-mail.
!echo $EMAIL

# Use this to make atomic, frequent commits.
# Remember to always write clear commit messages so that we can roll back or
# cherry-pick when needed.

!git config --global user.email $EMAIL

# Stage everything in the working tree, commit it and push.
!git add .
!git commit -m "Added sparse autoencored algorithm"
!git push

In [27]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import regularizers
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score

In [6]:
# Locations of the two processed recordings, relative to the repository root:
# TrainoCaso1 is used as the normal dataset, TrainoCaso2 as the anomalous one.
_PROCESSED_DIR = 'data/processed_data'
normal_data_path = f'{_PROCESSED_DIR}/TrainoCaso1/time_series.csv'
anomalous_data_path = f'{_PROCESSED_DIR}/TrainoCaso2/time_series.csv'

In [7]:
# Read both CSV files into DataFrames (default pandas parsing options).
normal_data, anomalous_data = (
    pd.read_csv(csv_path)
    for csv_path in (normal_data_path, anomalous_data_path)
)

In [8]:
# Stack the normal rows on top of the anomalous rows in a single frame.
# Each source frame keeps its own index labels, so labels are not unique here.
source_frames = [normal_data, anomalous_data]
all_data = pd.concat(source_frames, axis=0)

In [56]:
# Sensor columns fed to the model: the X/Y/Z components of Axe1 and Axe2,
# in the order Axe1X, Axe1Y, Axe1Z, Axe2X, Axe2Y, Axe2Z.
time_series_columns = [
    f'Axe{sensor}{component}'
    for sensor in (1, 2)
    for component in 'XYZ'
]

In [58]:
# Scaler used to standardize the time series columns within each
# 'Acquisition Number' group. The same instance is re-fitted for every group
# via fit_transform, so once the per-group loop has run it only holds the
# statistics of the last group it was fitted on.
scaler = StandardScaler()

In [61]:
# Standardize the sensor columns separately for each acquisition.
#
# groupby yields the groups sorted by key, while the following cell joins the
# scaled values to 'Acquisition Number'/'Time' purely by position. all_data
# also carries repeated index labels (it is a plain concat of two frames), so
# labels cannot be used to realign the rows afterwards. The scaling is
# therefore done on a positional 0..n-1 index and the result is put back into
# all_data's row order before it is stored.
features_by_position = all_data[time_series_columns].reset_index(drop=True)
acquisition_ids = all_data['Acquisition Number'].to_numpy()

scaled_groups = []
for _, group in features_by_position.groupby(acquisition_ids):
    # fit_transform re-fits the scaler, so every group gets its own mean/std.
    scaled_values = scaler.fit_transform(group)
    scaled_groups.append(
        pd.DataFrame(scaled_values, columns=time_series_columns, index=group.index)
    )

# Kept as a list so the next cell can still call pd.concat() on it.
transformed_data_list = []
if scaled_groups:
    scaled_in_row_order = pd.concat(scaled_groups).sort_index()
    # groupby silently drops rows whose key is missing; a shorter result would
    # shift every following row in the positional join, so fail loudly instead.
    assert len(scaled_in_row_order) == len(all_data), (
        "Rows with a missing 'Acquisition Number' were dropped during scaling"
    )
    # Restore the original index labels of all_data.
    scaled_in_row_order.index = all_data.index
    transformed_data_list.append(scaled_in_row_order)

In [62]:
# Combine the transformed data with the 'Acquisition Number' and 'Time' columns.
# Both sides get a fresh 0..n-1 index and are joined column-wise (axis=1), so
# rows are paired purely by position.
# NOTE(review): this assumes pd.concat(transformed_data_list) yields its rows
# in the same order as all_data - TODO confirm.
all_data_scaled = pd.concat([all_data[['Acquisition Number', 'Time']].reset_index(drop=True), pd.concat(transformed_data_list).reset_index(drop=True)], axis=1)

In [63]:
# Randomly hold out 20% of the scaled rows as a test set; the fixed seed makes
# the split reproducible.
TEST_FRACTION = 0.2
SPLIT_SEED = 42
train_data, test_data = train_test_split(
    all_data_scaled,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

In [64]:
# Define the sparse autoencoder model: one hidden (bottleneck) layer with an
# L1 activity penalty that pushes its activations towards zero, followed by an
# output layer with one unit per input column.
input_dim = len(time_series_columns)
hidden_units = 3  # Adjust the number of neurons in the hidden layer as needed
sparsity_penalty = 0.01  # Adjust as needed

model = Sequential([
    Dense(units=hidden_units, activation='sigmoid', input_dim=input_dim,
          activity_regularizer=regularizers.l1(sparsity_penalty)),
    # The targets are standardized (zero mean, unit variance), so they take
    # negative values and values above 1. A sigmoid output is confined to
    # (0, 1) and could never reproduce them; a linear output can.
    Dense(units=input_dim, activation='linear')
])

In [65]:
# Configure training: minimise the mean squared reconstruction error with Adam.
compile_options = {
    'optimizer': 'adam',
    'loss': 'mean_squared_error',
}
model.compile(**compile_options)

In [None]:
# Train the autoencoder to reproduce its own input (the sensor columns are
# passed as both features and targets) on the training split.
# NOTE(review): train_data is sampled from all_data_scaled, which is built from
# BOTH the normal and the anomalous recordings, so this is not normal-only
# training. There is no label column to filter on; confirm whether training
# should be restricted to the normal rows.
# validation_split=0.1 keeps 10% of the training rows aside for validation.
model.fit(train_data[time_series_columns], train_data[time_series_columns],
          epochs=50, batch_size=32, shuffle=True, validation_split=0.1)

In [None]:
# Reconstruct the full normal and anomalous datasets with the trained model.
#
# The autoencoder was trained on sensor columns standardized per
# 'Acquisition Number', so raw values must not be fed to it directly. Each
# dataset is standardized the same way, passed through the model, and the
# output is converted back to the original units so it can be compared with
# the raw columns of normal_data / anomalous_data.
# NOTE(review): scaling statistics are computed per acquisition within each
# dataset; if the two datasets share acquisition numbers, training pooled them
# into one group - confirm which grouping is intended.
def _reconstruct_in_original_units(frame):
    """Return the model's reconstruction of ``frame``'s sensor columns.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain 'Acquisition Number' and every column listed in
        ``time_series_columns``.

    Returns
    -------
    numpy.ndarray
        Array of shape (len(frame), len(time_series_columns)), in the same row
        order and units as ``frame[time_series_columns]``. Rows whose
        'Acquisition Number' is missing are returned as NaN.
    """
    features = frame[time_series_columns].reset_index(drop=True)
    acquisition_ids = frame['Acquisition Number'].to_numpy()

    # Standardize every acquisition on its own, remembering the fitted scaler
    # and the row positions it applies to.
    scaled = np.full(features.shape, np.nan)
    fitted_scalers = []
    for _, group in features.groupby(acquisition_ids):
        positions = group.index.to_numpy()
        group_scaler = StandardScaler()
        scaled[positions] = group_scaler.fit_transform(group)
        fitted_scalers.append((positions, group_scaler))

    # A single predict call for the whole dataset.
    decoded = model.predict(scaled)

    # Undo the per-acquisition standardization.
    reconstructed = np.full(features.shape, np.nan)
    for positions, group_scaler in fitted_scalers:
        reconstructed[positions] = group_scaler.inverse_transform(decoded[positions])
    return reconstructed


reconstructed_normal_data = _reconstruct_in_original_units(normal_data)
reconstructed_anomalous_data = _reconstruct_in_original_units(anomalous_data)

In [71]:
# Per-row reconstruction error: the squared difference between each dataset
# and its reconstruction, averaged over the sensor columns.
normal_residuals = normal_data[time_series_columns] - reconstructed_normal_data
anomalous_residuals = anomalous_data[time_series_columns] - reconstructed_anomalous_data

mse_normal = (normal_residuals ** 2).mean(axis=1)
mse_anomalous = (anomalous_residuals ** 2).mean(axis=1)

In [72]:
# Anomaly threshold: the reconstruction error below which 95% of the rows of
# the normal dataset fall.
NORMAL_ERROR_PERCENTILE = 95
threshold = np.percentile(mse_normal, NORMAL_ERROR_PERCENTILE)

In [73]:
# Keep the rows of each dataset whose reconstruction error exceeds the threshold.
normal_exceeds_threshold = mse_normal > threshold
anomalous_exceeds_threshold = mse_anomalous > threshold

predicted_normal_anomalies = normal_data.loc[normal_exceeds_threshold]
predicted_anomalous_anomalies = anomalous_data.loc[anomalous_exceeds_threshold]

In [None]:
# Show the flagged rows of each dataset, each preceded by a header line.
print("Predicted Anomalies in Normal Data:", predicted_normal_anomalies, sep="\n")
print("\nPredicted Anomalies in Anomalous Data:", predicted_anomalous_anomalies, sep="\n")

In [79]:
# Count the flagged rows: every flagged row of the anomalous dataset is a true
# positive, every flagged row of the normal dataset is a false positive.
true_positives = (mse_anomalous > threshold).sum()
false_positives = (mse_normal > threshold).sum()

In [80]:
# Precision = true positives / all rows flagged as anomalous.
flagged_total = true_positives + false_positives
precision = true_positives / flagged_total

In [None]:
# Report the precision rounded to two decimal places.
precision_report = f'Precision: {precision:.2f}'
print(precision_report)