# Anomaly Detection using machine learning
## Import necessary libraries

In [35]:
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.metrics import f1_score
import warnings
# Silence every warning for the rest of the session. This keeps the notebook
# output clean, but it also hides any warnings raised by the later cells.
warnings.filterwarnings('ignore')

## Load the datasets

In [36]:
# Read the training and test splits from the working directory.
train_df, test_df = (
    pd.read_csv(csv_path) for csv_path in ('./train.csv', './test.csv')
)

## Display the first few rows of the training dataset

In [37]:
# Preview the first five rows of the training data.
train_df.head(n=5)

## Preprocess the data
### We will use the 'value' column to train the model

In [38]:
# Build the 2-D feature matrices (a single 'value' column) for both splits.
X_train, X_test = (
    frame[['value']].values for frame in (train_df, test_df)
)

## Initialize the model

In [39]:
# contamination=0.01 tells the forest to flag about 1% of samples as outliers.
# random_state is pinned so that repeated runs build the same trees and
# therefore produce the same predictions and the same submission file.
model = IsolationForest(contamination=0.01, random_state=42)

## Fit the model on the training data

In [40]:
# Train the isolation forest on the training feature matrix (unsupervised).
model.fit(X=X_train)

## Predict anomalies on the test data

In [41]:
# Score the test features; the raw output is -1 for anomalies and 1 for
# normal points, and is recoded to 1/0 in the next cell.
raw_predictions = model.predict(X_test)
test_df['is_anomaly'] = raw_predictions

## Convert predictions: -1 (anomaly) to 1, and 1 (normal) to 0

In [42]:
# Recode the model's raw output: -1 (anomaly) -> 1, 1 (normal) -> 0.
# Run this cell only once per prediction: a second pass would remap the
# already-converted values.
label_map = {-1: 1, 1: 0}
test_df['is_anomaly'] = test_df['is_anomaly'].map(label_map)

## Prepare the submission file

In [43]:
# Keep only the columns that go into the submission and write them to disk
# without the DataFrame index.
submission_df = test_df.loc[:, ['timestamp', 'is_anomaly']]
submission_df.to_csv('./Submission.csv', index=False)

## Evaluate the model

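### We need the ground-truth labels to calculate the F1 score
### Assuming the original 'is_anomaly' column in test.csv is the ground truth. Note that test_df['is_anomaly'] now holds the model's predictions (it was overwritten above), so the labels must come from the original file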

In [44]:
# test_df['is_anomaly'] was overwritten with the model's predictions in the
# prediction cells above, so using it as ground truth would only compare the
# predictions with themselves. Reload the untouched labels from disk instead.
labels_df = pd.read_csv('./test.csv')
if 'is_anomaly' not in labels_df.columns:
    # Fail loudly rather than report a meaningless score.
    raise KeyError(
        "'./test.csv' has no 'is_anomaly' column; ground-truth labels are "
        "required to compute the F1 score"
    )
y_true = labels_df['is_anomaly'].values

# Recode the model's raw output (-1 = anomaly, 1 = normal) to 1 = anomaly,
# 0 = normal so it uses the same encoding as the submission column.
y_pred = np.where(model.predict(X_test) == -1, 1, 0)

## Calculate F1 score

In [45]:
# Binary F1 of the predicted anomaly flags against the reference labels.
f1 = f1_score(y_true=y_true, y_pred=y_pred)
print(f"F1 Score: {f1}")

## Display the first few rows of the submission file

In [46]:
# Preview the first five rows of the submission table.
submission_df.head(n=5)

In [47]:
# Show the whole submission DataFrame as this cell's output.
submission_df