### Loading the dataset

In [1]:
import pandas as pd

Delivery_truck_trip_data_df = pd.read_csv('temp/Delivery_truck_trip_data_df.csv')


In [2]:
X = Delivery_truck_trip_data_df.drop(['ontime'], axis=1)


### Normalizing

In [3]:
from sklearn.preprocessing import QuantileTransformer

qt = QuantileTransformer(output_distribution='normal')
X = qt.fit_transform(X)


### Feature Scaling

In [4]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X = scaler.fit_transform(X)


In [5]:
df = pd.DataFrame(X)
df = pd.concat([df, Delivery_truck_trip_data_df['ontime']], axis=1)


### Splitting the dataset

In [6]:
# Separate the records based on the label
normal_records = df[df['ontime'] == True]
anomaly_records = df[df['ontime'] == False]

X_normal_records = normal_records.drop(['ontime'], axis=1)
y_normal_records = normal_records['ontime']

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_normal_records, y_normal_records, test_size=0.2, random_state=42)

train = pd.concat([X_train, y_train], axis=1)
test = pd.concat([X_test, y_test], axis=1)


In [7]:
# Randomly sample an equal number of False records
anomaly_records_sampled = anomaly_records.sample(n=len(test))

# First DataFrame: Equal number of True and False records
df_test = pd.concat([test, anomaly_records_sampled]).sample(frac=1).reset_index(drop=True)

# Second DataFrame: Remaining False records
df_remaining_normal = train.reset_index(drop=True)


In [8]:
X_anomaly = anomaly_records.drop(['ontime'], axis=1).to_numpy()
X_test = df_test.drop(['ontime'], axis=1).to_numpy()
y_test = df_test['ontime'].map({True: -1, False: 1}).to_numpy()
X_normal = df_remaining_normal.drop(['ontime'], axis=1).to_numpy()
y_normal = df_remaining_normal['ontime'].map({True: -1, False: 1}).to_numpy()


### Import Necessary Libraries:

In [9]:
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix


### Create and Train One-Class SVM Model

In [10]:
# Initialize One-Class SVM
clf = OneClassSVM(gamma='auto', nu=0.05)  # nu is an approximation of outlier fraction

# Fit the model
clf.fit(X_normal)


### Predict Anomalies

In [11]:
# Predict using the trained model
y_pred_train = clf.predict(X_normal)
y_pred_test = clf.predict(X_test)  # Only if you have test data

# Values of -1 indicate anomalies


### Evaluate the Model

In [12]:
confusion_matrix(y_test, y_pred_test)

array([[ 40, 806],
       [194, 652]])

In [13]:
print(classification_report(y_test, y_pred_test))

              precision    recall  f1-score   support

          -1       0.17      0.05      0.07       846
           1       0.45      0.77      0.57       846

    accuracy                           0.41      1692
   macro avg       0.31      0.41      0.32      1692
weighted avg       0.31      0.41      0.32      1692
