In [1]:
import pandas as pd
import sys
sys.path.append('../..')
import sla



In [2]:
structured_logs = pd.read_csv('./data/structured_logs.csv')
structured_logs['datetime'] = pd.to_datetime(structured_logs['date'] + ' ' + structured_logs['time'])
structured_logs.head()

Unnamed: 0,date,time,level,ip,category,message,datetime
0,2025 Feb 7,13:00:01,notice,10.2.1.20,System,Log rotation: Successfully rotated Activity Log.,2025-02-07 13:00:01
1,2025 Feb 7,13:00:17,notice,10.2.1.20,Host Listener,Accepted connection 1 from Client:10.8.63.24 o...,2025-02-07 13:00:17
2,2025 Feb 7,13:00:17,notice,10.2.1.20,Host Listener,Accepted connection 2 from Client:10.8.63.24 o...,2025-02-07 13:00:17
3,2025 Feb 7,13:00:17,notice,10.2.1.20,Host Listener,"Listener(Port 8005, NIC 1) - socket 23 closed",2025-02-07 13:00:17
4,2025 Feb 7,13:00:17,notice,10.2.1.20,Host Listener,"Listener(Port 8005, NIC 1) - socket 24 closed",2025-02-07 13:00:17


### Agregación de datos String

In [3]:
import sla.transformer

# Crear instancia del agregador
aggregator = sla.transformer.StringAggregator(structured_logs, timestamp_column='datetime')
    
    # Definir métricas personalizadas
column_metrics = {
'category': ['count'],
'ip': ['nunique'],
'datetime': ['mean', 'max', 'min']
    }
    
# Definir conteo por categorías
category_count_columns = {
        'level': list(set(structured_logs.level.to_list())),
        'category': list(set(structured_logs.category.to_list()))
    }
    
aggregate_logs = aggregator.create_time_aggregation(
        time_window='5min', 
        column_metrics=column_metrics,
        category_count_columns=category_count_columns
    )

aggregate_logs.rename(columns={'category_count':'total_events'},inplace=True)
aggregate_logs.head()

Unnamed: 0,total_events,ip_nunique,level_critical_count,level_information_count,level_debug_count,level_notice_count,level_alert_count,category_Front Panel_count,category_Logger_count,category_ACS_count,category_Host Listener_count,category_SNMP Agent_count,category_Process Monitor_count,category_System_count,avg_time_between_events_seconds,min_time_between_events_seconds,max_time_between_events_seconds
2024-10-31 02:00:00,4,1,0,0,1,3,0,0,0,2,0,1,0,1,49.333333,0.0,148.0
2024-10-31 02:05:00,3,1,0,0,1,2,0,0,0,2,0,1,0,0,0.0,0.0,0.0
2024-10-31 02:10:00,3,1,0,0,1,2,0,0,0,2,0,1,0,0,0.0,0.0,0.0
2024-10-31 02:15:00,4,1,0,0,2,2,0,0,0,2,0,2,0,0,0.333333,0.0,1.0
2024-10-31 02:20:00,3,1,0,0,1,2,0,0,0,2,0,1,0,0,0.0,0.0,0.0


### Ventana deslizante 

In [4]:
 # Initialize transformer
transformer =  sla.transformer.RollingAgregator(
        window_size=10,
        aggregation_functions=['mean', 'std', 'min', 'max'],
    )
    
# Apply transformation
rolling_logs = transformer.fit_transform(aggregate_logs).fillna(0)
rolling_logs.head()

Unnamed: 0,total_events,ip_nunique,level_critical_count,level_information_count,level_debug_count,level_notice_count,level_alert_count,category_Front Panel_count,category_Logger_count,category_ACS_count,...,avg_time_between_events_seconds_rolling_min,avg_time_between_events_seconds_rolling_max,min_time_between_events_seconds_rolling_mean,min_time_between_events_seconds_rolling_std,min_time_between_events_seconds_rolling_min,min_time_between_events_seconds_rolling_max,max_time_between_events_seconds_rolling_mean,max_time_between_events_seconds_rolling_std,max_time_between_events_seconds_rolling_min,max_time_between_events_seconds_rolling_max
2024-10-31 02:00:00,4,1,0,0,1,3,0,0,0,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2024-10-31 02:05:00,3,1,0,0,1,2,0,0,0,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2024-10-31 02:10:00,3,1,0,0,1,2,0,0,0,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2024-10-31 02:15:00,4,1,0,0,2,2,0,0,0,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2024-10-31 02:20:00,3,1,0,0,1,2,0,0,0,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Isolation Forest

In [5]:
import sla.detectors

X_train = rolling_logs

if_detector = sla.detectors.IsolationForestDetector(n_estimators=100, random_state=42)
if_detector.fit(X_train)
predictions = if_detector.predict(X_train)
scores = if_detector.decision_function(X_train)
anomalies = if_detector.get_anomalies(X_train)
proba = if_detector.predict_proba(X_train)


anomalies.head()


Unnamed: 0,total_events,ip_nunique,level_critical_count,level_information_count,level_debug_count,level_notice_count,level_alert_count,category_Front Panel_count,category_Logger_count,category_ACS_count,...,avg_time_between_events_seconds_rolling_min,avg_time_between_events_seconds_rolling_max,min_time_between_events_seconds_rolling_mean,min_time_between_events_seconds_rolling_std,min_time_between_events_seconds_rolling_min,min_time_between_events_seconds_rolling_max,max_time_between_events_seconds_rolling_mean,max_time_between_events_seconds_rolling_std,max_time_between_events_seconds_rolling_min,max_time_between_events_seconds_rolling_max
2024-10-31 02:00:00,4,1,0,0,1,3,0,0,0,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2024-10-31 02:45:00,3,1,0,0,1,2,0,0,0,2,...,0.0,49.333333,0.0,0.0,0.0,0.0,14.9,46.767629,0.0,148.0
2024-10-31 02:50:00,3,1,0,0,1,2,0,0,0,2,...,0.0,0.333333,0.0,0.0,0.0,0.0,0.1,0.316228,0.0,1.0
2024-10-31 02:55:00,3,1,0,0,1,2,0,0,0,2,...,0.0,0.333333,0.0,0.0,0.0,0.0,0.1,0.316228,0.0,1.0
2024-10-31 03:00:00,3,1,0,0,1,2,0,0,0,2,...,0.0,0.333333,0.0,0.0,0.0,0.0,0.1,0.316228,0.0,1.0


### RRCF

In [8]:
# Initialize the detector
rrcf_detector = sla.detectors.RRCFDetector()

# Get anomaly scores
anomaly_scores = rrcf_detector.fit_predict(rolling_logs.tail(300))

# Get anomalies with a threshold
anomalies = rrcf_detector.get_anomalies(threshold=3.0)

# Print the results
print("Anomaly Scores:\n", anomaly_scores)
print("Anomalies:\n", anomalies)

Anomaly Scores:
 0        5.228697
1       13.133933
2       10.051186
3       10.760757
4       13.901677
          ...    
295      7.752721
296      5.255091
297      4.390024
298      4.892815
299    107.490485
Length: 300, dtype: float64
Anomalies:
 0        5.228697
1       13.133933
2       10.051186
3       10.760757
4       13.901677
          ...    
295      7.752721
296      5.255091
297      4.390024
298      4.892815
299    107.490485
Length: 279, dtype: float64
