In [1]:
import json
import pickle
import sys
from copy import copy, deepcopy

import lightgbm as lgb
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from tqdm import tqdm_notebook

# Project-local helper modules live outside the notebook directory
sys.path.append('/home/ndsviriden/MinMax94/src/utils')
from interpolation import interpolate_mmx, create_patterns
from constants import mmcc_forecast_columns, mmx_columns, MmccRwisColumns, MmxColumns, mmx_meteo_columns
from constants import params_anomaly_feature_selection, anomaly_threshold
from converters import convert_rp5_to_mmx, convert_raw_to_mmx, convert_mmx_to_mmcc_forecast, convert_raw_to_mmx, \
                convert_mmx_to_mmcc_rwis, convert_mmcc_rwis_to_mmx, convert_input_for_anomaly_detection
from loaders import load_rp5_stations, load_mm94_stations
from geographical import find_nearest_wmo_station, add_solar_angles
from mmcc_input import get_mmcc_input_json, get_mmcc_prediction
from map_data_dicts import map_data_mmcc_rwis_to_mmx
from preprocessing import create_feature_df

## Load data

In [2]:
# Station id handed to load_mm94_stations and reused by the cells below
mm94_station_id = 1896

# Raw station records -> MMX frame -> MMCC RWIS frame indexed by "date_time_utc"
raw = load_mm94_stations(mm94_station_id)
mmx_rwis = convert_raw_to_mmx(raw)
mmcc_rwis = convert_mmx_to_mmcc_rwis(mmx_rwis).set_index("date_time_utc")

# Disabled alternative: interpolate and add patterns before the MMCC conversion
#mmx_rwis_interpolated = interpolate_mmx(mmx_rwis)
#mmx_rwis_interpolated = create_patterns(mmx_rwis_interpolated)
#mmcc_rwis = convert_mmx_to_mmcc_rwis(mmx_rwis_interpolated)

In [3]:
# Pick the WMO station nearest to the MM94 station (verbose=True prints the distance)
wmo_station_id = find_nearest_wmo_station(mm94_station_id, verbose=True)

# RP5 records for that station -> MMX frame -> interpolated MMX frame
rp5 = load_rp5_stations(wmo_station_id)
mmx_forecast = convert_rp5_to_mmx(rp5)
mmx_forecast_interpolated = interpolate_mmx(mmx_forecast, 20)  # NOTE(review): meaning of 20 is defined by interpolate_mmx - confirm

# MMCC forecast frame indexed by "date_time_utc"
mmcc_forecast = convert_mmx_to_mmcc_forecast(mmx_forecast_interpolated).set_index("date_time_utc")

distance([68.286167 33.355477], [67.97 32.88]) = 40.43 km


## Transform into anomaly input format

In [4]:
# Inclusive time window of RWIS observations used to build the detector input
start = pd.Timestamp(year=2016, month=3, day=17)
end = pd.Timestamp(year=2016, month=3, day=19)
in_window = (mmcc_rwis.index >= start) & (mmcc_rwis.index <= end)
mmcc_rwis_part = mmcc_rwis[in_window]

# One MMCC input JSON per timestamp in the window (progress shown by tqdm)
prediction_list = [
    get_mmcc_input_json(mm94_station_id, mmcc_rwis_part, mmcc_forecast, time_record)
    for time_record in tqdm_notebook(mmcc_rwis_part.index)
]

# Payload consumed by the anomaly-detection pipeline below
input_anomaly = {'station_id': mm94_station_id, 'meteo_data': prediction_list}

Widget Javascript not detected.  It may not be installed or enabled properly.





## Pipeline of the algorithm

In [5]:
# Convert the input JSON record into a pandas DataFrame, which is easier to work with
test_input_json = input_anomaly['meteo_data'][-31]  # select one record
# Greatest key of 'rwis_data'; presumably the latest timestamp - verify that keys sort chronologically
record_to_check = max(test_input_json['rwis_data'].keys())
mmx_rwis = convert_input_for_anomaly_detection(test_input_json)

# Select sensor for analysis
target = MmxColumns.ROAD_TEMPERATURE  # example: data_t_road
target_feature = target.replace('data', 'target')  # example: target_t_road
sensor = target.split('_', 1)[-1]  # example: t_road

# Feature selection
features_anomaly_params = params_anomaly_feature_selection[target]
df = create_feature_df(mmx_rwis, target, **features_anomaly_params)
features = [col for col in df if col.startswith('data_')]

# Create X, y for regression
X = np.array(df[features].values)
y_true = np.array(df[target_feature].values)

# Create LightGBM matrix (not used by the prediction below; kept for inspection / later use)
lgb_Xy = lgb.Dataset(X, y_true, feature_name=features)

# Load model
# SECURITY: pickle.load can execute arbitrary code - only load detector files from a trusted source
detector_path = '/mnt/HARD/MinMax94/models/anomaly_detection/detector_{0}.pickle'.format(sensor)

with open(detector_path, 'rb') as detector:
    anomaly_detector = pickle.load(detector)

# Load threshold
threshold = anomaly_threshold[target]

# Predict values and label anomalies: a record is anomalous when the absolute
# difference between the measured and the predicted value exceeds the threshold
y_pred = anomaly_detector.predict(X)
df['decision_function'] = np.abs(y_true - y_pred)
df['anomaly_label'] = df['decision_function'] > threshold

# Extract label of the last measurement
# NOTE(review): assumes the last row of df corresponds to record_to_check - confirm
target_record_label = df['anomaly_label'].iloc[-1]  # the last measurement

# Create output json.
# A deep copy is required: the write below targets a nested dict, and a shallow
# copy would share it with test_input_json (and with input_anomaly), silently
# overwriting the input data and making this cell non-repeatable.
test_output_json = deepcopy(test_input_json)

# If anomaly: 1. change value for 9999, 2. write something in logs
# NOTE(review): the output column is fixed to ROAD_TEMPERATURE; it must be changed together with `target`
if target_record_label:
    print('Anomaly! We should write something in logs!')
    test_output_json['rwis_data'][record_to_check][MmccRwisColumns.ROAD_TEMPERATURE] = 9999

Anomaly! We should write something in logs!
