In [5]:
import pymongo
import pandas as pd
import os
import sys
import json
import pickle
from sklearn.preprocessing import MinMaxScaler, binarize
from sklearn import metrics

In [6]:
MONGO_USER = "root"
MONGO_PASS = "rootPass"
MONGO_HOST = "lattice-101.cs.colostate.edu"
MONGO_PORT = 27018
MONGO_URL = f"mongodb://{MONGO_USER}:{MONGO_PASS}@{MONGO_HOST}:{MONGO_PORT}"
DB_NAME = "sustaindb"

FEATURE_FIELDS = [
    "PRESSURE_REDUCED_TO_MSL_PASCAL",
    "VISIBILITY_AT_SURFACE_METERS",
    "VISIBILITY_AT_CLOUD_TOP_METERS",
    "WIND_GUST_SPEED_AT_SURFACE_METERS_PER_SEC",
    "PRESSURE_AT_SURFACE_PASCAL",
]

# LABEL_FIELD = "CATEGORICAL_SNOW_SURFACE_BINARY"
LABEL_FIELD = "CATEGORICAL_RAIN_SURFACE_BINARY"

In [7]:
db_connection = pymongo.MongoClient(MONGO_URL)
db = db_connection[DB_NAME]
noaa = db['noaa_nam']

In [8]:
gis_joins = noaa.distinct('GISJOIN')
print(f'No. of GISJOINs: {len(gis_joins)}')
with open('./gis_joins.json', 'w') as out_file:
    out_file.write(str(gis_joins))

No. of GISJOINs: 3088


In [14]:
gis_join = gis_joins[1]
print(f'gis_join: {gis_join}')
query = {'GISJOIN': gis_join}
# Build projection
projection = {"_id": 0}
for feature in FEATURE_FIELDS:
    projection[feature] = 1
projection[LABEL_FIELD] = 1

single_raw_data = noaa.find(query, projection)
print(f'[+] Queried {gis_join}')

# Load trained model
model = pickle.load(open('model.pkl', 'rb'))
features_df = pd.DataFrame(list(single_raw_data))
scaled = MinMaxScaler(feature_range=(0, 1)).fit_transform(features_df)
print('[+] Scaled')
features_df = pd.DataFrame(scaled, columns=features_df.columns)

label_df = features_df.pop(LABEL_FIELD)

#inputs_numpy = features_df.to_numpy()
#print(f'inputs_numpy: {inputs_numpy}')
y_true = label_df.values
print(f'y_true: {y_true}')

# Predict
y_pred_class = model.predict(features_df.values)
print(f'y_pred_class: {y_pred_class}')

accuracy = metrics.accuracy_score(y_true, y_pred_class)
print(f'Accuracy: {accuracy}')
print(f'Percentage of 1s: {y_true.mean()}')
print(f'Percentage of 0s: {1 - y_true.mean()}')

gis_join: G0100030
[+] Queried G0100030


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


[+] Scaled
y_true: [1. 1. 1. ... 0. 0. 0.]
y_pred_class: [0. 0. 0. ... 0. 0. 0.]
Accuracy: 0.8598762704374724
Percentage of 1s: 0.1401237295625276
Percentage of 0s: 0.8598762704374724




In [15]:
thresholds = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
inputs_numpy = features_df.to_numpy()
for t in thresholds:
    # y_pred_prob = (model.predict_proba(features_df.values)[:, 1] >= t).astype(int)
    y_pred_prob = (model.predict_proba(inputs_numpy) >= t).astype(int)
    # Calculate Precision
    precision = metrics.precision_score(y_true, y_pred_class, zero_division=0)
    print(f"Precision (t = {t}): {precision}")

    # Calculate Recall
    recall = metrics.recall_score(y_true, y_pred_class, zero_division=0)
    print(f"Recall (t = {t}): {recall}")

    # ROC Curves and Area Under the Curve (AUC)
    fpr, tpr, thresholds = metrics.roc_curve(y_true, y_pred_prob)
    roc_auc_score = metrics.roc_auc_score(y_true, y_pred_prob)
    print(f"roc_auc_score (t = {t}): {roc_auc_score}")
    print()



Precision (t = 0.1): 0.0
Recall (t = 0.1): 0.0
roc_auc_score (t = 0.1): 0.5

Precision (t = 0.2): 0.0
Recall (t = 0.2): 0.0
roc_auc_score (t = 0.2): 0.5

Precision (t = 0.3): 0.0
Recall (t = 0.3): 0.0
roc_auc_score (t = 0.3): 0.5

Precision (t = 0.4): 0.0
Recall (t = 0.4): 0.0
roc_auc_score (t = 0.4): 0.5

Precision (t = 0.5): 0.0
Recall (t = 0.5): 0.0
roc_auc_score (t = 0.5): 0.5

Precision (t = 0.6): 0.0
Recall (t = 0.6): 0.0
roc_auc_score (t = 0.6): 0.5

Precision (t = 0.7): 0.0
Recall (t = 0.7): 0.0
roc_auc_score (t = 0.7): 0.5

Precision (t = 0.8): 0.0
Recall (t = 0.8): 0.0
roc_auc_score (t = 0.8): 0.5

Precision (t = 0.9): 0.0
Recall (t = 0.9): 0.0
roc_auc_score (t = 0.9): 0.5



