# SCDF TRAINING AI

General machine learning flow 

Retrieve logs from the database -> Use data to train model -> Learn the model weights and what contributes to the performance of the task -> predict performance for future missions and give suggestions on how to improve performance



# Importing required libraries

In [90]:
import json
import time

# Data analysis libraries
import pandas as pd
import numpy as np

# Machine learning libraries
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

# Database libraries
from cloudant.client import Cloudant
from cloudant.query import Query

In [3]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

 # Obtaining data from database

In [4]:
client = Cloudant.iam("adef00a8-b0a0-4d14-8305-6be0563ed542-bluemix", "4aQghGIAIrBzsJRx2fbspghgtQXPWLTfmvZKpetOaO7K", connect=True)
client.connect()

In [5]:
# Obtain database
smart_env = client['smart_environment']
query = Query(smart_env, selector={'_id': {'$gt': 0}})
receive = pd.DataFrame(query()['docs']).drop(['_id', '_rev'], axis=1)

In [75]:
# Helper Functions

# Heart Rate
def query_heart(embedded_data):
    return embedded_data['bpm'].tolist()

# Temp
def query_temp(embedded_data):
    return embedded_data['temperature'].tolist()

# Altitude
def query_altitude(embedded_data):
    return embedded_data['altitude'].tolist()

# Location
def query_location(embedded_data):
    return embedded_data['location'].tolist()

def query_details(batch,team,member):
    mission = pd.DataFrame(pd.DataFrame(pd.DataFrame(receive['info']).iloc[batch].iloc[team]).iloc[member])
    rows = len(mission.index)
    data = {}
    for i in range(rows):
        jsonstr = pd.DataFrame(mission.iloc[i])[i].to_json()
        d = json.loads(jsonstr)["1"]
        data[i] = d
    return pd.DataFrame.from_dict(data, orient='index')

Unnamed: 0,names,time,bpm,temperature,altitude,location
0,lim choo min-142,1592072663,95,23,92,"[0.0363065071, -0.6787487856]"
1,lim choo min-142,1592072664,82,43,109,"[0.8570840883, -1.4480987072]"
2,lim choo min-142,1592072665,67,23,66,"[0.5068812118, 1.150075752]"
3,lim choo min-142,1592072666,94,50,66,"[0.3665462502, -1.0061692126]"
4,lim choo min-142,1592072667,71,48,223,"[0.1599617003, -0.6116543698]"
5,lim choo min-142,1592072668,76,36,72,"[0.2764490527, 0.6197116986]"
6,lim choo min-142,1592072669,60,57,12,"[0.6046776536, 0.4899846343]"
7,lim choo min-142,1592072670,87,25,84,"[0.2527951893, -0.2596658254]"
8,lim choo min-142,1592072671,78,48,152,"[0.2215359336, -0.1985782272]"
9,lim choo min-142,1592072672,117,32,139,"[0.5178861052, 0.0839762277]"


In [76]:
mission = query_details(0,0,1)
print(mission.head())
#print("Altitude", query_altitude(mission))
print()
#print("Heart", query_heart(mission))
print()
#print("Location", query_location(mission))
print()
#print("Temperature", query_temp(mission))

              names        time  bpm  temperature  altitude  \
0  lim choo min-142  1592072663   95           23        92   
1  lim choo min-142  1592072664   82           43       109   
2  lim choo min-142  1592072665   67           23        66   
3  lim choo min-142  1592072666   94           50        66   
4  lim choo min-142  1592072667   71           48       223   

                        location  
0  [0.0363065071, -0.6787487856]  
1  [0.8570840883, -1.4480987072]  
2    [0.5068812118, 1.150075752]  
3  [0.3665462502, -1.0061692126]  
4  [0.1599617003, -0.6116543698]  





In [105]:
def get_bpm_spike(mission):
    data = []
    for i in range(len(mission.index) - 1):
        bpm1 = mission.loc[i,"bpm"]
        bpm2 = mission.loc[i+1,"bpm"]
        if (bpm2 - bpm1) > 50:
            data.append((bpm1,bpm2,mission.loc[i,"time"]))
    return data
get_bpm_spike(mission)

[(61, 119, 1592072745), (62, 116, 1592072761), (63, 115, 1592072766)]

In [114]:
stats = mission.describe()

start = stats.loc["min","time"]
end = stats.loc["max","time"] 


time_taken = (end - start)/(60)
print("Trainee : " + mission.loc[0,"names"])
print("Mission time : " + str(time_taken) + " Minutes")
print("Avg bpm : " + str(int(stats.loc["mean","bpm"])))
print("Avg temp : " + str(int(stats.loc["mean","temperature"])))
print("Max altitude : " + str(stats.loc["max","altitude"]))
print("Min altitude : " + str(stats.loc["min","altitude"]))

bpm_spikes =  get_bpm_spike(mission)
for i in range(len(bpm_spikes)):
    spike_point_min = int((end - bpm_spikes[i][2])) // 60
    spike_point_sec = int((end - bpm_spikes[i][2])) % 60
    print("! BPM spike at " + str(spike_point_min) + " minutes " + str(spike_point_sec) + " sec from " + \
          str(bpm_spikes[i][0]) + " to " + str(bpm_spikes[i][1]))

print("Performance rating : S" )

Trainee : lim choo min-142
Mission time : 7.6 Minutes
Avg bpm : 89
Avg temp : 39
Max altitude : 299.0
Min altitude : -9.0
! BPM spike at 6 minutes 14 sec from 61 to 119
! BPM spike at 5 minutes 58 sec from 62 to 116
! BPM spike at 5 minutes 53 sec from 63 to 115
Performance rating : S


In [24]:
def to_df_format(input):
    '''Transform data from mongodb to machine learning format'''
    pass


SyntaxError: invalid syntax (<ipython-input-24-f2b774a57127>, line 5)

# Model Training

In this section, we will collect and monitor the firefighter trainee performance
This will be used for the initial training

Subsequently, we can predefined certain thresholds to retrain the model at, so as to utilize new data to improve it

In [None]:
X_train,y_train = to_df_format(mission)

In [None]:
rf = RandomForestRegressor(n_estimators=30, random_state=42)
rf.fit(X_train, y_train)

In [None]:
print("Features sorted by their score:")
print sorted(zip(map(lambda x: round(x, 4), rf.feature_importances_), names), 
             reverse=True)

# Getting predictions

In [None]:
def get_current_stats:
    ''' Obtain the current details about the environment and trainee profile
    so that we get predictions on it'''
    
current_stats = get_current_stats

prediction = regressor.predict(current_stats)

In [None]:
def recommendations(prediction,curren_stats):
    ''' Give recommendations based on current stats using the model weights '''

# References

https://www.mongodb.com/blog/post/getting-started-with-python-and-mongodb