In [1]:
%matplotlib widget

import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import os
import pandas as pd
import seaborn as sns

import sys
sys.path.append( '../src/' )

from datetime import timedelta
from mpl_toolkits.mplot3d import Axes3D
from ml.visualization import *


In [2]:
# Folder holding the CSV datasets, relative to the notebook, and the gym file name.
path = [ '../', 'datasets' ]
file = 'ds-gym-1sec.csv'

# Read the gym readings with `date` parsed as datetimes, then drop the `alt` column.
gym_csv = os.path.join( *path, file )
df = pd.read_csv( gym_csv, parse_dates = [ 'date' ] )
df = df.drop( columns = [ 'alt' ] )

In [3]:
# Add seconds resolution to the timestamps: number the rows inside each
# calendar minute (0, 1, 2, ...) and add that count, as seconds, to `date`.
per_minute = pd.Grouper( key = 'date', freq = 'min' )
df[ 'seconds' ] = df.groupby( per_minute ).cumcount()
df[ 'date' ] = df[ 'date' ] + pd.to_timedelta( df[ 'seconds' ], unit = 's' )

# The step above produced 2 duplicated timestamps; only the first row of each is kept.
# NOTE(review): presumably a minute holding more than 60 rows spills into the next one - confirm.
df = df.drop( columns = [ 'seconds' ] )
df = df.drop_duplicates( subset = [ 'date' ] )

# Tag every row with the place where it was recorded.
df[ 'location' ] = 'gym'
df

Unnamed: 0,date,pre,hum,tem,occ,location
0,2019-09-18 19:04:00,95006.28,58.69,27.17,M,gym
1,2019-09-18 19:04:01,95004.94,58.67,27.17,M,gym
2,2019-09-18 19:04:02,95002.38,58.66,27.17,M,gym
3,2019-09-18 19:04:03,95003.33,58.59,27.18,M,gym
4,2019-09-18 19:04:04,95003.33,58.55,27.18,M,gym
...,...,...,...,...,...,...
10124,2019-09-24 16:17:05,94960.91,56.44,28.61,M,gym
10125,2019-09-24 16:17:06,94958.60,56.48,28.61,M,gym
10126,2019-09-24 16:17:07,94958.60,56.72,28.63,M,gym
10127,2019-09-24 16:17:08,94961.06,56.99,28.64,M,gym


In [4]:
 # Number of missing values in each column (nothing is removed in this cell).

df.isna().sum()

date        0
pre         0
hum         4
tem         0
occ         0
location    0
dtype: int64

In [5]:
# Fill the missing humidity readings with the last valid value before them.
# The result is assigned back to the column: calling `fillna( ..., inplace = True )`
# on `df[ 'hum' ]` is chained assignment, which is not guaranteed to update `df`,
# and `fillna( method = 'ffill' )` is deprecated in favour of `Series.ffill()`.
df[ 'hum' ] = df[ 'hum' ].ffill()

# Re-check the per-column count of missing values; `hum` should now be 0.
df.isna().sum()

date        0
pre         0
hum         0
tem         0
occ         0
location    0
dtype: int64

In [6]:
# Fixing an error on the dates: rows stamped between 2019-01-01 and 2019-02
# (both bounds inclusive) are moved forward by 264 days, which maps
# 2019-01-01 onto 2019-09-22.
# NOTE(review): the original note here read "date format mm-dd-yyyy", but the
# bounds below are written year-first - confirm which format was meant.
wrong_dates = df[ 'date' ].between( '2019-01-01', '2019-02' )
df.loc[ wrong_dates, 'date' ] = df.loc[ wrong_dates, 'date' ] + timedelta( days = 264 )

# From here on the frame is indexed by timestamp.
df = df.set_index( 'date' )

In [7]:
# Getting the data collection days: first reading of every calendar day
# (days without readings appear as all-NaN rows).
by_day = df.resample( 'D' )
by_day.first()

Unnamed: 0_level_0,pre,hum,tem,occ,location
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-09-18,95006.28,58.69,27.17,M,gym
2019-09-19,,,,,
2019-09-20,,,,,
2019-09-21,,,,,
2019-09-22,,,,,
2019-09-23,95210.48,59.5,25.95,M,gym
2019-09-24,95352.31,57.63,26.99,M,gym
2019-09-25,95238.48,63.04,27.01,L,gym
2019-09-26,,,,,
2019-09-27,,,,,


In [8]:
# Read the residential readings indexed by their parsed `date` column,
# then drop the `ven` column.
home_file = 'ds-residential-1sec.csv'
home_csv = os.path.join( *path, home_file )
df_home = pd.read_csv( home_csv, index_col = 'date', parse_dates = [ 'date' ] )
df_home = df_home.drop( columns = [ 'ven' ] )

# Tag every row with the place where it was recorded.
df_home[ 'location' ] = 'living room'
df_home

Unnamed: 0_level_0,pre,hum,tem,occ,location
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-05-14 21:08:39,94270.77,54.49,30.99,L,living room
2020-05-14 21:08:40,94268.36,54.46,30.99,L,living room
2020-05-14 21:08:41,94266.11,54.46,30.99,L,living room
2020-05-14 21:08:42,94268.52,54.49,30.99,L,living room
2020-05-14 21:08:43,94269.70,54.50,30.99,L,living room
...,...,...,...,...,...
2020-06-04 23:08:55,94599.23,57.31,31.95,M,living room
2020-06-04 23:08:56,94595.97,57.35,31.95,M,living room
2020-06-04 23:08:57,94597.73,57.28,31.94,M,living room
2020-06-04 23:08:58,94595.98,57.23,31.94,M,living room


In [9]:
# Merging both datasets: residential rows first, followed by the gym rows.
# `DataFrame.append` was removed in pandas 2.0; `pd.concat` stacks the two
# frames the same way and keeps each frame's date index.
df_merge = pd.concat( [ df_home, df ] )
df_merge

Unnamed: 0_level_0,pre,hum,tem,occ,location
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-05-14 21:08:39,94270.77,54.49,30.99,L,living room
2020-05-14 21:08:40,94268.36,54.46,30.99,L,living room
2020-05-14 21:08:41,94266.11,54.46,30.99,L,living room
2020-05-14 21:08:42,94268.52,54.49,30.99,L,living room
2020-05-14 21:08:43,94269.70,54.50,30.99,L,living room
...,...,...,...,...,...
2019-09-24 16:17:05,94960.91,56.44,28.61,M,gym
2019-09-24 16:17:06,94958.60,56.48,28.61,M,gym
2019-09-24 16:17:07,94958.60,56.72,28.63,M,gym
2019-09-24 16:17:08,94961.06,56.99,28.64,M,gym


In [10]:
# Folder where the figures are written; created up front so `savefig` cannot
# fail on a missing directory.
out_path = [ '..', 'assets' ]
os.makedirs( os.path.join( *out_path ), exist_ok = True )

# Tick formatter showing values with thousands separators and no decimals.
fmt = '{x:,.0f}'
tick = mtick.StrMethodFormatter( fmt )

# One boxplot per environmental variable:
# ( column, y-axis label, output file name, y-axis tick formatter or None )
boxplots = [
    ( 'hum', 'humidity', 'boxplot_hum.png', None ),
    ( 'tem', 'temperature', 'boxplot_tem.png', None ),
    ( 'pre', 'pressure', 'boxplot_pre.png', tick ),
]

for column, label, png, formatter in boxplots:
    # Distribution of `column` per occupancy level, split by location.
    fig, ax = plt.subplots( figsize = ( 7, 7 ) )
    g = sns.boxplot( data = df_merge, x = 'occ', y = column, hue = 'location', ax = ax, order = [ 'E', 'L', 'M', 'H' ] )
    ax.set( xlabel = 'occupancy level', ylabel = label )
    if formatter is not None:
        ax.yaxis.set_major_formatter( formatter )
    fig.savefig( os.path.join( *out_path, png ) )


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [11]:
# from collections import Counter

# def plot_attr( df, attr, hue, ax, attr_name ):
#     grouped = df.groupby( hue )
#     temp = pd.DataFrame( { col: vals[ attr ] for col, vals in grouped } )
    
#     means = temp.median()
#     means.sort_values( ascending = False, inplace = True )
#     temp = temp[ means.index ]
#     g = temp.boxplot( ax = ax )
#     g.set_title( attr_name )

    
# fig, ax = plt.subplots( 1, 3, figsize = ( 18, 6 ) )

# plot_attr( df, 'hum', 'occ', ax[ 0 ], 'Humidity' )
# plot_attr( df, 'tem', 'occ', ax[ 1 ], 'Temperature' )
# plot_attr( df, 'pre', 'occ', ax[ 2 ], 'Pressure' )

# fig.suptitle( 'Fitness Gym data distribution', fontsize = 15 )



In [12]:
# Hand the timestamps (as strings) plus the temperature, humidity and occupancy
# columns to `plot_env_vars`.
# NOTE(review): `plot_env_vars` presumably comes from the star import of
# `ml.visualization` - confirm.
timestamps = df.index.astype( str )
plot_env_vars( timestamps, df[ 'tem' ], df[ 'hum' ], df[ 'occ' ] )

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [13]:
# 3D scatter of the gym readings (humidity, temperature, pressure), with one
# colour/marker per occupancy level.
fig = plt.figure( figsize = ( 10, 10 ) )
ax = fig.add_subplot( 111, projection = '3d' )

# Colour ('c') and marker ('m') used for each occupancy level.
conf = {
    'L': { 'c': 'gold', 'm': '^' },
    'M': { 'c': 'darkorange', 'm': 's' },
    'H': { 'c': 'red', 'm': '*' }
}
# Style for any level missing from `conf` (the boxplots also list an 'E' level),
# so an unexpected level is still drawn instead of raising KeyError.
fallback = { 'c': 'grey', 'm': 'o' }

for name2, group2 in df.groupby( 'occ' ):
    style = conf.get( name2, fallback )
    ax.scatter( group2.hum, group2.tem, group2.pre,
               c = style[ 'c' ],
               marker = style[ 'm' ],
               label = name2 )

ax.set_xlabel( 'Humidity' )
ax.set_ylabel( 'Temperature' )
ax.set_zlabel( 'Pressure' )
# Without a legend the colours/markers cannot be mapped back to a level.
ax.legend( title = 'occupancy level' )

# Pressure ticks with thousands separators and no decimals.
fmt = '{x:,.0f}'
tick = mtick.StrMethodFormatter( fmt )
ax.zaxis.set_major_formatter( tick )

fig.suptitle( 'Fitness Gym Temperature, Humidity, and Pressure' )

# Save before showing: on some backends `plt.show()` releases the figure, and a
# later `savefig` would then write an empty image.
fig.savefig( os.path.join( *out_path, 'gym_env.png' ) )
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …