In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import json
import pandas as pd
from datetime import datetime, date

from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.palettes import viridis

# Tell Bokeh to render its plots inline in the notebook output cells.
output_notebook()

In [2]:
# Configuration

# Calendar date (year, month, day) of the log being analysed.
fechaGeneral = (2020, 10, 28)
# Path of the JSON log; the file name embeds the same date as YYYYMMDD.
rutaJson = 'data/log{:04d}{:02d}{:02d}.json'.format(*fechaGeneral)

# strptime patterns: clock time only, and full date + clock time.
timeFormat = '%H:%M:%S'
dateFormat = '%Y-%m-%d ' + timeFormat

# Placeholder; the processing cell replaces it with the parsed log records.
dataJson = None

In [3]:
# Funciones

def parseIntValue(registro, fecha=None, formato=None):
    """Normalize one raw log record in place and return it.

    The record's ``'time'`` value (a clock time such as ``'23:04:53'``) is
    combined with a calendar date and replaced by a ``datetime``.  When the
    record has a ``'duration'`` key, its value is converted to ``int``; if
    the conversion fails the error is printed and the original value is kept.

    Args:
        registro: dict with at least a ``'time'`` key. It is modified in place.
        fecha: optional ``(year, month, day)`` tuple. Defaults to the
            notebook-level ``fechaGeneral``.
        formato: optional ``strptime`` pattern for the combined
            ``"<year>-<month>-<day> <time>"`` string. Defaults to the
            notebook-level ``dateFormat``.

    Returns:
        The same ``registro`` dict, after the updates described above.

    Raises:
        KeyError: if ``registro`` has no ``'time'`` key.
        ValueError: if the combined date/time string does not match the pattern.
    """
    # Fall back to the notebook configuration when no override is given.
    if fecha is None:
        fecha = fechaGeneral
    if formato is None:
        formato = dateFormat

    timeStr = "{}-{}-{} {}".format(*fecha, registro['time'])
    registro['time'] = datetime.strptime(timeStr, formato)

    if 'duration' in registro:
        try:
            registro['duration'] = int(registro['duration'])
        except (ValueError, TypeError) as err:
            # TypeError covers a JSON null (None) duration, which int() rejects
            # with a different exception than a malformed string.
            print(err)
    return registro

In [4]:
# Processing: load the raw log and normalize every record.

# JSON text is UTF-8. Without an explicit encoding, open() falls back to the
# platform locale, which garbles non-ASCII text ("páginas" -> "pÃ¡ginas").
with open(rutaJson, encoding='utf-8') as f:
    dataJson = json.load(f)

# Parse outside the `with` block so the file is already closed while the
# records are converted.
dataJson = list(map(parseIntValue, dataJson))


In [5]:
# Build a DataFrame from the parsed records; the record keys become columns.
datos = pd.DataFrame(dataJson)
datos

Unnamed: 0,class,time,action,duration,applicationName,href
0,system,2020-10-28 23:04:53,23:04:53 New session for Scoowy. [DESKTOP-KD6G...,,,
1,system,2020-10-28 23:04:53,"23:04:53 Basic account. Screenshots, Skype and...",,,
2,clipboard,2020-10-28 23:04:53,23:04:53 Clipboard :https://www.softzone.es/20...,,,
3,system,2020-10-28 23:05:16,New session for Scoowy. [DESKTOP-KD6GQPP IP: 1...,,,
4,system,2020-10-28 23:05:16,"Basic account. Screenshots, Skype and keystork...",,,
5,app,2020-10-28 23:04:54,Desktop,22.0,explorer,
6,app,2020-10-28 23:04:56,Kidlogger [],2.0,Kidlogger,
7,app,2020-10-28 23:04:57,Cisco Packet Tracer,19.0,PacketTracer7,
8,app,2020-10-28 23:05:37,kidlogger.net/knowledgebase1.html y 3 pÃ¡ginas...,0.0,msedge,
9,url,2020-10-28 23:05:37,kidlogger.net/knowledgebase1.html y 3 pÃ¡ginas...,19.0,,kidlogger.net


In [6]:
# Boolean mask selecting the rows whose class is 'app'.
is_app = datos['class'].eq('app')
# Apply the mask and pick the class/time/duration/applicationName columns
# in a single .loc call.
df_app = datos.loc[is_app, ['class', 'time', 'duration', 'applicationName']]
df_app

Unnamed: 0,class,time,duration,applicationName
5,app,2020-10-28 23:04:54,22.0,explorer
6,app,2020-10-28 23:04:56,2.0,Kidlogger
7,app,2020-10-28 23:04:57,19.0,PacketTracer7
8,app,2020-10-28 23:05:37,0.0,msedge
10,app,2020-10-28 23:05:57,0.0,msedge
12,app,2020-10-28 23:06:47,16.0,explorer
13,app,2020-10-28 23:06:49,4.0,Kidlogger
14,app,2020-10-28 23:07:27,4.0,PacketTracer7
17,app,2020-10-28 23:07:56,18.0,explorer
18,app,2020-10-28 23:08:02,2.0,WhatsApp


In [7]:
# Sum the duration per application, then rename the two columns to the
# names the bar glyph reads from the data source ('x' and 'top').
group = (
    df_app
    .groupby('applicationName', as_index=False)['duration']
    .sum()
    .rename(columns={'applicationName': 'x', 'duration': 'top'})
)
group

Unnamed: 0,x,top
0,Kidlogger,10.0
1,PacketTracer7,23.0
2,SearchUI,7.0
3,WhatsApp,2.0
4,explorer,58.0
5,msedge,0.0


In [8]:
# Wrap the aggregated frame so Bokeh glyphs can reference its columns.
source = ColumnDataSource(group)

# Application names, used as the categorical x-range of the chart.
aplications = list(group['x'])

# One viridis colour per application; shown as this cell's output.
colors = viridis(len(aplications))
colors

('#440154', '#404387', '#29788E', '#22A784', '#79D151', '#FDE724')

In [9]:
# Bar chart with one bar per application; bar height comes from the 'top'
# column of the data source.
p1 = figure(
    title='Uso de aplicaciones',
    x_range=aplications,
    plot_width=800,
    plot_height=600,
    x_axis_label='Aplicaciones',
    y_axis_label='Tiempo',
)
p1.vbar(source=source, x='x', top='top', bottom=0, width=0.5)
show(p1)