In [1]:
import os
import numpy as np
import pandas as pd
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from plotly import offline
import plotly.io as pio
import plotly.express as px
from datetime import datetime, timedelta

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Initialise plotly's offline notebook mode so the iplot() calls in later cells
# can render figures inline.
init_notebook_mode(connected=True)


In [2]:
# Monthly CSV files to load; all of them live under PATH.
files = ['august.csv', 'september.csv', 'october.csv']
PATH = '../data/private/csv/fam/'

# Read each monthly file, then stack the frames into a single one.
monthly_frames = []
for file in files:
    monthly_frames.append(pd.read_csv(os.path.join(PATH, file)))

df = pd.concat(monthly_frames, sort=False)
print('Total de registros: %s'%df['month'].count())

# Keep only the window from 27 August through 7 October.
query = '(month == 8 and day >= 27) or (month == 9) or (month == 10 and day <= 7)'
df = df.query(query)
print('Total de registros: %s'%df['month'].count())

df.head()

Total de registros: 53160
Total de registros: 26218


Unnamed: 0,time,sysclass,lat,lon,dir,vel,size,ttyyyxx1,ttyyyxx2,ttyyyxx3,...,maxz,meanvil,ttvil,meanprec,maxprec,year,month,day,hour,minute
12871,0.0,0.0,-2.75,-58.57,-999.0,0.0,13.0,0.0,0.0,0.0,...,22.28,0.18,2.31,0.97,1.18,2014,8,27,2,24
12872,0.2,1.0,-2.73,-58.57,0.0,11.1,30.0,0.0,0.0,0.0,...,28.17,0.3,9.1,1.8,4.16,2014,8,27,2,24
12873,0.0,0.0,-2.73,-58.57,-999.0,0.0,21.0,0.0,0.0,0.0,...,23.01,0.18,3.76,1.02,1.32,2014,8,27,3,0
12874,0.2,1.0,-2.75,-58.57,180.0,11.1,13.0,0.0,0.0,0.0,...,22.28,0.18,2.28,0.97,1.09,2014,8,27,3,0
12875,0.0,0.0,-1.71,-59.63,-999.0,0.0,12.0,0.0,0.0,0.0,...,21.46,0.16,1.96,0.92,0.97,2014,8,27,3,48


In [4]:
# Split the records on the `ttyyyxx3` column (plotted below as the number of
# detected lightning strikes): strictly below the threshold vs. at or above it.
threshold = 1
lightning_count = df['ttyyyxx3']
no = df[lightning_count < threshold]
yes = df[lightning_count >= threshold]

# Size of each subset (non-null `month` entries).
tn = no['month'].count()
ty = yes['month'].count()

labels = [
    'Sem raios detectados (%d)'%tn,
    'Com raios detectados (%d)'%ty,
]
values = [tn, ty]

# Donut chart: black slice for "no lightning", red slice for "with lightning".
slice_style = dict(
    colors=['#000000', '#D82D3B'],
    line=dict(color='#A0A0A0', width=1),
)
pie = [go.Pie(labels=labels, values=values, hole=.5, marker=slice_style)]

fig = go.Figure(data=pie)
fig.update_layout(
    title='Quantidade de registros com e sem detecção de descargas atmosféricas',
    template='plotly_dark',
    legend_orientation="v",
)

iplot(fig)

In [6]:
# Number of records for each distinct value of `ttyyyxx3`; the result is a
# one-column frame indexed by that value.
group = df.groupby('ttyyyxx3')['ttyyyxx3'].count().to_frame()
group.head()

Unnamed: 0_level_0,ttyyyxx3
ttyyyxx3,Unnamed: 1_level_1
0.0,22820
1.0,1134
2.0,453
3.0,249
4.0,194


In [7]:
# Half-open [start, stop) bins of lightning counts; each bin becomes one bar trace.
ranges = [
    (5, 10), (10, 20), (20, 30), (30, 40), (40, 50)
]

fig = go.Figure()

for start, stop in ranges:
    # Select by the lightning count itself (the index of `group`) instead of by
    # row position: positional slices only line up with the counts when every
    # value 0, 1, 2, ... occurs in the data.
    in_bin = (group.index >= start) & (group.index < stop)
    labels = [int(count) for count in group.index[in_bin]]
    values = group['ttyyyxx3'].values[in_bin]

    fig.add_trace(
        # `stop` is excluded from the bin, so the legend names the last count
        # actually included; adjacent bins no longer appear to overlap.
        go.Bar(x=labels, y=values,
               name='%d a %d detecções de raios'%(start, stop - 1))
    )

fig.update_yaxes(title_text='Quantidade de registros')
fig.update_xaxes(title_text='Raios detectados a cada 12 minutos')
fig.update_layout(
    template='plotly_dark',
    title='Frequência do número de raios detectados a cada 12 minutos',
)
offline.iplot(fig)

In [8]:
# `ttyyyxx3` values of the records where that value is below 100, as an array.
lightnings = df.loc[df['ttyyyxx3'] < 100, 'ttyyyxx3'].values

In [9]:
columns = [
    'ttyyyxx3', 'riverfrac', 'convfrac', 'strafrac',
    'meanz', 'maxz', 'meanvil', 'ttvil', 'meanprec', 'maxprec',
]

# Sampling settings: a fixed seed makes the printed summaries identical on
# every run, and the size is capped at the subset length so a small subset
# does not make `.sample` raise.
sample_size = 1000
sample_seed = 42

# The first entry of `columns` (the lightning count) is skipped: it is the
# variable the two subsets were split on.
summary_columns = columns[1:]

print('Descrição dos registros com raios\n',
      yes[summary_columns]
      .sample(min(sample_size, len(yes)), random_state=sample_seed)
      .describe())
print('\n')
print('Descrição dos registros sem raios\n',
      no[summary_columns]
      .sample(min(sample_size, len(no)), random_state=sample_seed)
      .describe())

Descrição dos registros com raios
          riverfrac     convfrac     strafrac        meanz         maxz  \
count  1000.000000  1000.000000  1000.000000  1000.000000  1000.000000   
mean     10.134650    41.262230    58.737760    34.345730    46.690220   
std      14.653194    23.040693    23.040688     4.567917     7.008362   
min       0.000000     0.000000     0.000000    20.530000    21.460000   
25%       0.000000    25.545000    49.435000    31.720000    43.512500   
50%       3.985000    33.010000    66.990000    35.230000    48.980000   
75%      15.385000    50.565000    74.455000    37.720000    51.550000   
max     100.000000   100.000000   100.000000    43.020000    59.950000   

           meanvil        ttvil     meanprec      maxprec  
count  1000.000000  1000.000000  1000.000000  1000.000000  
mean      2.047410   614.502010    13.435930   111.842100  
std       1.141388   955.233699     8.385626    79.156935  
min       0.200000     2.860000     0.950000     1.140000 

In [10]:
# Grid geometry and number of records drawn from each subset per panel.
rows, cols = 1, 3
records = 100

# One entry per panel: data column, y-axis title, and (without, with) colours.
indexes = ['maxz', 'ttvil', 'maxprec']
yaxes = [
    "Refletividade (dBZ)",
    "VIL (kg m<sup>-2</sup>)",
    "Precipitação (mm/h)"
]
colors = [
    ('#480B0B', '#C61D1D'),
    ('#0E4612', '#30FF41'),
    ('#00355D', '#0090FF'),
]

fig = make_subplots(
    rows=rows, cols=cols,
    subplot_titles=('Máxima refletividade', 'VIL', 'Máxima Precipitação'),
)

# Visit the panels row by row; `idx` is the panel's position in that order.
for idx in range(rows * cols):
    row, col = idx // cols + 1, idx % cols + 1
    column = indexes[idx]
    without_color, with_color = colors[idx]

    # Box for a random draw of records without lightning...
    fig.add_trace(
        go.Box(
            y=no.sample(records)[column],
            name='Sem detecção de raios (%s)'%(column),
            marker_color=without_color,
            boxmean=False,
        ),
        row=row, col=col,
    )
    # ...next to a box for a random draw of records with lightning.
    fig.add_trace(
        go.Box(
            y=yes.sample(records)[column],
            name='Com detecção de raios (%s)'%(column),
            marker_color=with_color,
        ),
        row=row, col=col,
    )
    fig.update_yaxes(title_text=yaxes[idx], row=row, col=col)
    fig.update_xaxes(showticklabels=False, row=row, col=col)

fig.update_layout(
    template='plotly_dark',
    legend_orientation="h",
    title='Distribuição de alguns índices (%d amostras de cada subconjunto)\n\n'%records,
)
offline.iplot(fig)

In [11]:
# One stacked panel per lightning-count threshold, all sharing the x axis.
fig = make_subplots(rows=3, cols=1, shared_xaxes=True)
thresholds = [1, 5, 10]
records = 40
# (row, col) position of each threshold's panel.
indexs = [(1, 1), (2, 1), (3, 1)]
# (below-threshold colour, at-or-above-threshold colour) per panel.
colors = [
    ('#480B0B', '#C61D1D'),
    ('#0E4612', '#30FF41'),
    ('#00355D', '#0090FF'),
]

# Fixed seed so the sampled boxes are identical on every run.
sample_seed = 42

for (row, col), threshold, (cset1, cset2) in zip(indexs, thresholds, colors):
    # Use loop-local names here: the global `no`/`yes` subsets built earlier in
    # the notebook must stay intact so the cells that rely on them can be re-run.
    below = df[df['ttyyyxx3'] < threshold].sample(records, random_state=sample_seed)
    at_or_above = df[df['ttyyyxx3'] >= threshold].sample(records, random_state=sample_seed)

    fig.add_trace(
        go.Box(
            x=below['maxz'],
            name='Subconjunto com menos de %s raio(s) detectado(s)'%threshold,
            marker_color=cset1,
        ),
        row=row, col=col,
    )

    fig.add_trace(
        go.Box(
            x=at_or_above['maxz'],
            name='Subconjunto com %s ou mais raio(s) detectado(s)'%threshold,
            marker_color=cset2,
        ),
        row=row, col=col,
    )

# Applies to every y axis at once, so a single call after the loop is enough.
fig.update_yaxes(showticklabels=False)

# Only the bottom panel carries the x-axis title (the x axis is shared).
fig['layout']['xaxis3'].update(title='Máxima refletividade (dBZ)')

fig.update_layout(
    legend=dict(x=-0.11, y=-0.30),
    legend_orientation="h",
    font={'size':14},
    title='Comparação da distribuição do índice de <b>máxima refletividade</b> entre '\
    'os subconjuntos <br />com e sem descargas elétricas atmosféricas', template='plotly_dark',
)

iplot(fig)

In [12]:
# Sum of `ttyyyxx3` for each (month, day) pair.
group = df.groupby(['month', 'day'])[['ttyyyxx3']].sum()
group.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ttyyyxx3
month,day,Unnamed: 2_level_1
8,27,0.0
8,28,0.0
8,29,0.0
8,30,0.0
8,31,0.0


In [13]:
# Map each day, keyed as zero-padded 'DD/MM', to its accumulated lightning total.
amount = {}
for (month, day), row in group.iterrows():
    key = '%02d/%02d'%(day, month)
    # Create the day's entry on first sight, then add this row's total to it.
    amount.setdefault(key, {'lightning': 0})['lightning'] += row['ttyyyxx3']

In [14]:
# Day labels and their lightning totals, both in the insertion order of `amount`.
x = list(amount)
y = [entry['lightning'] for entry in amount.values()]

In [19]:
# Grand total of the `ttyyyxx3` column of `group`.
group.loc[:, 'ttyyyxx3'].sum()

86354.0

In [16]:
values = y
labels = x

# Single red line with markers: one point per entry of `labels`.
daily_trace = go.Scatter(
    x=labels,
    y=values,
    mode='lines+markers',
    line=dict(color='#D82D3B'),
    name='Descargas atmosféricas',
)

fig = go.Figure()
fig.add_trace(daily_trace)

fig.update_layout(
    title='Total de descargas atmosféricas detectadas por dia pela rede LINET',
    xaxis_title='Dia observado',
    yaxis_title='Total de raios detectados no dia',
    template='plotly_dark',
    font={'size':14},
)

# Render inline, then also write a standalone HTML page that requests a PNG export.
iplot(fig)
# NOTE(review): this image_filename is the same name used in the box-plot
# cell's commented-out export — confirm it is intended for this chart.
offline.plot(
    fig,
    image='png',
    image_filename='distribuicao-de-indices-fam',
    image_width=1000,
    image_height=500,
)

'temp-plot.html'