In [1]:
import os
import numpy as np
import pandas as pd
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.io as pio
import plotly.express as px
from datetime import datetime, timedelta

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Enable inline rendering for the iplot() calls used throughout the notebook.
# NOTE(review): per plotly's offline docs, connected=True loads plotly.js from
# a CDN rather than embedding it, so viewing needs network access - confirm.
init_notebook_mode(connected=True)


In [2]:
# Monthly CSV exports to load and the directory that holds them.
files = ['august.csv', 'september.csv', 'october.csv']
PATH ='../data/private/csv/mat/'

# Read every month into its own frame, then stack them into one table.
monthly_frames = [pd.read_csv(os.path.join(PATH, csv_name)) for csv_name in files]
df = pd.concat(monthly_frames, sort=False)

total_loaded = df['month'].count()
print('Total de registros: %s'%total_loaded)

# Restrict the data to the window from 27 August through 7 October.
query = '(month == 8 and day >= 27) or (month == 9) or (month == 10 and day <= 7)'
df = df.query(query)

total_in_window = df['month'].count()
print('Total de registros: %s'%total_in_window)

df.head()

Total de registros: 6353567
Total de registros: 2515607


Unnamed: 0,year,month,day,hour,minute,n_cluster,lon,lat,ind_x,ind_y,reflect,yyyyy_xx1,yyyyy_xx2,yyyyyyy_xx3,yyy_xx4,yyy_xx5,river,cxe,precipit,vil
1201483,2014.0,8.0,27.0,2.0,24.0,1.0,-58.5914,-2.7089,190.0,98.0,20.2031,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.8128,0.1473
1201484,2014.0,8.0,27.0,2.0,24.0,1.0,-58.5714,-2.7089,191.0,98.0,21.125,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9364,0.1666
1201485,2014.0,8.0,27.0,2.0,24.0,1.0,-58.6114,-2.7289,189.0,99.0,20.2031,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.8128,0.1473
1201486,2014.0,8.0,27.0,2.0,24.0,1.0,-58.5914,-2.7289,190.0,99.0,20.2031,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.8128,0.2011
1201487,2014.0,8.0,27.0,2.0,24.0,1.0,-58.5714,-2.7289,191.0,99.0,20.9375,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9098,0.1874


In [3]:
# Split the records on the 'yyyyyyy_xx3' column: strictly below the threshold
# (charted as "sem raios") versus at or above it (charted as "com raios").
# Two explicit comparisons are used, so rows where the column is NaN land in
# neither subset.
threshold = 1
no = df.loc[df['yyyyyyy_xx3'] < threshold]
yes = df.loc[df['yyyyyyy_xx3'] >= threshold]

# Subset sizes, counted as non-null 'month' values.
tn, ty = (subset['month'].count() for subset in (no, yes))

values = [tn, ty]
labels = [
    'Sem raios detectados (%d)'%tn,
    'Com raios detectados (%d)'%ty,
]

# Donut chart: black slice for "no lightning", red slice for "lightning".
slice_style = dict(
    colors=['#000000', '#D82D3B'],
    line=dict(color='#A0A0A0', width=1),
)
pie = [go.Pie(labels=labels, values=values, hole=.5, marker=slice_style)]

fig = go.Figure(data=pie)
fig.update_layout(
    title='Quantidade de registros com e sem detecção de descargas atmosféricas',
    font={'size':20},
    template='plotly_dark',
    legend_orientation="v",
)
iplot(fig)

In [4]:
# Columns of interest. The first entry is the column the records were split
# on, so it is skipped (columns[1:]) in the summaries below.
columns = [
    'yyyyyyy_xx3', 'reflect', 'vil',
    'precipit', 'river', 'cxe',
]

SAMPLE_SIZE = 1000
# Fixed seed: without it every run prints different statistics.
SAMPLE_SEED = 42


def describe_sample(frame, size=SAMPLE_SIZE, seed=SAMPLE_SEED):
    """Return describe() statistics for a seeded random sample of `frame`.

    Only the feature columns (columns[1:]) are summarised. The sample size is
    clamped to the number of available rows, because DataFrame.sample raises
    when asked for more rows than exist (without replacement); the 'count'
    row of the result shows how many rows were actually used.
    """
    n_rows = min(size, len(frame))
    return frame[columns[1:]].sample(n_rows, random_state=seed).describe()


print('Descrição dos registros com raios\n', describe_sample(yes))
print('\n')
print('Descrição dos registros sem raios\n', describe_sample(no))

Descrição dos registros com raios
            reflect          vil     precipit        river          cxe
count  1000.000000  1000.000000  1000.000000  1000.000000  1000.000000
mean     34.184281     4.056517    25.542495     1.100000     1.437000
std       9.084428     5.247687    38.381156     3.301651     0.496263
min      19.984400     0.147300     0.786000     0.000000     1.000000
25%      26.164075     0.753775     2.060075     0.000000     1.000000
50%      33.859400     1.918950     6.988150     0.000000     1.000000
75%      41.773425     5.074575    34.659075     0.000000     2.000000
max      54.875000    36.895500   269.823000    11.000000     2.000000


Descrição dos registros sem raios
            reflect          vil     precipit        river          cxe
count  1000.000000  1000.000000  1000.000000  1000.000000  1000.000000
mean     28.202861     1.243513     7.908758     0.935000     1.364000
std       6.820212     1.917121    16.917738     3.069234     0.481389
min  

In [5]:
# One box-plot panel per column in `indexes`, each comparing a random sample
# of the records without lightning (`no`) against one with lightning (`yes`).
rows, cols = 1, 3
records = 100       # rows drawn from each subset for every panel
SAMPLE_SEED = 42    # fixed so the figure is identical on every run
indexes = ['reflect', 'vil', 'precipit']
yaxes = [
    "Refletividade (dBZ)",
    "VIL (kg m<sup>-2</sup>)",
    "Precipitação (mm/h)"
]
# (colour for the "no lightning" box, colour for the "lightning" box) per panel.
colors = [
    ('#480B0B', '#C61D1D'),
    ('#0E4612', '#30FF41'),
    ('#00355D', '#0090FF')
]
fig = make_subplots(rows=rows, cols=cols,
                    subplot_titles=('Refletividade', 'VIL', 'Precipitação'))

for idx, (column, axis_title, (color_no, color_yes)) in enumerate(
        zip(indexes, yaxes, colors)):
    # Map the flat panel index onto the 1-based subplot grid.
    row, col = idx // cols + 1, idx % cols + 1

    for subset, name_template, color in (
            (no, 'Sem detecção de raios (%s)', color_no),
            (yes, 'Com detecção de raios (%s)', color_yes)):
        fig.add_trace(
            go.Box(y=subset[column].sample(records, random_state=SAMPLE_SEED),
                   name=name_template % column,
                   marker_color=color, boxmean=False),
            row=row, col=col
        )

    fig.update_yaxes(title_text=axis_title, row=row, col=col)
    fig.update_xaxes(showticklabels=False, row=row, col=col)

fig.update_layout(
    template='plotly_dark',
    # Typo fixed: the title previously read "subconjutos".
    title='Distribuição de índices em subconjuntos com e sem descargas '
          'atmosféricas (%d amostras de cada subconjunto)'%records
)

iplot(fig)

In [6]:
# Reflectivity distribution below vs. at-or-above three thresholds of the
# 'yyyyyyy_xx3' column, one subplot row per threshold.
thresholds = [1, 5, 10]
records = 40        # rows sampled from each side of every threshold
SAMPLE_SEED = 42    # fixed so the figure is identical on every run
indexs = [(1, 1), (2, 1), (3, 1)]   # (row, col) of each threshold's subplot
# (colour for the "below" box, colour for the "at or above" box) per threshold.
colors = [
    ('#480B0B', '#C61D1D'),
    ('#0E4612', '#30FF41'),
    ('#00355D', '#0090FF'),
]
fig = make_subplots(rows=3, cols=1, shared_xaxes=True)

# The loop variables are deliberately not named `threshold`, `no` or `yes`.
# The earlier split cell binds those names to the threshold-1 subsets, and
# rebinding them here to 40-row samples made the cells that call
# no.sample(100) / yes.sample(1000) raise when re-run after this one.
for (row, col), min_count, (color_below, color_above) in zip(
        indexs, thresholds, colors):
    below = df[df['yyyyyyy_xx3'] < min_count].sample(
        records, random_state=SAMPLE_SEED)
    above = df[df['yyyyyyy_xx3'] >= min_count].sample(
        records, random_state=SAMPLE_SEED)

    fig.add_trace(
        go.Box(x=below['reflect'],
               name='Subconjunto com menos de %s raio(s) detectado(s)'%min_count,
               marker_color=color_below),
        row=row, col=col
    )
    fig.add_trace(
        go.Box(x=above['reflect'],
               name='Subconjunto com %s ou mais raio(s) detectado(s)'%min_count,
               marker_color=color_above),
        row=row, col=col
    )

# Applies to every subplot, so it is done once rather than on each iteration.
fig.update_yaxes(showticklabels=False)

# Only the bottom panel carries the x-axis title.
fig.update_xaxes(title_text='Refletividade (dBZ)', row=3, col=1)

fig.update_layout(
    legend=dict(x=-0.11, y=-0.30),
    legend_orientation="h",
    font={'size':14},
    title='Comparação da distribuição do índice de <b>refletividade</b> entre '\
    'os subconjuntos <br />com e sem descargas elétricas atmosféricas', template='plotly_dark',
)

iplot(fig)

In [7]:
# Sum of the 'yyyyyyy_xx3' column for every (month, day) pair.
group = df.groupby(['month', 'day'])[['yyyyyyy_xx3']].sum()
group.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,yyyyyyy_xx3
month,day,Unnamed: 2_level_1
8.0,27.0,0.0
8.0,28.0,0.0
8.0,29.0,0.0
8.0,30.0,0.0
8.0,31.0,0.0


In [8]:
# Build {'dd/mm': {'lightning': total}} from the per-day totals in `group`.
#
# The (month, day) index values are floats (e.g. 8.0, 27.0). Converting them
# with str() gave '8.0' / '27.0', so the previous length-based zero padding
# produced labels such as '027.0/08.0'. Casting to int and formatting with
# %02d yields the intended '27/08'.
amount = {}
for (month, day), total in group['yyyyyyy_xx3'].items():
    key = '%02d/%02d' % (int(day), int(month))
    # Accumulate rather than assign so repeated labels would still add up.
    amount.setdefault(key, {'lightning': 0})
    amount[key]['lightning'] += total

In [9]:
# Day labels (the dict keys) and their lightning totals, both in the dict's
# insertion order.
x = list(amount)
y = [entry['lightning'] for entry in amount.values()]

In [10]:
# Line chart of the per-day lightning totals (y) against their day labels (x).
labels, values = x, y

daily_trace = go.Scatter(
    x=labels,
    y=values,
    mode='lines+markers',
    line=dict(color='#D82D3B'),
    name='Descargas atmosféricas',
)

fig = go.Figure()
fig.add_trace(daily_trace)

layout_options = dict(
    font={'size':14},
    template='plotly_dark',
    xaxis_title='Dia observado',
    yaxis_title='Total de raios detectados no dia',
    title='Total de descargas atmosféricas detectadas por dia pela rede LINET',
)
fig.update_layout(**layout_options)

iplot(fig)
# Static export, left disabled:
# fig.write_image("fig1.pdf")