In [16]:
import polars as pl
import pandas as pd
import plotly.express as px 
import pyarrow
from datetime import datetime
import plotly.graph_objects as go
from plotly.subplots import make_subplots


In [17]:
# Raw string: the Windows path contains backslashes (`\B`, `\T`) that must not be
# interpreted as escape sequences.
# NOTE(review): absolute local path - the notebook only runs where this drive layout exists.
lf = pl.scan_csv(r'D:\Base_de_dados_anvisa\Top10_substances.csv')

# Preview the first rows of the lazily scanned sales file.
lf.head().collect()

PRINCIPIO_ATIVO,UF_VENDA,DATA,count
str,str,i64,i64
"""ESTOLATO DE ER...","""SP""",102014,5948
"""CLORIDRATO DE ...","""SP""",102014,5885
"""ESTOLATO DE ER...","""SP""",72014,5809
"""CLORIDRATO DE ...","""SP""",92014,5696
"""CLORIDRATO DE ...","""SP""",72014,5618


In [27]:
# Raw string: the Windows path contains backslashes (`\B`, `\h`) that must not be
# interpreted as escape sequences.
# NOTE(review): absolute local path - the notebook only runs where this drive layout exists.
lf_hp = pl.scan_csv(r'D:\Base_de_dados_anvisa\hipertrofia.csv')

# Preview the first rows of the "Hipertrofia" interest series.
lf_hp.head().collect()

Mês,Hipertrofia: (Brasil)
str,i64
"""2014-01""",97
"""2014-02""",98
"""2014-03""",84
"""2014-04""",80
"""2014-05""",82


In [29]:
# Raw string: the Windows path contains backslashes (`\B`, `\m`) that must not be
# interpreted as escape sequences.
# NOTE(review): absolute local path - the notebook only runs where this drive layout exists.
lf_gt = pl.scan_csv(r'D:\Base_de_dados_anvisa\multiTimeline.csv')

# Preview the first rows of the "impotência" interest series.
lf_gt.head().collect()

Mês,impotência: (Brasil)
str,i64
"""2014-01""",19
"""2014-02""",18
"""2014-03""",20
"""2014-04""",20
"""2014-05""",22


In [20]:
def correct_data(LazyFrame, col_name):
    """Turn a compact numeric date column into a sorted ``DATETIME`` column.

    The values of ``col_name`` are cast to text and left-padded with zeros to
    eight characters (values already longer than eight characters are kept as
    they are). A leading ``00`` left by the padding is replaced with ``01``
    and the text is parsed with the ``%d%m%Y`` format, so ``12014`` becomes
    2014-01-01. Rows dated after 2021-07-01 are dropped and the result is
    sorted by ``DATETIME``.

    Parameters
    ----------
    LazyFrame : polars.LazyFrame
        Lazy query to transform. The query that is passed in is the one that
        gets processed; no notebook-level variable is read.
    col_name : str
        Name of the column holding the date digits.

    Returns
    -------
    polars.DataFrame
        The collected frame, with ``col_name`` cast to text and an extra
        ``DATETIME`` column.
    """
    z = 8  # number of characters consumed by the ``%d%m%Y`` format
    as_text = pl.col(col_name).cast(pl.Utf8)

    q = (
        LazyFrame
        .with_columns(as_text)
        .with_columns(
            pl.when(as_text.str.lengths() > z)
            .then(as_text)
            .otherwise(
                # Prefix z zeros and keep the last z characters: a left zero-pad.
                pl.concat_str([pl.lit("0" * z), as_text]).str.slice(-z)
            )
            .alias("DATETIME")
        )
        .with_columns(
            # Anchored at the start so only the padded day placeholder is
            # rewritten, never a pair of zeros further inside the date.
            pl.col('DATETIME').str.replace('^00', '01').alias('DATETIME')
        )
        .with_columns(
            pl.col('DATETIME').str.strptime(pl.Date, fmt='%d%m%Y').cast(pl.Datetime)
        )
        .filter(pl.col('DATETIME') <= pl.lit(datetime(2021, 7, 1)))
        .sort(by=pl.col('DATETIME'))
    )

    return q.collect()


In [21]:
# Build the collected frame with a parsed DATETIME column from the raw DATA digits.
df = correct_data(LazyFrame=lf, col_name='DATA')
df

PRINCIPIO_ATIVO,UF_VENDA,DATA,count,DATETIME
str,str,str,i64,datetime[μs]
"""CLORIDRATO DE ...","""SP""","""12014""",5055,2014-01-01 00:00:00
"""CLORIDRATO DE ...","""SP""","""12014""",4869,2014-01-01 00:00:00
"""ESTOLATO DE ER...","""SP""","""12014""",4218,2014-01-01 00:00:00
"""CLORIDRATO DE ...","""SP""","""12014""",3976,2014-01-01 00:00:00
"""TOPIRAMATO""","""SP""","""12014""",3643,2014-01-01 00:00:00
"""CLORIDRATO DE ...","""SP""","""12014""",3443,2014-01-01 00:00:00
"""CLORIDRATO DE ...","""SP""","""12014""",3177,2014-01-01 00:00:00
"""CLORIDRATO DE ...","""SP""","""12014""",2365,2014-01-01 00:00:00
"""GABAPENTINA""","""SP""","""12014""",1841,2014-01-01 00:00:00
"""CLORIDRATO DE ...","""MG""","""12014""",1725,2014-01-01 00:00:00


In [22]:
def TopSubstances(df, every_period='1mo', substances=('TESTOSTERONA',)):
    """Sum sales counts per active ingredient over fixed time windows.

    Rows are first restricted to the requested active ingredients, summed per
    (``PRINCIPIO_ATIVO``, ``DATETIME``) pair, sorted by ``DATETIME`` and then
    re-aggregated into windows of ``every_period`` for each ingredient.

    Parameters
    ----------
    df : polars.DataFrame
        Frame with ``PRINCIPIO_ATIVO``, ``DATETIME`` and ``count`` columns.
    every_period : str, default '1mo'
        Window size, used for both the step and the length of each window.
    substances : str or iterable of str, default ('TESTOSTERONA',)
        Active ingredient names to keep. The default reproduces the filter
        that used to be hard-coded. Other names that were listed as
        candidates: GABAPENTINA, CLORIDRATO DE AMITRIPTILINA,
        CLORIDRATO DE BUPROPIONA, TOPIRAMATO, ESTOLATO DE ERITROMICINA,
        CLORIDRATO DE SERTRALINA, CLORIDRATO DE PAROXETINA,
        CLORIDRATO DE TRAMADOL, CLORIDRATO DE FLUOXETINA.

    Returns
    -------
    polars.DataFrame
        One row per ingredient and window with the summed ``count``.
    """
    # A bare string would otherwise be split into single characters.
    if isinstance(substances, str):
        substances = [substances]
    wanted = list(substances)

    df_substances = (
        df
        # Filter first: the groups are per ingredient, so dropping the other
        # ingredients up front yields the same rows with less work.
        .filter(pl.col('PRINCIPIO_ATIVO').is_in(wanted))
        .groupby(['PRINCIPIO_ATIVO', 'DATETIME'])
        .agg(pl.col('count').sum())
        # groupby_dynamic needs the index column in sorted order.
        .sort('DATETIME')
        .groupby_dynamic(
            index_column='DATETIME',
            by='PRINCIPIO_ATIVO',
            every=every_period,
            period=every_period,
        )
        .agg([pl.col('count').sum()])
    )
    return df_substances


In [23]:
# Monthly totals, converted to pandas so plotly express can consume them.
df_substances = TopSubstances(df, every_period='1mo').to_pandas()

# One line per active ingredient; the figure is the cell's displayed value.
(
    px.line(df_substances, x='DATETIME', y='count', color='PRINCIPIO_ATIVO')
    .update_layout(xaxis_title="Data", yaxis_title="Volume de vendas")
)

In [24]:
def googleLFDataPrep(lf):
    """Parse the ``Mês`` text column into datetimes and collect the query.

    ``Mês`` holds ``YYYY-MM`` strings; each one is parsed as a date and then
    cast to a datetime. The remaining columns are passed through untouched.

    Parameters
    ----------
    lf : polars.LazyFrame
        Lazy query exposing a ``Mês`` column.

    Returns
    -------
    polars.DataFrame
        The collected frame with ``Mês`` as a datetime column.
    """
    month_as_date = pl.col('Mês').str.strptime(pl.Date, fmt='%Y-%m')
    month_as_datetime = month_as_date.cast(pl.Datetime)

    parsed = lf.with_columns(month_as_datetime)
    return parsed.collect()

In [30]:
# Monthly sales totals as a pandas frame for plotting.
df_substances = TopSubstances(df, '1mo').to_pandas()

# `lf_gt` and `lf_hp` are rebound here from polars LazyFrames to pandas
# DataFrames. The guards keep the cell safe to re-run: without them a second
# execution would hand a pandas frame to the polars-only prep function.
if isinstance(lf_gt, pl.LazyFrame):
    lf_gt = googleLFDataPrep(lf_gt).to_pandas()

if isinstance(lf_hp, pl.LazyFrame):
    lf_hp = googleLFDataPrep(lf_hp).to_pandas()

In [31]:

# Sales volume (left axis) against the "impotência" interest series (right axis).
fig = make_subplots(specs=[[{"secondary_y": True}]], shared_xaxes=True)

# `labels` expects a dict mapping column names to display labels; the stray
# string that used to be passed was dropped because the legend names are set
# explicitly below.
fig.add_trace(px.line(df_substances, x='DATETIME', y='count').data[0])
fig.add_trace(px.line(lf_gt, x='Mês', y='impotência: (Brasil)').data[0], secondary_y=True)

# Legend entries describe the series instead of carrying placeholder text.
fig.data[0].showlegend = True
fig.data[0].name = 'Testosterona'
fig.data[0].line = dict(color='red')

fig.data[1].showlegend = True
fig.data[1].name = 'Google Trends: Impotência'
fig.data[1].line = dict(color='blue')

fig.update_yaxes(title_text='Quantidade de vendas',secondary_y=False)
fig.update_yaxes(title_text='<b>Google Trends</b> (interesse no termo)',secondary_y=True)

fig.update_xaxes(title_text='data')

fig.show()

In [43]:
# Sales volume (left axis) against both interest series (right axis).
fig = make_subplots(specs=[[{"secondary_y": True}]], shared_xaxes=True)

fig.add_trace(px.line(df_substances, x='DATETIME', y='count').data[0])
fig.add_trace(px.line(lf_gt, x='Mês', y='impotência: (Brasil)').data[0], secondary_y=True)
fig.add_trace(px.line(lf_hp, x='Mês', y='Hipertrofia: (Brasil)').data[0], secondary_y=True)

# (legend name, line colour) for each trace, in the order they were added.
# Every trace gets its own name; previously the third reused the first one's.
trace_styles = [
    ('Testosterona', 'red'),
    ('Google Trends: Impotência', 'blue'),
    ('Google Trends: Hipertrofia', 'green'),
]
for trace, (legend_name, colour) in zip(fig.data, trace_styles):
    trace.showlegend = True
    trace.name = legend_name
    trace.line = dict(color=colour)

fig.update_yaxes(title_text='Quantidade de vendas',secondary_y=False)
fig.update_yaxes(title_text='<b>Google Trends</b> (interesse no termo)',secondary_y=True)

fig.update_xaxes(title_text='data')

fig.show()

In [180]:
# Final figure: sales volume on the left axis, both interest series on the right.
fig = make_subplots(specs=[[{"secondary_y": True}]], shared_xaxes=True)

# The axis of each trace is chosen by `secondary_y`; the explicit `yaxis=`
# arguments were removed because `add_trace` overrides them anyway.
# Marker settings were removed as well: with mode='lines' no markers are drawn.
fig.add_trace(
    go.Scatter(
        x=df_substances['DATETIME'],
        y=df_substances['count'],
        mode='lines',
        line=dict(color='red', width=7, shape='spline'),
        opacity=1,
        name='Testosterona',
    ),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(
        x=lf_gt['Mês'],
        y=lf_gt['impotência: (Brasil)'],
        mode='lines',
        line=dict(color='blue', width=5, shape='spline'),
        opacity=0.3,
        name='Google Trends: Impotência',
    ),
    secondary_y=True,
)

fig.add_trace(
    go.Scatter(
        x=lf_hp['Mês'],
        y=lf_hp['Hipertrofia: (Brasil)'],
        mode='lines',
        line=dict(color='violet', width=5, shape='spline'),
        opacity=0.3,
        name='Google Trends: Hipertrofia',
    ),
    secondary_y=True,
)

# Axis titles use `title=dict(text=..., font=...)`, the supported replacement
# for the deprecated `titlefont` attribute.
fig.update_layout(
    width=1500,
    height=1000,
    xaxis=dict(
        title=dict(text='Data', font=dict(size=30)),
        tickfont=dict(size=25),
    ),
    yaxis=dict(
        # The bold tag is now closed properly (was "<b>...<b>").
        title=dict(
            text="<b>Quantidade vendida</b>",
            font=dict(color="red", size=30),
        ),
        tickfont=dict(color="red", size=25),
    ),
    yaxis2=dict(
        title=dict(
            text="Google trends (interesse no termo)",
            font=dict(color="black", size=30),
        ),
        tickfont=dict(color="black", size=25),
        overlaying="y",
        side="right",
    ),
)

fig.show()