In [1]:
import plotly
import plotly.express as px
import plotly.graph_objects as go
import dash
import jupyter_dash as jd
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
import dash_bootstrap_components as dbc
from dash.dependencies import Output, Input, State
from dash.exceptions import PreventUpdate
from dash_table import DataTable
import pandas as pd
pd.options.display.max_columns = None

for p in [plotly, dash, jd, dcc, html, dbc, pd,]:
    print(f'{p.__name__:-<30}v{p.__version__}')

plotly------------------------v4.14.3
dash--------------------------v1.19.0
jupyter_dash------------------v0.4.0
dash_core_components----------v1.15.0
dash_html_components----------v1.1.2
dash_bootstrap_components-----v0.11.3
pandas------------------------v1.2.3


In [52]:
df = pd.read_csv('data/2019.csv')
df['num_chars'] = df['full_text'].str.len()
df['num_words'] = df['full_text'].str.split().str.len()


df2 = pd.read_csv('data/tweets.csv')
df2['weekday'] = pd.to_datetime(df2['date']).dt.weekday
df2['year'] = pd.to_datetime(df2['date']).dt.year
df2['month'] = pd.to_datetime(df2['date']).dt.month


df3 = df2['year'].value_counts().sort_index().to_frame().reset_index()
fig = px.pie(df3,values='year',names='index')
# Top words by freq

# word_tweet_freq=[]
# for tweet in df['full_text']:
#     for words in tweet.split():
#         word_tweet_freq.append(words)
# word_fre = FreqDist(word_tweet_freq)

# create a Df with th efrequency of each word
#df_words = pd.DataFrame.from_dict(dict_to_df)

app = JupyterDash(__name__)

app.layout = html.Div([
    html.H1('Exploratory Data Analisis - EDA'),
    html.Br(),
    html.H2("Data understanding I"),
    html.Br(),
    dbc.Label('EDA I'),
    html.Br(),
    dbc.Label('Modify number of bins:'),
            dcc.Slider(id='hist_bins_slider', 
                       dots=True, min=0, max=100, step=5, included=False,
                       marks={x: str(x) for x in range(0, 105, 5)}),
    html.Br(),
    dbc.Row([
        
        dbc.Col([
            dbc.Label('Histogram - Character tweets distribution by length.'),
            dcc.Graph(id='chars_freq_hist')
        ]),
        dbc.Col([
            dbc.Label('Histogram - Words tweets distribution by length.'),
            dcc.Graph(id='words_freq_hist')
        ]),
    ]),
    
    
    html.Br(),
    html.H2('Data understanding II'),
    html.Br(),
    dbc.Label('Bar chart - Tweets frequency by weekday.'),
    dcc.Dropdown(id='year_weekday_dropdown',
                 options=[{'label': year, 'value': year} for year in df2['year'].drop_duplicates().sort_values()]),
    dcc.Graph(id='bar_freq_weekday'),
    
    html.Br(),
    dbc.Label('Pie chart - Tweets distribution by year.'),
    dcc.Graph(figure=fig)
    
    
    
    ])

@app.callback(Output('chars_freq_hist', 'figure'),
              Output('words_freq_hist', 'figure'),
              Input('hist_bins_slider', 'value'))

def plot_freq_hist(nbins):
    fig1 = px.histogram(df,
                        x='num_chars',
                        nbins=nbins,
                        color_discrete_sequence=['#5BC0BE'])
                        #marginal="rug")
    
    fig2 = px.histogram(df,
                        x='num_words',
                        nbins=nbins,
                        color_discrete_sequence=['#5BC0BE'])
                        #marginal="rug")
    return fig1, fig2

@app.callback(Output('bar_freq_weekday', 'figure'),
              Input('year_weekday_dropdown', 'value'))
def plot_tweets_freq_weekday(year):
    df = df2[df2['year'].eq(year)]
    df = df['weekday'].value_counts().sort_index().to_frame().reset_index()
    
    fig = px.bar(df,
                 x='index',
                 y='weekday')
    return fig




app.run_server(debug=True, port=8042)

Dash app running on http://127.0.0.1:8041/


In [34]:
df = pd.read_csv('data/2019.csv')
df['num_chars'] = df['full_text'].str.len()
df['num_words'] = df['full_text'].str.split().str.len()


df2 = pd.read_csv('data/tweets.csv')
df2['weekday'] = pd.to_datetime(df2['date']).dt.weekday
df2['year'] = pd.to_datetime(df2['date']).dt.year
df2['month'] = pd.to_datetime(df2['date']).dt.month
df3 = df2[df2['year'].eq(2019)].groupby('weekday').value_counts()
df3.head()

AttributeError: 'DataFrameGroupBy' object has no attribute 'value_counts'

In [46]:
df2['year'].value_counts().sort_index().to_frame().reset_index()

Unnamed: 0,index,year
0,2019,94325
1,2020,84660
2,2021,84027
3,2022,39995


In [2]:
df = pd.read_csv('data/tweets.csv')
df['weekday'] = pd.to_datetime(df['date']).dt.weekday
df['year'] = pd.to_datetime(df['date']).dt.year
df['month'] = pd.to_datetime(df['date']).dt.month|

In [3]:
df.head()

Unnamed: 0,full_text,user,location,date,tweet_id,number_rt,number_likes,number_reply,conversation_id,weekday,year,month
0,"#AliadosSemana Jorge Andrés Carrillo Cardoso,...",RevistaSemana,"Medellín, Colombia",2021-10-27 23:37:00+00:00,1453506043363971074,1,3,1,1453506043363971074,2,2021,10
1,@minuto30com A el no le cuesta ni le cistara ...,Waac0370,"Medellín, Colombia",2021-10-27 23:16:38+00:00,1453500919207927809,0,2,0,1453486400490332163,2,2021,10
2,@QuinteroCalle Lo quiero ver a usted @Quinter...,pelortiz,"Medellín, Colombia",2021-10-27 23:12:25+00:00,1453499858833354756,0,1,0,1453497447460872193,2,2021,10
3,@QuinteroCalle @AlcaldiadeMed ese discurso es...,coherederocrist,"Medellín, Colombia",2021-10-27 21:57:58+00:00,1453481123279872005,1,3,0,1453460986321686533,2,2021,10
4,@pelusherojo @anamarqr @patriciayorkyy Increí...,exfuncionarios,"Medellín, Colombia",2021-10-27 21:48:50+00:00,1453478822158942224,17,40,0,1453423807713185792,2,2021,10
