In [25]:
import pandas as pd
import numpy as np
import glob
import os
import pyarrow.parquet as pq
import plotly.graph_objects as go

In [26]:
## heatindex
comment = pd.read_parquet('data/NVDA_Stock_new.pq')
stock = pd.read_csv('stock_data/NVDA.csv')
stock['Date'] = pd.to_datetime(stock['Date'])
stock = stock.rename(columns={'Date': 'time'})
df = stock.merge(comment, on='time', how='outer')
filtered_df = df[(df['time'] >= '2023-04-01') & (df['time'] <= '2024-03-31')]
filtered_df['Close'] = filtered_df['Close'].apply(lambda x: np.log(x))
# 按照日期对 cbody 和 title 列进行去重计数
grouped = filtered_df.groupby('time').agg({'cbody': pd.Series.nunique, 'title': pd.Series.nunique})

# 计算每天的 heat index
grouped['heat_index'] = grouped['cbody'] + grouped['title']



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [27]:
text_data_ai= pq.read_table('data/bets_ai_search.pq').to_pandas()['cbody'][:1000].str.cat()
text_data_nvda = pq.read_table('data/bets_nvda_search.pq').to_pandas()['cbody'][:1000].str.cat()

In [32]:
# Sample stock price data (replace with actual data)
stock_price = pd.read_parquet("output/stock_price.parquet")

# Sample topic-specific data (replace with actual data)
subreddits = list(set(df['subreddit']))

topic_data = {
}
for i in subreddits:
    topic_data[i] = list(df[df['subreddit']==i]['Polarity'])

KeyError: 'subreddit'

In [None]:
stock_data = stock_price.copy()

i want to generate a data dashboard with python plotly. the dashboard contains the following elements:
1) a timeseries plot with stock prices; 2) word cloud, i need a button to choose the a topic; 3) timeseries of specific topic, i need a button to choose the topic

In [31]:
import dash
import dash_core_components as dcc
from dash import html
from dash.dependencies import Input, Output
import plotly.express as px
from wordcloud import WordCloud
import pandas as pd
import numpy as np


# Initialize the Dash app
app = dash.Dash(__name__)

# Define app styles
app.layout = html.Div([
    html.H1("Data Dashboard", style={'textAlign': 'center', 'marginBottom': '20px'}),
    
    # Timeseries plot with stock prices
    dcc.Graph(
        id='stock-price-plot',
        config={'displayModeBar': False},
        style={'height': '400px'}
    ),
    
    # Word cloud
    html.Div([
        html.H3("Select topic in WallStreetBet to generate word cloud:", style={'textAlign': 'center'}),
        dcc.Dropdown(
            id='topic-dropdown',
            options=[{'label': topic, 'value': topic} for topic in ['NVDA','AI']],
            value=subreddits[0],
            style={'width': '50%', 'margin': '0 auto'}
        ),
#         html.H3("Word Cloud", style={'textAlign': 'center'}),
        dcc.Graph(
            id='word-cloud',
            config={'displayModeBar': False},
            style={'height': '300px'}
        )
    ], style={'marginBottom': '20px'}),
    
    # Dropdown for topic selection
    html.Div([
        html.H3("Select Company", style={'textAlign': 'center'}),
        dcc.Dropdown(
            id='company-dropdown',
            options=[{'label': topic, 'value': topic} for topic in subreddits],
            value=subreddits[0],
            style={'width': '50%', 'margin': '0 auto'}
        ),
        dcc.Graph(
            id='subreddit-timeseries-plot',
            config={'displayModeBar': False},
            style={'height': '300px'}
        )
    ], style={'marginBottom': '20px'}),
    
])

# Callback to update stock price plot
@app.callback(
    Output('stock-price-plot', 'figure'),
    Input('stock-price-plot', 'relayoutData')
)
def update_stock_price_plot(relayoutData):
    # Update stock price plot based on user interaction (if needed)
    fig = px.line(stock_data, x='Date', y='StockPrice', title='Stock Prices Over Time')
    fig.update_layout(
        margin=dict(l=20, r=20, t=40, b=20),
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        font=dict(family='Arial', size=12, color='black')
    )
    return fig

# Callback to update word cloud
@app.callback(
    Output('word-cloud', 'figure'),
    Input('topic-dropdown', 'value')
)
def update_word_cloud(selected_topic):
    # Generate word cloud
    if selected_topic =="AI":
        wordcloud = WordCloud(background_color = 'white',width=800, height=400).generate(text_data_ai)
    elif selected_topic =="NVDA":
        wordcloud = WordCloud(background_color = 'white',width=800, height=400).generate(text_data_nvda)
    else: 
        wordcloud = WordCloud(background_color = 'white',width=800, height=400).generate(text_data_ai)
    fig = px.imshow(wordcloud, title='Word Cloud')
    fig.update_layout(
        margin=dict(l=20, r=20, t=40, b=20),
        paper_bgcolor='white',  # Change paper background color to white
        plot_bgcolor='white',   # Change plot background color to white
        font=dict(family='Arial', size=12, color='White')
    )
    fig.update_xaxes(showticklabels=False)
    fig.update_yaxes(showticklabels=False)
    return fig

# Callback to update topic-specific timeseries plot
@app.callback(
    Output('subreddit-timeseries-plot', 'figure'),
    Input('company-dropdown', 'value')
)
def update_topic_timeseries_plot(selected_topic):
    # Update timeseries plot based on selected topic
    # 创建图表
    fig = go.Figure()

    # 添加 heat index 曲线
    fig.add_trace(go.Scatter(x=grouped.index, y=grouped['heat_index'], mode='lines', name='Heat Index', line=dict(color='firebrick')))

    # 添加 Close 曲线
    fig.add_trace(go.Scatter(x=grouped.index, y=filtered_df.groupby('time')['Close'].mean(), mode='lines', name='Close', yaxis='y2', line=dict(color='royalblue')))

    # 设置布局
    fig.update_layout(title='Heat Index and Close Over Time',
                      xaxis=dict(title='Time', tickangle=45),
                      yaxis=dict(title='Popularity Index', side='left', showgrid=False, zeroline=False, color='firebrick'),
                      yaxis2=dict(title='Stock Price', side='right', overlaying='y', showgrid=False, zeroline=False, color='royalblue'))
    fig.update_layout(
        margin=dict(l=20, r=20, t=40, b=20),
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        font=dict(family='Arial', size=12, color='black')
    )
    return fig

if __name__ == '__main__':
    app.run_server(debug=True,port=8080)