In [52]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from afinn import Afinn
from dash import Dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output

In [40]:
# Background on sentiment word lists: https://nealcaren.org/lessons/wordlists/
# English AFINN scorer; the sentiment cell below calls afinn.score() on every message.
afinn = Afinn(language='en')

In [41]:
# Load the raw social-media messages and parse the 'time' column into datetimes
# in a single chained expression.
social_media_df = (
    pd.read_csv('hw05-data/YInt Social Media Data/YInt.csv')
    .assign(time=lambda frame: pd.to_datetime(frame['time']))
)

In [42]:
def apply_sentiment_analysis(message):
    """Return the AFINN score for a single message.

    The value is converted with ``str`` before scoring, so cells that are not
    strings are scored on their string form instead of being passed to the
    scorer as-is.
    """
    text = str(message)
    return afinn.score(text)


# Score every message and store the result next to it.
social_media_df['sentiment_score'] = social_media_df['message'].map(apply_sentiment_analysis)

In [43]:
# Bucket messages by hour: unparseable timestamps become NaT, everything else
# is rounded down to the start of its hour.
message_times = pd.to_datetime(social_media_df['time'], errors='coerce')
social_media_df['time'] = message_times.dt.floor('1h')

In [44]:
# Median sentiment score per (hour, location) pair, returned as a flat frame.
sentiment_columns = ['time', 'location', 'sentiment_score']
messages_by_neighborhood = (
    social_media_df[sentiment_columns]
    .groupby(['time', 'location'])
    .median()
    .reset_index()
)

In [45]:
def int_to_neighborhood(i):
    """Translate a numeric neighborhood ID (1-19) into its name.

    Returns None when the ID is not one of the known values.
    """
    neighborhood_names = {
        1: 'Palace Hills',
        2: 'Northwest',
        3: 'Old Town',
        4: 'Safe Town',
        5: 'Southwest',
        6: 'Downtown',
        7: 'Wilson Forest',
        8: 'Scenic Vista',
        9: 'Broadview',
        10: 'Chapparal',
        11: 'Terrapin Springs',
        12: 'Pepper Mill',
        13: 'Cheddarford',
        14: 'Easton',
        15: 'Weston',
        16: 'Southton',
        17: 'Oak Willow',
        18: 'East Parton',
        19: 'West Parton',
    }
    try:
        return neighborhood_names.get(i)
    except TypeError:
        # Unhashable values (e.g. lists) cannot be a known ID; report "unknown"
        # rather than raising.
        return None

In [46]:
# Damage categories that are averaged into the overall damage score.
damage_categories = ['sewer_and_water', 'power', 'roads_and_bridges', 'medical', 'buildings']

damage_report_df = pd.read_csv('hw05-data/Damage Reports/mc1-reports-data.csv')

# Parse the report timestamps and bucket them by hour.
damage_report_df['time'] = pd.to_datetime(damage_report_df['time']).dt.floor('1h')

# Replace numeric neighborhood IDs with their names.
damage_report_df['location'] = damage_report_df['location'].apply(int_to_neighborhood)

# Keep the highest reported value of every column per neighborhood and hour,
# then average the damage categories row-wise into one overall score.
grouped_damage = (
    damage_report_df
    .groupby(['location', 'time'])
    .max()
    .reset_index()
)
grouped_damage['overall_damage'] = grouped_damage[damage_categories].mean(axis=1)
grouped_damage

Unnamed: 0,location,time,sewer_and_water,power,roads_and_bridges,medical,buildings,shake_intensity,overall_damage
0,Broadview,2020-04-06 00:00:00,8.0,9.0,9.0,6.0,9.0,,8.2
1,Broadview,2020-04-06 01:00:00,10.0,10.0,10.0,10.0,10.0,,10.0
2,Broadview,2020-04-06 02:00:00,10.0,10.0,10.0,10.0,10.0,,10.0
3,Broadview,2020-04-06 03:00:00,10.0,10.0,10.0,10.0,10.0,,10.0
4,Broadview,2020-04-06 04:00:00,10.0,10.0,10.0,10.0,10.0,0.0,10.0
...,...,...,...,...,...,...,...,...,...
1987,Wilson Forest,2020-04-09 17:00:00,,8.0,7.0,,,5.0,7.5
1988,Wilson Forest,2020-04-09 18:00:00,,8.0,7.0,,,4.0,7.5
1989,Wilson Forest,2020-04-09 23:00:00,,10.0,0.0,,,,5.0
1990,Wilson Forest,2020-04-10 19:00:00,,10.0,2.0,,,0.0,6.0


In [47]:
# Join hourly sentiment with hourly damage; the inner join keeps only the
# (time, location) pairs present in both frames.
damage_columns = ['time', 'location', 'shake_intensity', 'overall_damage']
merged_sentiment_shake = pd.merge(
    messages_by_neighborhood,
    grouped_damage[damage_columns],
    how='inner',
    on=['time', 'location'],
)
merged_sentiment_shake

Unnamed: 0,time,location,sentiment_score,shake_intensity,overall_damage
0,2020-04-06 00:00:00,Broadview,0.0,,8.20
1,2020-04-06 00:00:00,Cheddarford,1.5,,4.50
2,2020-04-06 00:00:00,Downtown,-3.0,1.0,9.60
3,2020-04-06 00:00:00,East Parton,1.5,0.0,7.00
4,2020-04-06 00:00:00,Old Town,-1.5,1.0,9.60
...,...,...,...,...,...
1723,2020-04-10 11:00:00,Southton,0.0,,7.80
1724,2020-04-10 11:00:00,Southwest,0.0,0.0,8.80
1725,2020-04-10 11:00:00,Terrapin Springs,0.0,1.0,9.60
1726,2020-04-10 11:00:00,West Parton,0.0,0.0,9.25


In [48]:
# (column, legend label) for each line drawn per neighborhood.
# The sentiment column holds the hourly *median* score (it is aggregated with
# .median() upstream), so it is labelled as such rather than as a mean.
trace_specs = (
    ('sentiment_score', 'median sentiment score'),
    ('overall_damage', 'mean damage score'),
    ('shake_intensity', 'max shake intensity'),
)

# Map each neighborhood name to its list of ready-made Scatter traces so the
# plotting cells only need a dictionary lookup.
traces = {}
for location in merged_sentiment_shake['location'].unique():
    filtered_df = merged_sentiment_shake[merged_sentiment_shake['location'] == location]
    traces[location] = [
        go.Scatter(x=filtered_df['time'], y=filtered_df[column], name=label)
        for column, label in trace_specs
    ]

In [51]:
# Static preview of a single neighborhood's three time series.
location = "Southton"
fig = go.Figure()
fig.add_traces(traces[location])
fig.update_layout(
    title=f"Damage vs Shake Intensity vs Social Media Sentiment Score for {location}"
)
fig.show()

In [94]:
# Interactive version of the plot: a dropdown selects the neighborhood and a
# callback redraws the graph from the prebuilt traces.
figure = go.Figure()
app = Dash(__name__)
app.layout = html.Div([
    dcc.Dropdown(
        merged_sentiment_shake['location'].unique(),
        'Southton',
        id='selected_location',
        # Dash style dictionaries use camelCased CSS property names.
        style={'width': '50%', 'marginLeft': 'auto'}
    ),
    dcc.Graph(id='my-graph', figure=figure),
])


@app.callback(
    Output(component_id='my-graph', component_property='figure'),
    Input(component_id='selected_location', component_property='value')
)
def update_graph(selected_location):
    """Build the figure for the neighborhood chosen in the dropdown.

    Parameters
    ----------
    selected_location
        Current dropdown value: a key of ``traces``, or ``None`` when the
        dropdown has been cleared.

    Returns
    -------
    plotly.graph_objects.Figure
        A titled figure holding the traces stored for the location, or an
        empty figure when there are no traces for the given value.
    """
    print(f'updating to {selected_location}')
    figure = go.Figure()
    # Clearing the dropdown sends None; return an empty figure instead of
    # failing with a KeyError on the traces lookup.
    if selected_location not in traces:
        return figure
    for trace in traces[selected_location]:
        figure.add_trace(trace)
    figure.update_layout({
        "title": f"Damage vs Shake Intensity vs Social Media Sentiment Score for {selected_location}"
    })
    return figure


# `run_server` is no longer available in Dash 3; prefer `run` and fall back to
# `run_server` only on releases that do not provide it.
start_server = getattr(app, 'run', None) or app.run_server
start_server(debug=True)

updating to Southton
updating to Cheddarford
updating to Downtown
updating to Old Town
updating to Scenic Vista
updating to Broadview
updating to East Parton
