<a href="https://colab.research.google.com/github/ElenaIurchenkoMatlas/twitterelection_dataanalysis/blob/main/Dashboard_Streamlit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab-specific module used to attach Google Drive to this runtime.
from google.colab import drive
# Mount Drive at /content/drive; the dataset path used later in the notebook
# lives under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib as plt
import matplotlib.pyplot as plt
!pip install streamlit
!pip install --upgrade streamlit
!streamlit cache clear
!pip install pyngrok
!pip install plotly
import streamlit as st
from datetime import timedelta, datetime



In [None]:
# Location of the combined tweet dataset on the mounted Google Drive
file_path = "/content/drive/MyDrive/DataAnalysisTwitterElection2020/df_combined.csv"

# Load the dataset into a DataFrame
df_combined = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Preview the first rows to sanity-check that the CSV loaded as expected.
df_combined.head()

Unnamed: 0,candidate,created_at,tweet,retweet_count,user_screen_name,user_join_date,user_location,country,continent,state,...,sentiment_score_vader,sentiment_tbl,sentiment_score_tbl,sentiment_agreement,sentiment_bert,sentiment_score,topic_lda,topic_bert,user_age_group,representative_word
0,Joe Biden,2020-10-15 00:00:02,"#Trump: As a student I used to hear for years,...",1.0,snarke,2007-08-26 05:56:11,Portland,USA,North America,Oregon,...,68.1,positive,66.7,True,negative,0.013582,1,-1,<1year,election
1,Joe Biden,2020-10-15 00:00:08,You get a tie! And you get a tie! #Trump ‘s ra...,3.0,Ranaabtar,2009-06-15 19:05:35,Washington DC,USA,North America,District of Columbia,...,50.0,neutral,50.0,True,negative,0.033794,4,34,<1year,people
2,Joe Biden,2020-10-15 00:00:17,@CLady62 Her 15 minutes were over long time ag...,0.0,FarrisFlagg,2013-02-01 01:37:38,"Perris,California",USA,North America,California,...,30.9,negative,44.6,True,negative,0.003601,3,-1,<1year,election
3,Joe Biden,2020-10-15 00:00:18,@DeeviousDenise @realDonaldTrump @nypost There...,0.0,sm_gulledge,2017-08-24 16:45:49,"Ohio, USA",USA,North America,Ohio,...,63.7,positive,52.5,True,negative,0.070799,1,-1,<1year,election
4,Joe Biden,2020-10-15 00:00:20,One of the single most effective remedies to e...,0.0,jamieo33,2012-03-30 00:30:54,"Pennsylvania, USA",USA,North America,Pennsylvania,...,73.8,positive,55.5,True,negative,0.050497,2,-1,<1year,election


In [None]:
# Inspect the dtype pandas inferred for each column; the timestamp columns
# (created_at, user_join_date) come back as object and are converted in the
# following cell.
df_combined.dtypes

Unnamed: 0,0
candidate,object
created_at,object
tweet,object
retweet_count,float64
user_screen_name,object
user_join_date,object
user_location,object
country,object
continent,object
state,object


In [None]:
# Convert both timestamp columns to datetime in one pass; values that cannot
# be parsed become NaT instead of raising.
df_combined[['created_at', 'user_join_date']] = (
    df_combined[['created_at', 'user_join_date']].apply(pd.to_datetime, errors='coerce')
)

In [None]:
# Set page config. This has to run before any other Streamlit command on the
# page (including st.title), so it comes first.
st.set_page_config(page_title="Twitter Data Dashboard", layout="wide")

# Title of the dashboard
st.title("Twitter Data Dashboard")

# Helper functions
@st.cache_data
def load_data(path="df_combined.csv"):
    """Read the tweet CSV and add a parsed ``date`` column.

    Args:
        path: Location of the CSV file to read. Defaults to
            ``"df_combined.csv"`` in the working directory, the file this
            helper read before it accepted an argument. Pass the Google Drive
            path to load the same file the rest of the notebook uses.

    Returns:
        A DataFrame holding every column of the CSV plus ``date``, which is
        the ``created_at`` column converted with ``pd.to_datetime``.

    Raises:
        KeyError: If the CSV has no ``created_at`` column.
    """
    data = pd.read_csv(path)
    data['date'] = pd.to_datetime(data['created_at'])
    return data



# Display the DataFrame
if st.checkbox("Show Raw Data"):
    st.write(df_combined)

# Sidebar filters
st.sidebar.header("Filter Data")
# Drop missing candidates so the selectbox never offers a NaN option, which
# could not match any row in the equality filter below.
candidate_filter = st.sidebar.selectbox("Select Candidate", df_combined['candidate'].dropna().unique())
date_range = st.sidebar.date_input("Select Date Range", [])

# Filter data
filtered_data = df_combined[df_combined['candidate'] == candidate_filter]
# With an empty default, date_input yields 0, 1 or 2 dates: nothing chosen,
# only the start chosen so far, or a complete range. Filter only on a complete
# range; unpacking a 1-tuple into two names would raise.
if len(date_range) == 2:
    start_date, end_date = date_range
    # The widget returns datetime.date values. Convert them to Timestamps so
    # they compare cleanly against a datetime column, and make the upper bound
    # exclusive at the start of the next day so tweets posted at any time on
    # end_date are kept.
    range_start = pd.Timestamp(start_date)
    range_end = pd.Timestamp(end_date) + pd.Timedelta(days=1)
    # No-op when created_at is already datetime; otherwise parse it here so
    # this filter does not depend on an earlier conversion having been run.
    created_ts = pd.to_datetime(filtered_data['created_at'], errors='coerce')
    filtered_data = filtered_data[(created_ts >= range_start) & (created_ts < range_end)]

# Display filtered data
st.write(f"Filtered Data for {candidate_filter}")
st.write(filtered_data)

def _format_mean(frame, column):
    """Return the mean of ``frame[column]`` as a two-decimal string.

    Returns "N/A" when the column is absent or when the mean is undefined
    (for example, when the current filters leave no rows).
    """
    if column not in frame.columns:
        return "N/A"
    average = frame[column].mean()
    return "N/A" if pd.isna(average) else f"{average:.2f}"


# Display some metrics
st.subheader("Key Metrics")
st.metric("Total Tweets", len(filtered_data))
# 'likes' is not among the columns visible in this notebook's previews of
# df_combined, so every use of it is guarded rather than assumed.
st.metric("Average Likes", _format_mean(filtered_data, 'likes'))
st.metric("Average Retweets", _format_mean(filtered_data, 'retweet_count'))

# Visualizations
st.subheader("Visualizations")

# Likes vs. Retweets Scatter Plot
st.write("Likes vs. Retweets Scatter Plot")
if 'likes' in filtered_data.columns:
    fig, ax = plt.subplots()
    sns.scatterplot(data=filtered_data, x='likes', y='retweet_count', ax=ax)
    st.pyplot(fig)
    # Close the figure once rendered so open figures do not pile up each time
    # the script is re-executed.
    plt.close(fig)
else:
    st.info("The data has no 'likes' column, so this plot is unavailable.")

# Sentiment Distribution
st.write("Sentiment Score Distribution (VADER)")
fig, ax = plt.subplots()
sns.histplot(filtered_data['sentiment_score_vader'], bins=20, kde=True, ax=ax)
st.pyplot(fig)
plt.close(fig)

# Display word cloud (requires wordcloud library)
if st.checkbox("Show Word Cloud of Tweets"):
    from wordcloud import WordCloud
    # 'cleaned_tweet' is not among the columns visible in this notebook's
    # previews of df_combined; fall back to the raw 'tweet' text when it is
    # missing instead of failing on the lookup.
    text_column = 'cleaned_tweet' if 'cleaned_tweet' in filtered_data.columns else 'tweet'
    # Cast to str so a stray non-string value cannot break the join.
    all_words = ' '.join(filtered_data[text_column].dropna().astype(str))
    if all_words.strip():
        wordcloud = WordCloud(width=800, height=400, background_color='white').generate(all_words)
        fig, ax = plt.subplots()
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')
        st.pyplot(fig)
        # Close the figure once rendered so open figures do not pile up each
        # time the script is re-executed.
        plt.close(fig)
    else:
        # A word cloud cannot be built from empty text, so report it instead.
        st.info("No tweet text is available for the current filters.")

