#  Real-Time Social Media Dashboard
(Colab + Streamlit + ngrok)



2 — Install dependencies

In [1]:
# Install required packages
!pip install --quiet streamlit pyngrok plotly pandas


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m67.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m94.2 MB/s[0m eta [36m0:00:00[0m
[?25h

Upload files

In [2]:
# Bring aggregates.csv and/or tweets.csv into the Colab session through the
# browser file picker (the alternative to mounting Drive).
from google.colab import files

uploaded = files.upload()
print("Uploaded files:", list(uploaded))


Saving tweets.csv to tweets.csv
Saving aggregates.csv to aggregates.csv
Uploaded files: ['tweets.csv', 'aggregates.csv']


5 — Create the Streamlit app file

In [3]:
# Create collaborator_streamlit_app.py in the working directory
app_code = r'''
"""
collaborator_streamlit_app.py
Team variant of the real-time social media dashboard.
- Robust loading of aggregates.csv
- Smoothing toggle, range controls, KPI metrics
- Optional text analysis if tweets.csv present
"""

import os
import re
from collections import Counter

import streamlit as st
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

# Page title and wide layout.
st.set_page_config(layout="wide", page_title="Social Media Monitor — Team")
st.title("Social Media Monitor — Team Edition")

# Sidebar: choose where aggregates.csv is read from.
sidebar = st.sidebar
sidebar.header("Data & Display")
use_drive = sidebar.checkbox("Load from Drive", value=False)
# Each mode gets its own labelled text box with a matching default path.
path_label, path_default = (
    ("Drive path to aggregates.csv", "/content/drive/MyDrive/aggregates.csv")
    if use_drive
    else ("Workspace filename", "aggregates.csv")
)
csv_path = sidebar.text_input(path_label, path_default)

# load function
def load_aggregates(path):
    """Load the aggregates CSV and normalize its column names.

    The required columns are located case-insensitively, ignoring whitespace
    around the header names, under any of these spellings:
      - time:      minute / time / timestamp
      - count:     tweet_count / count
      - sentiment: avg_sentiment / sentiment
    They are renamed to idx_time, tweets and sentiment respectively.

    Returns:
        ((df, xcol), None) on success. xcol is the column to use as the
        x-axis: 'idx_time_dt' when the time column holds parseable date
        strings, otherwise 'idx_time'.
        (None, message) when the file is missing, cannot be read, or lacks
        one of the required columns.
    """
    if not os.path.exists(path):
        return None, f"File not found: {path}"
    try:
        df = pd.read_csv(path)
    except Exception as e:
        return None, f"Error reading CSV: {e}"

    # normalize expected columns
    lc = {str(c).strip().lower(): c for c in df.columns}

    def first_present(*names):
        # Return the original column label for the first spelling that exists.
        for name in names:
            if name in lc:
                return lc[name]
        return None

    minute_col = first_present('minute', 'time', 'timestamp')
    count_col = first_present('tweet_count', 'count')
    sent_col = first_present('avg_sentiment', 'sentiment')
    if minute_col is None or count_col is None or sent_col is None:
        return None, f"Missing required columns. Available: {list(df.columns)}"

    renames = {minute_col: 'idx_time', count_col: 'tweets', sent_col: 'sentiment'}
    targets = set(renames.values())
    # The file may already contain a column carrying one of the target names
    # (e.g. both avg_sentiment and sentiment). Move it aside first, otherwise
    # the rename would produce duplicate labels and df['sentiment'] would
    # return a DataFrame instead of a Series.
    taken = set(df.columns) | targets
    for col in list(df.columns):
        if col in targets and col not in renames:
            alias = f"{col}_orig"
            while alias in taken:
                alias += "_"
            taken.add(alias)
            renames[col] = alias
    df = df.rename(columns=renames)

    # attempt datetime parse
    xcol = 'idx_time'
    # Numbers are skipped on purpose: to_datetime would read them as
    # nanoseconds since 1970 and every row would land on 1970-01-01.
    if not pd.api.types.is_numeric_dtype(df['idx_time']):
        try:
            df['idx_time_dt'] = pd.to_datetime(df['idx_time'], errors='coerce')
            if df['idx_time_dt'].notna().any():
                xcol = 'idx_time_dt'
        except Exception:
            # Best effort: fall back to the raw column as the x-axis.
            xcol = 'idx_time'
    return (df, xcol), None

# Load the aggregates; on failure show the reason and skip the dashboard body.
(res, err) = load_aggregates(csv_path)
if err:
    st.warning(err)
    st.info("Tip: upload aggregates.csv into the workspace or mount Drive and set the path.")
else:
    df, xcol = res
    st.sidebar.success(f"Loaded {os.path.basename(csv_path)} ({df.shape[0]} rows)")
    # Cells that are not numbers become NaN instead of aborting the page.
    df['tweets'] = pd.to_numeric(df['tweets'], errors='coerce')
    df['sentiment'] = pd.to_numeric(df['sentiment'], errors='coerce')

    # display options
    apply_smooth = st.sidebar.checkbox("Enable smoothing", value=True)
    window = st.sidebar.slider("Smoothing window", 1, 21, 5)

    if apply_smooth:
        # min_periods=1 keeps the first rows defined before the window fills.
        df['tweets_smooth'] = df['tweets'].rolling(window=window, min_periods=1).mean()
        df['sentiment_smooth'] = df['sentiment'].rolling(window=window, min_periods=1).mean()
    else:
        df['tweets_smooth'] = df['tweets']
        df['sentiment_smooth'] = df['sentiment']

    # range selector
    st.sidebar.markdown("### Display range")
    if xcol == 'idx_time_dt':
        start_date = df['idx_time_dt'].min().date()
        end_date = df['idx_time_dt'].max().date()
        sel = st.sidebar.date_input("Select date range", [start_date, end_date])
        # While the user is still picking a range the widget returns fewer
        # than two dates; indexing sel[1] at that point would raise IndexError.
        picked = list(sel) if isinstance(sel, (list, tuple)) else [sel]
        if len(picked) == 2:
            range_start, range_end = picked
        else:
            st.sidebar.info("Pick both a start and an end date to filter; showing the full range.")
            range_start, range_end = start_date, end_date
        row_dates = df['idx_time_dt'].dt.date
        mask = (row_dates >= range_start) & (row_dates <= range_end)
        df_view = df.loc[mask].copy()
    else:
        # Clamp to 0 so an empty frame does not produce a default of -1,
        # which number_input rejects as below its minimum.
        last_row = max(0, len(df) - 1)
        start = st.sidebar.number_input("Start row", 0, last_row, 0)
        end = st.sidebar.number_input("End row", 0, last_row, last_row)
        # Accept the bounds in either order instead of showing nothing.
        first_row, final_row = sorted((int(start), int(end)))
        df_view = df.iloc[first_row:final_row + 1].copy()

    # KPIs
    c1, c2, c3 = st.columns(3)
    c1.metric("Rows shown", int(df_view.shape[0]))
    c2.metric("Total tweets", int(df_view['tweets'].sum()) if df_view['tweets'].notna().any() else 0)
    c3.metric("Mean sentiment", round(df_view['sentiment'].mean(), 3) if df_view['sentiment'].notna().any() else "N/A")

    # Plots
    st.subheader("Tweet Volume")
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df_view[xcol], y=df_view['tweets'], mode='lines+markers', name='tweets'))
    fig.add_trace(go.Scatter(x=df_view[xcol], y=df_view['tweets_smooth'], mode='lines', name='smoothed', line=dict(dash='dash')))
    fig.update_layout(height=420, margin=dict(l=10,r=10,t=30,b=10))
    st.plotly_chart(fig, use_container_width=True)

    st.subheader("Sentiment Over Time")
    fig2 = go.Figure()
    fig2.add_trace(go.Scatter(x=df_view[xcol], y=df_view['sentiment'], mode='lines+markers', name='sentiment'))
    fig2.add_trace(go.Scatter(x=df_view[xcol], y=df_view['sentiment_smooth'], mode='lines', name='smoothed', line=dict(dash='dot')))
    fig2.update_layout(height=380, margin=dict(l=10,r=10,t=30,b=10))
    st.plotly_chart(fig2, use_container_width=True)

    # The preview is capped at 200 rows.
    if st.sidebar.checkbox("Show data preview"):
        st.dataframe(df_view.head(200))

# Optional quick text analysis: top @mentions and #hashtags from tweets.csv
if os.path.exists('tweets.csv'):
    st.markdown("---")
    st.header("Quick Text Summary (tweets.csv)")
    try:
        tdf = pd.read_csv('tweets.csv', low_memory=False)
        # Only non-numeric columns can hold @mentions or #hashtags. Filtering
        # on dtype keeps columns such as tweet_id or tweet_count from being
        # chosen just because their name contains "tweet".
        candidates = [c for c in tdf.columns if not pd.api.types.is_numeric_dtype(tdf[c])]
        # Prefer a column named like "text"; fall back to one named like "tweet".
        text_cols = [c for c in candidates if 'text' in str(c).lower()]
        if not text_cols:
            text_cols = [c for c in candidates if 'tweet' in str(c).lower()]
        if text_cols:
            # Lower-casing makes the counts case-insensitive (#AI == #ai).
            txt = ' '.join(tdf[text_cols[0]].dropna().astype(str).tolist()).lower()
            mentions = re.findall(r'@\w+', txt)
            hashtags = re.findall(r'#\w+', txt)
            top_mentions = Counter(mentions).most_common(12)
            top_hashtags = Counter(hashtags).most_common(12)
            st.markdown("**Top mentions**")
            st.table(pd.DataFrame(top_mentions, columns=['mention', 'count']))
            st.markdown("**Top hashtags**")
            st.table(pd.DataFrame(top_hashtags, columns=['hashtag', 'count']))
        else:
            st.info(f"No text-like column found. Columns: {list(tdf.columns)}")
    except Exception as e:
        # Best effort: a broken tweets.csv must not take the dashboard down.
        st.error(f"Could not analyze tweets.csv: {e}")
else:
    st.info("No tweets.csv present for text analysis.")
'''
# Persist the app source to the working directory for `streamlit run`.
with open('collaborator_streamlit_app.py', mode='w', encoding='utf-8') as app_file:
    app_file.write(app_code)
print("Created collaborator_streamlit_app.py")


Created collaborator_streamlit_app.py


6 — Set ngrok authtoken

In [4]:

# Configure the ngrok authtoken without storing it in the notebook.
# The token is read from the NGROK_AUTH_TOKEN environment variable and, if
# that is unset, requested through a hidden interactive prompt.
# NOTE: an authtoken was previously hardcoded in this cell; treat it as leaked
# and revoke it in the ngrok dashboard.
import os
from getpass import getpass

from pyngrok import conf

NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN", "").strip()
if not NGROK_AUTH_TOKEN:
    NGROK_AUTH_TOKEN = getpass("ngrok authtoken: ").strip()

if not NGROK_AUTH_TOKEN:
    raise ValueError(
        "No ngrok authtoken provided. Set the NGROK_AUTH_TOKEN environment "
        "variable or enter the token at the prompt before starting the tunnel."
    )
conf.get_default().auth_token = NGROK_AUTH_TOKEN
print("ngrok auth token configured.")


ngrok auth token configured.


7 — Start Streamlit & ngrok

In [5]:
# Starts Streamlit in the background and exposes it through an ngrok tunnel.
import shutil
import socket
import subprocess
import sys
import time

from pyngrok import ngrok

APP_FILE = "collaborator_streamlit_app.py"
PORT = 8501
LOG_FILE = "streamlit.log"
STARTUP_TIMEOUT_S = 30

# Clean previous processes.
# pkill is run directly rather than through `!`: with `!pkill -f streamlit`
# the pattern also matches the wrapping shell's own command line, so pkill
# kills that shell too.
ngrok.kill()
if shutil.which("pkill"):
    for pattern in ("streamlit run", "ngrok"):
        # pkill exits non-zero when nothing matched; that is not an error here.
        subprocess.run(["pkill", "-f", pattern], check=False)

# Start streamlit (serves collaborator_streamlit_app.py) with the kernel's
# own interpreter.
cmd = [
    sys.executable, "-m", "streamlit", "run", APP_FILE,
    "--server.port", str(PORT),
    "--server.headless", "true",
]
# Output goes to a log file: a PIPE that is never read fills up and stalls
# the server once the OS buffer is full.
with open(LOG_FILE, "w") as log:
    proc = subprocess.Popen(cmd, stdout=log, stderr=subprocess.STDOUT)

# Wait until the server accepts connections instead of sleeping a fixed time.
deadline = time.monotonic() + STARTUP_TIMEOUT_S
server_ready = False
while time.monotonic() < deadline:
    if proc.poll() is not None:
        raise RuntimeError(
            f"Streamlit exited with code {proc.returncode}; see {LOG_FILE} for details."
        )
    try:
        with socket.create_connection(("127.0.0.1", PORT), timeout=1):
            server_ready = True
            break
    except OSError:
        time.sleep(0.5)
if not server_ready:
    print(f"Streamlit did not answer on port {PORT} within {STARTUP_TIMEOUT_S}s; opening the tunnel anyway.")

public_url = ngrok.connect(PORT).public_url
print("ngrok tunnel ->", public_url)
print("If the app doesn't respond immediately, wait 5-10s and refresh the URL.")
print(f"Streamlit output is logged to {LOG_FILE}.")


^C
^C
ngrok tunnel -> https://epidemiologically-unemaciated-twanna.ngrok-free.dev
If the app doesn't respond immediately, wait 5-10s and refresh the URL.
