# In [4]:
import warnings
warnings.filterwarnings('ignore')
import ipywidgets as widgets
import anywidget
import traitlets
import jupyter
import tweet_browser as tb
import voila
from matplotlib import pyplot as plt
from IPython.display import display, Javascript
import pandas as pd
import io
import math
import time
import json

# Page size used for pagination; also the default of TweetDisplay.tweetsPerPage.
TWEETS_PER_PAGE = 20
# When True, Browser runs extra assert-based sanity checks while rendering and sampling.
DEBUG_MODE = True
# JUPYTER_FILE_PATH = "../tree/images/"
# Default value of the `filePath` trait on the custom widgets below.
JUPYTER_FILE_PATH = "images/"

# Shared Output widget: Browser renders every screen into it and it is the
# value displayed at the end of the cell.
out = widgets.Output()

def startSession(file):
    """Load an uploaded dataset and open a browser on it.

    Args:
        file: one entry of ``FileUpload.value`` (a dict-like object with
            ``name``, ``type`` and ``content`` keys).

    Returns:
        The ``Browser`` created for the uploaded data.
    """
    fileName = str(file.get("name", "")).lower()
    # FileUpload reports a MIME type (e.g. "application/vnd.ms-excel"), so a
    # bare comparison with 'xls' never matched; also look at the file name.
    mimeType = str(file["type"]).lower()
    payload = io.BytesIO(file["content"])
    if mimeType == "xls" or "excel" in mimeType or fileName.endswith((".xls", ".xlsx")):
        data = pd.read_excel(payload)
    elif fileName.endswith(".tsv") or mimeType == "text/tab-separated-values":
        # The upload widget accepts .tsv; those are tab separated, not comma separated.
        data = pd.read_csv(payload, sep="\t")
    else:
        data = pd.read_csv(payload)
    s = tb.Session(data, False)
    # Mirror autoStartSession: without a Browser the new session was discarded
    # and the upload had no visible effect.
    return Browser(s, out)

def autoStartSession(fileName, embeddingsFile="allCensus_sample_embeddings.csv"):
    """Open a browser on a dataset file that is parsed by ``tb.parse_data``.

    Args:
        fileName: path of the dataset to parse.
        embeddingsFile: path of the header-less embeddings file handed to the
            session. Defaults to the file that was previously hard-coded, so
            existing callers behave exactly as before.

    Returns:
        The ``Browser`` that was created (it renders itself into ``out``).
    """
    data = tb.parse_data(fileName)
    embeddings = tb.parse_data(embeddingsFile, header=None)
    s = tb.Session(data, False, embeddings=embeddings)
    return Browser(s, out)

def selectColumns(row, colHeaders: list):
    """Return the entries of *row* for the given column headers, in order.

    Each header is translated to a position through ``s.headerDict``.
    NOTE(review): ``s`` is resolved as a module-level name, which is not
    defined in the visible part of this file -- confirm it exists before use.
    """
    return [row[s.headerDict[header]] for header in colHeaders]

class SearchBar(anywidget.AnyWidget):
    """Custom search-term input; front end lives in anywidget/searchBar.js/.css.

    All traits are synced with the front end.
    """
    _esm = "anywidget/searchBar.js"
    _css = "anywidget/searchBar.css"
    # JSON-encoded list of entered terms; Browser decodes it with json.loads.
    value = traitlets.Unicode("[]").tag(sync=True)
    # Label texts supplied by the caller (e.g. "Geography", "(AND)").
    header = traitlets.Unicode("").tag(sync=True)
    header2 = traitlets.Unicode("").tag(sync=True)
    placeholder = traitlets.Unicode("").tag(sync=True)
    # Number of entered terms; Browser treats 0 as "this bar is unused".
    count = traitlets.Int(0).tag(sync=True)
    
class TweetDisplay(anywidget.AnyWidget):
    """Paged list of posts; front end lives in anywidget/tweetDisplay.js/.css.

    All traits are synced with the front end.
    """
    _esm = "anywidget/tweetDisplay.js"
    _css = "anywidget/tweetDisplay.css"
    # Rows to show; Browser fills this with one JSON string per row (Series.to_json()).
    value = traitlets.List([]).tag(sync=True)
    # Height string; Browser passes values such as "60vh" and "70vh".
    height = traitlets.Unicode("40vh").tag(sync=True)
    # Current page; Browser observes it to reload rows and resets it to 1 on a new search.
    pageNum = traitlets.Int(1).tag(sync=True)
    # Page count; Browser sets it to ceil(current set size / TWEETS_PER_PAGE).
    maxPage = traitlets.Int(1).tag(sync=True)
    tweetsPerPage = traitlets.Int(TWEETS_PER_PAGE).tag(sync=True)
    filePath = traitlets.Unicode(JUPYTER_FILE_PATH).tag(sync=True)
    
class DatasetDisplay(anywidget.AnyWidget):
    """Dataset indicator; front end lives in anywidget/datasetDisplay.js/.css.

    All traits are synced with the front end. Nothing in this file assigns
    `size` or `fileName`, so they keep their traitlets defaults here.
    """
    _esm = "anywidget/datasetDisplay.js"
    _css = "anywidget/datasetDisplay.css"
    size = traitlets.Int().tag(sync=True)
    fileName = traitlets.Unicode().tag(sync=True)
    filePath = traitlets.Unicode(JUPYTER_FILE_PATH).tag(sync=True)
    
class PageSelect(anywidget.AnyWidget):
    """Page picker; front end lives in anywidget/pageSelect.js/.css.

    All traits are synced with the front end.
    """
    _esm = "anywidget/pageSelect.js"
    _css = "anywidget/pageSelect.css"
    # Selected page, 1-based (Browser subtracts 1 before indexing). CInt casts incoming values to int.
    value = traitlets.CInt(1).tag(sync=True)
    maxPage = traitlets.CInt(1).tag(sync=True)
    # Counter that Browser.getSummaryTweets increments after refreshing the rows.
    changeSignal = traitlets.Int(0).tag(sync=True)
    filePath = traitlets.Unicode(JUPYTER_FILE_PATH).tag(sync=True)

class WeightBy(anywidget.AnyWidget):
    """Sampling-weight selector; front end lives in anywidget/weightBy.js/.css."""
    _esm = "anywidget/weightBy.js"
    _css = "anywidget/weightBy.css"
    # The string "None" selects an unweighted sample; any other value is passed
    # by Browser to the session's weightedSample() as the weight column.
    value = traitlets.Unicode("None").tag(sync=True)

# Single-file upload control for replacing the dataset. Its value is observed
# at the bottom of the file, but the widget itself is not displayed anywhere
# in the visible code (the Box that contained it is commented out below).
fileUp = widgets.widgets.FileUpload(
    accept='.csv, .txt, .xls, .tsv',
    multiple=False,
    description='Change Dataset'
)

# fileUp = FileInput(accept=".csv,.txt,.xls,.tsv", label="Change Dataset", style_="background-color: black;", prepend_icon="")

fileUp.add_class("change-input")
datasetDisplayCustom = DatasetDisplay()
# fileUpBar is currently just an alias for the dataset display.
fileUpBar = datasetDisplayCustom
# fileUpBar = widgets.Box([datasetDisplayCustom, fileUp])
# fileUpBar.add_class("dataset-display")

class SortBar(anywidget.AnyWidget):
    """Sort controls; front end lives in anywidget/sortBar.js/.css.

    All traits are synced with the front end; Browser re-reads the rows
    whenever sortScope, sortColumn or sortOrder changes.
    """
    _esm = "anywidget/sortBar.js"
    _css = "anywidget/sortBar.css"
    # Observed by Browser but not read by any Python code in this file.
    sortScope = traitlets.Unicode("Displayed Examples").tag(sync=True)
    # Column to sort by; the string "None" disables sorting.
    sortColumn = traitlets.Unicode("None").tag(sync=True)
    # "DESC" sorts descending; any other value sorts ascending.
    sortOrder = traitlets.Unicode("DESC").tag(sync=True)
    filePath = traitlets.Unicode(JUPYTER_FILE_PATH).tag(sync=True)

class SampleSelector(anywidget.AnyWidget):
    """Sample-size picker; front end lives in anywidget/sampleSelector.js/.css.

    All traits are synced with the front end.
    """
    _esm = "anywidget/sampleSelector.js"
    _css = "anywidget/sampleSelector.css"
    filePath = traitlets.Unicode(JUPYTER_FILE_PATH).tag(sync=True)
    # Requested sample size; Browser uses -1 to mean "the whole current set".
    value = traitlets.Int(50).tag(sync=True)
    # Size of the last search result; set by Browser.search.
    total = traitlets.Int(0).tag(sync=True)
    # Browser observes this trait and draws a new sample when it changes.
    changeSignal = traitlets.Int(0).tag(sync=True)

class ToggleSwitch(anywidget.AnyWidget):
    """Toggle control; front end lives in anywidget/toggleSwitch.js/.css.

    All traits are synced with the front end.
    """
    _esm = "anywidget/toggleSwitch.js"
    _css = "anywidget/toggleSwitch.css"
    # As read by Browser: 0 removes reposts from the search, 2 (the default)
    # is reported as "no choice made", other positive values as "yes".
    value = traitlets.Int(2).tag(sync=True)
    label = traitlets.Unicode("").tag(sync=True)
    # Browser passes the dataset's min/max dates here as "%Y-%m-%d" strings;
    # per the comments in Browser.createWidgets the date-picker constraint
    # script is attached to this widget.
    calendarStart = traitlets.Unicode("").tag(sync=True)
    calendarEnd = traitlets.Unicode("").tag(sync=True)

class ParameterDisplay(anywidget.AnyWidget):
    """Parameter summary panel; front end lives in anywidget/parameterDisplay.js/.css.

    All traits are synced with the front end. The only uses in this file are
    commented out inside Browser.createWidgets, so no instance is created here.
    """
    _esm = "anywidget/parameterDisplay.js"
    _css = "anywidget/parameterDisplay.css"
    headers = traitlets.List().tag(sync=True)
    value = traitlets.List().tag(sync=True)
    notFound = traitlets.Unicode().tag(sync=True)
    firstWord = traitlets.Unicode().tag(sync=True)
    secondWord = traitlets.Unicode().tag(sync=True)
    filePath = traitlets.Unicode(JUPYTER_FILE_PATH).tag(sync=True)

class AiSummary(anywidget.AnyWidget):
    """AI summary view; front end lives in anywidget/aiSummary.js/.css.

    All traits are synced with the front end.
    """
    _esm = "anywidget/aiSummary.js"
    _css = "anywidget/aiSummary.css"
    # Summary sentences, one string per entry.
    value = traitlets.List().tag(sync=True)
    # Parallel to `value`: for each sentence, the positional row numbers that
    # Browser looks up in the current subset.
    sentenceNums = traitlets.List().tag(sync=True)
    # 0-based index of the chosen sentence (Browser adds 1 for the page selector).
    selected = traitlets.CInt(0).tag(sync=True)
    # Browser observes this trait to open the contributing-tweets screen.
    changeSignal = traitlets.Int(0).tag(sync=True)

class LoadingPage(anywidget.AnyWidget):
    """Loading screen; front end lives in anywidget/loadingScreen.js/.css.

    Browser uses an instance as the content of its "AI Summary" tab.
    """
    _esm = "anywidget/loadingScreen.js"
    _css = "anywidget/loadingScreen.css"
    filePath = traitlets.Unicode(JUPYTER_FILE_PATH).tag(sync=True)
    # Message text, e.g. "Generating AI Summary".
    text = traitlets.Unicode().tag(sync=True)

class DummyElement(anywidget.AnyWidget):
    """Trait-less widget that only loads anywidget/dummyscript.js and misc.css.

    Browser.resetDisplay displays a fresh instance after every screen change.
    """
    _esm = "anywidget/dummyscript.js"
    _css = "anywidget/misc.css"

class Browser:
    """Notebook UI for exploring the posts held in a session object.

    The browser owns every widget, remembers which screen is showing
    (``"main"``, ``"advanced"``, ``"summary"`` or ``"contributingTweets"``)
    and redraws that screen into the Output widget given to the constructor.
    """

    def __init__(self, s, out):
        """Build the widgets, run an unfiltered search and draw the main page.

        Args:
            s: session object providing ``headerDict``, ``base``,
                ``currentSet`` and the filter/sample methods used below.
            out: ``ipywidgets.Output`` every screen is rendered into.
        """
        self.s = s
        # Render into the Output that was passed in rather than a module global.
        self.out = out
        self.screen = "main"
        self.colHeaders = list(s.headerDict.keys())
        self.createWidgets()
        self.search(None)
        self.resetDisplay()
        # self.history = [StoredSearch()]

    @staticmethod
    def _rowsToJson(frame, limit=None):
        """Return one JSON string per row for the first *limit* rows of *frame* (all rows if None)."""
        count = len(frame) if limit is None else min(limit, len(frame))
        return [frame.iloc[i].to_json() for i in range(count)]

    def resetDisplay(self, b = None):
        """Clear the output area and draw the screen named by ``self.screen``.

        Args:
            b: unused; accepted so the method can serve as a widget callback.
        """
        self.out.clear_output(True)
        with self.out:
            if self.screen == "main":
                display(self.mainPage)
                self.getTweets()
                if DEBUG_MODE:
                    # NOTE(review): back() is invoked and the current set is
                    # restored right after; kept as-is because its purpose is
                    # not visible from this file.
                    temp = self.s.currentSet
                    if self.s.currentSet != self.s.base:
                        self.s.back()
                    self.s.currentSet = temp
                    assert(self.s.currentSet.size >= len(self.tweetDisplay.value))
            elif self.screen == "advanced":
                display(self.closeButton)
                display(self.advancedPage)
            elif self.screen == "summary":
                display(self.closeButton)
                display(self.aiSummary)
            elif self.screen == "contributingTweets":
                self.getSummaryTweets(None)
                display(self.closeButton)
                display(self.contributingTweets)
            # display(self.debugText)
            display(DummyElement())

    def search(self, b):
        """Re-run the search from the base set using every filter widget.

        Args:
            b: the button/event that triggered the search (may be None); it is
                only forwarded to ``updateSearchParams``.
        """
        self.s.currentSet = self.s.base
        # Decode each JSON list once instead of once per loop iteration.
        if self.geography.count > 0:
            states = json.loads(self.geography.value)
            for state in states[:self.geography.count]:
                self.s.filterBy("State", state.lower().capitalize())
        if self.userName.count > 0:
            users = json.loads(self.userName.value)
            for user in users[:self.userName.count]:
                self.s.filterBy("SenderScreenName", user)
        if self.fromDate.value is not None and self.toDate.value is not None:
            self.s.filterDate(self.fromDate.value.strftime('%Y-%m-%d'), self.toDate.value.strftime('%Y-%m-%d'))
        if self.allowRetweets.value == 0:
            self.s.removeRetweets()
        if self.exclude.count > 0:
            self.s.exclude(json.loads(self.exclude.value))
        if self.mustInclude.count > 0:
            self.s.searchKeyword(json.loads(self.mustInclude.value), False)
        if self.containOneOf.count > 0:
            self.s.searchKeyword(json.loads(self.containOneOf.value), True)
        self.sampleSelector.total = self.s.currentSet.size
        self.tweetDisplay.pageNum = 1
        # Remember the search result so later samples start from it again.
        self.currentWorkingSet = self.s.currentSet
        # self.getTypicalPosts() # TODO: only call this when the user clicks on the tab
        self.loadTab(None, self.tabs.selected_index)
        self.updateSearchParams(b)

    def tryGetNewSample(self):
        """Draw a (weighted) sample from the current set and show its first page."""
        self.tweetDisplay.pageNum = 1
        sampleSize = self.sampleSelector.value
        # -1 stands for "everything"; it is also forced when the request
        # exceeds the number of available posts.
        if self.s.currentSet.size < self.sampleSelector.value:
            self.sampleSelector.value = -1
        if self.sampleSelector.value == -1:
            sampleSize = self.s.currentSet.size

        if DEBUG_MODE:
            assert(sampleSize >= 0)
        if self.weightBy.value == "None":
            self.s.simpleRandomSample(sampleSize)
        else:
            self.s.weightedSample(sampleSize, self.weightBy.value)
        self.getTweets()

    def generateNewSample(self, b):
        """Callback: resample from the last search result."""
        # make sure to call only after a sample has already been generated
        # self.s.back()
        self.s.currentSet = self.currentWorkingSet
        self.tryGetNewSample()

    def getSummaryTweets(self, b):
        """Show the summary sentence for the selected page and its contributing rows."""
        pageIndex = self.pageSelectAi.value - 1 # convert to 0 indexing
        self.summaryLine.value = self.aiSummary.value[pageIndex]
        frame = self.s.getCurrentSubset().reset_index(drop=True)
        rowCount = len(frame)
        # Drop row numbers that do not exist in the current subset so a small
        # result set cannot raise IndexError.
        rowNums = [n for n in self.aiSummary.sentenceNums[pageIndex] if -rowCount <= n < rowCount]
        self.summaryDisplay.value = self._rowsToJson(frame.iloc[rowNums])
        self.pageSelectAi.changeSignal += 1

    def getTweets(self, change=None):
        """Refresh the tweet display, page count and title for the current page.

        Args:
            change: unused traitlets change dict (the method doubles as an observer).
        """
        pageRows = self.getSortedTweets(self.tweetDisplay.pageNum)
        if DEBUG_MODE:
            assert(len(pageRows) <= 2 * TWEETS_PER_PAGE)
        self.tweetDisplay.maxPage = math.ceil(self.s.currentSet.size / TWEETS_PER_PAGE)
        self.sampleTitle.value = f"Displaying {self.s.currentSet.size} posts from {self.sampleSelector.total} results"
        self.tweetDisplay.value = self._rowsToJson(pageRows)

    def getSortedTweets(self, pageNum):
        """Return the sorted rows for the page before *pageNum* plus page *pageNum*.

        Args:
            pageNum: 1-based page number.

        Returns:
            A slice of the current subset holding at most two pages of rows.
        """
        frame = self.s.getCurrentSubset()
        column = self.sortBar.sortColumn
        if column != "None":
            descending = self.sortBar.sortOrder == "DESC"
            # User names are compared case-insensitively.
            keyFunc = userNameToLower if column in ("Username", "SenderScreenName") else None
            frame = frame.sort_values(
                by=[column],
                ascending=not descending,
                na_position="last" if descending else "first",
                key=keyFunc,
            )
        start = max((pageNum - 2) * TWEETS_PER_PAGE, 0)
        stop = min(pageNum * TWEETS_PER_PAGE, len(frame))
        return frame.iloc[start:stop]

    def createWidgets(self):
        """Create every widget of the main page, then build the secondary pages."""
        self.advancedButton = widgets.Button(description='Click here to enter search query')
        self.advancedButton.add_class("long-button").add_class("modify-search")
        self.advancedButton.on_click(self.openSearchMenu)
        self.searchedCriteria = widgets.HTML("<p>SEARCHED CRITERIA</p>").add_class("searched-criteria")
        self.tweetDisplay = TweetDisplay(height="60vh")
        self.tweetDisplay.observe(self.getTweets, names=["pageNum"])
        self.datasetDisplay = DatasetDisplay()
        self.generateSummary = widgets.Button(description="Generate AI Summary")
        self.generateSummary.add_class("generic-button")
        self.generateSummary.on_click(self.generateAiSummary)
        self.sortBar = SortBar()
        self.sortBar.observe(self.getTweets, names=["sortScope", "sortColumn", "sortOrder"])
        # optionsBar = widgets.Box(children = [self.sortBar])
        # optionsBar.layout = widgets.Layout(align_items = "center", justify_content = "space-between", width = "100%")
        # self.searchedKeywords = ParameterDisplay(firstWord = "Searched", secondWord = "Keywords", headers = ["Must Include", "Contain one of", "Exclude"], notFound = 'To enter keywords, click "Search & Filter"')
        # self.appliedFilters = ParameterDisplay(firstWord = "Applied", secondWord = "Filters", headers = ["calendar.svg", "geography.svg", "username.svg", "repost.svg", "weight.svg"], notFound = 'To enter filters, click "Search & Filter"')
        self.advancedBar = widgets.VBox([self.searchedCriteria, self.advancedButton]).add_class("advanced-bar")
        self.sampleTitle = widgets.HTML().add_class("display-count")
        self.sampleSelector = SampleSelector()
        self.sampleSelector.observe(self.generateNewSample, names=["changeSignal"])

        # Closing tags were written as "<b/>", which is not a valid end tag.
        self.filterBy = widgets.HTML(value = "<b>Filter By</b>")
        self.dateRange = widgets.HTML(value = "<b style='font-size: 1.17em;'>Date Range </b>")
        self.fromDate = widgets.DatePicker(description = "From")
        self.toDate = widgets.DatePicker(description = "To")
        minDate = self.s.findMinDate().strftime("%Y-%m-%d")
        maxDate = self.s.findMaxDate().strftime("%Y-%m-%d")
        self.fromDate.add_class("date-constraint") # The script to set the elements attribute is attached to the toggleSwitch widget
        self.toDate.add_class("date-constraint") # This was done for convenience and should be changed later
        self.weightBy = WeightBy()

        self.dateBox = widgets.VBox([self.dateRange, self.fromDate, self.toDate])
        self.allowRetweets = ToggleSwitch(label = "Include reposts", calendarStart = minDate, calendarEnd = maxDate) # TODO: move calendar script somewhere else
        self.geography = SearchBar(header = "Geography", placeholder = "Search")
        self.userName = SearchBar(header = "Username", placeholder = "Search")
        self.filterBox = widgets.VBox([self.filterBy, self.dateBox, self.allowRetweets, self.geography, self.userName, self.weightBy])
        self.filterBox.add_class("filter-box")
        self.typicalSampleTitle = widgets.HTML()
        self.typicalSampleTitle.add_class("display-count")
        self.randomSelection = widgets.VBox([widgets.HBox([self.sampleTitle, self.sampleSelector], layout=widgets.Layout(justify_content="space-between")), self.sortBar, self.tweetDisplay])
        self.centralTweets = TweetDisplay(height="60vh")
        centralTweetBox = widgets.VBox([self.typicalSampleTitle, self.centralTweets])
        self.loadingPage = LoadingPage(text="Generating AI Summary")
        self.tabs = widgets.Tab(children=[self.randomSelection, centralTweetBox, self.loadingPage], titles=["Random Posts", "Typical Posts", "AI Summary"])
        self.tabs.observe(self.loadTab, names=["selected_index"])
        self.topBar = widgets.HBox([widgets.HTML("Tweet Browser").add_class("title"), self.datasetDisplay])
        self.topBar.layout = widgets.Layout(justify_content = "space-between", width = "100%", border_bottom = "5px solid black", padding = "3px")
        self.mainPage = widgets.VBox([self.topBar, self.advancedBar, widgets.HBox([self.filterBox, self.tabs])])
        # self.mainPage = widgets.VBox([self.paramDisplay, self.tabs])
        self.debugText = widgets.HTML("test")

        self.makeAdvancedPage()
        self.makeAiSummaryPage()
        self.makeSummaryContributionPage()

    def makeAdvancedPage(self):
        """Create the keyword-search page and its Search / Clear / close buttons."""
        self.searchButton = widgets.Button(description='Search', icon="search")
        self.hiddenButton = widgets.Button()
        self.hiddenButton.add_class("hidden-button") # work around for syncing search when the user still has input in the search bars
        self.hiddenButton.on_click(self.search)
        self.searchButton.add_class("generic-button").add_class("search-button")
        self.clearButton = widgets.Button(description='Clear All')
        # self.clearButton.on_click(self.clearSettings)
        self.clearButton.add_class("clear-button")
        self.bottomBar = widgets.HBox([self.clearButton, self.searchButton, self.hiddenButton], layout = widgets.Layout(justify_content = "flex-end"))
        self.keyWordSearch = widgets.HTML(value = "<b>Keyword Search</b>")
        self.keyWordSearch.add_class("keyword-search")
        self.mustInclude = SearchBar(header = "Must include all", header2="(AND)", placeholder='e.g. “civil null” means each post in the result must contain the word “civil” and “null”')
        self.containOneOf = SearchBar(header = "Must include one Of", header2="(OR)", placeholder='e.g. “census penny” means each post in the result must contain either “census” or “penny” or both')
        self.exclude = SearchBar(header = "Must not include", header2="(NOT)", placeholder='e.g. “toxic ban” means none of the posts in the result contains the word “toxic” and “ban”')
        self.searches = widgets.VBox([self.keyWordSearch, self.mustInclude, self.containOneOf, self.exclude])
        self.searches.add_class("search-box")
        self.closeButton = widgets.Button(description = 'X')
        self.closeButton.add_class("close-button")
        self.closeButton.on_click(self.closeSearchMenu)
        self.advancedBox = widgets.HBox([self.searches, self.filterBox])
        self.advancedBox.add_class("advanced-box")
        # self.advancedPage = widgets.VBox([self.advancedBox, self.bottomBar])
        self.advancedPage = widgets.VBox([self.searches, self.bottomBar])

    def makeAiSummaryPage(self):
        """Create the AI summary widget, pre-filled with fixed sentences and row numbers."""
        self.aiSummary = AiSummary(
            sentenceNums=[
                [1, 3, 4, 8, 11, 13, 21, 24, 27, 31, 35, 36, 37, 38, 39],
                [6, 12, 16, 22],
                [3, 7, 28],
                [14, 25, 27, 43],
            ],
            value=[
                "The tweets largely focus on the importance of participating in the U.S. Census, emphasizing that it is crucial to ensure everyone is counted in order to determine resources and representation.",
                "Many tweets emphasize that the census should include everyone, regardless of their race, ethnicity, citizenship status, or any other characteristic.",
                "Some tweets highlight the role of the census in tracking population growth and demographic changes.",
                "There are also discussions about the inclusion or exclusion of non-citizens or undocumented immigrants from the census",
            ],
        )
        self.aiSummary.observe(self.showContributingTweets, names=["changeSignal"])

    def makeSummaryContributionPage(self):
        """Create the page that lists the rows behind one summary sentence."""
        self.backArrow = widgets.Button(icon="solid fa-arrow-left").add_class("back-button")
        self.backArrow.on_click(self.generateAiSummary)
        self.summaryLine = widgets.HTML(value="placeholder").add_class("selected-sentence")
        self.pageSelectAi = PageSelect()
        self.pageSelectAi.observe(self.getSummaryTweets, names=["value"])
        self.summaryDisplay = TweetDisplay(height="70vh")
        leftBar = widgets.VBox([self.backArrow, self.summaryLine, self.pageSelectAi]).add_class("left-bar")
        summaryBar = widgets.HBox([leftBar, self.summaryDisplay])
        self.contributingTweets = widgets.VBox([widgets.HTML(value="<h1>Contributing Tweets</h1>"), widgets.HTML(value="Here are the tweets that contribute to this part of the summary"), summaryBar])

    def openSearchMenu(self, change):
        """Callback: switch to the keyword-search screen."""
        self.screen = "advanced"
        self.resetDisplay()

    def generateAiSummary(self, change):
        """Callback: switch to the AI summary screen."""
        self.screen = "summary"
        self.resetDisplay()

    def clearSettings(self, change):
        """Reset every search and filter widget to its empty/default state.

        The search bars and the weight selector are ``Unicode`` traits, so
        they are reset to the strings ``"[]"`` and ``"None"``; assigning a
        list or ``None`` to them raises ``traitlets.TraitError``.
        """
        for bar in (self.geography, self.userName, self.exclude, self.mustInclude, self.containOneOf):
            bar.value = "[]"
            bar.count = 0
        self.fromDate.value = None
        self.toDate.value = None
        # 2 is the ToggleSwitch default, which updateSearchParams reports as
        # "no choice made"; 0 would actively remove reposts.
        self.allowRetweets.value = 2
        self.weightBy.value = "None"
        self.resetDisplay()

    def getTypicalPosts(self, change=None):
        """Show up to five of the session's central posts in the "Typical Posts" tab."""
        self.s.currentSet = self.currentWorkingSet
        # result = self.s.getCentral(self.s.currentSet.parent.parents[0])
        # self.s.back()
        result = self.s.getCentral()
        count = min(5, len(result))
        # ipywidgets 8 widgets have no `visible` trait; visibility is
        # controlled through the widget's layout.
        if count == 0:
            self.typicalSampleTitle.layout.visibility = "hidden"
            self.typicalSampleTitle.value = ""
        else:
            self.typicalSampleTitle.layout.visibility = "visible"
            self.typicalSampleTitle.value = f"Displaying {count} typical posts from {len(result)} results"
        self.centralTweets.value = self._rowsToJson(result, count)

    # def restoreSearch(self, settings):

    def loadTab(self, change, tabNum = None):
        """Load the content of a tab on demand.

        Args:
            change: traitlets change dict from the Tab widget, or None when
                called directly.
            tabNum: tab index to load when *change* is None.
        """
        if change is not None:
            tabNum = change["new"]
        if tabNum == 1:
            self.getTypicalPosts()

    def showContributingTweets(self, change):
        """Callback: open the contributing-tweets screen for the selected sentence."""
        self.screen = "contributingTweets"
        # One page per summary sentence. PageSelect only defines `maxPage`;
        # the former `totalTweets` / `tweetsPerPage` attributes do not exist
        # on it and raised AttributeError.
        self.pageSelectAi.maxPage = max(1, len(self.aiSummary.sentenceNums))
        self.pageSelectAi.value = self.aiSummary.selected + 1 # convert from 0 indexing
        self.resetDisplay()

    def updateSearchParams(self, change):
        """Collect a textual summary of the active search, then return to the main page.

        The collected strings are currently unused because the widgets that
        displayed them are commented out; they are kept for when those
        displays are restored.
        """
        val1 = val2 = val3 = ""
        if self.mustInclude.count > 0:
            val1 = ', '.join(json.loads(self.mustInclude.value))
        if self.containOneOf.count > 0:
            val2 = ', '.join(json.loads(self.containOneOf.value))
        if self.exclude.count > 0:
            val3 = ', '.join(json.loads(self.exclude.value))
        # self.searchedKeywords.value = [val1, val2, val3]
        selectedDates = ''
        if self.fromDate.value is not None and self.toDate.value is not None:
            selectedDates = str(self.fromDate.value) + " to " + str(self.toDate.value)
        geo = usrname = ""
        if self.geography.count > 0:
            geo = ', '.join(json.loads(self.geography.value))
        if self.userName.count > 0:
            usrname = ', '.join(json.loads(self.userName.value))
        retweets = "" if self.allowRetweets.value == 2 else ("yes" if self.allowRetweets.value > 0 else "no")
        weightBy = ""
        if self.weightBy.value != "None":
            weightBy = self.weightBy.value
            if self.weightBy.value == "SenderInfluencerScore":
                weightBy = "Influencer Score"
        # self.appliedFilters.value = [selectedDates, geo, usrname, retweets, weightBy]
        self.closeSearchMenu(change)

    def closeSearchMenu(self, change):
        """Callback: return to the main screen."""
        self.screen = "main"
        self.resetDisplay()
    
def fileHandler(change):
    """Observer for ``fileUp.value``: start a session from the uploaded file.

    Args:
        change: traitlets change dict (unused; the current widget value is read).
    """
    # The value can be empty (e.g. when the widget is reset); there is
    # nothing to load in that case.
    if not fileUp.value:
        return
    startSession(fileUp.value[0])
    
def userNameToLower(input):
    """Sort-key helper: return *input* with every string lower-cased.

    *input* must offer the pandas ``.str`` accessor; Browser passes this
    function as the ``key`` of ``DataFrame.sort_values``.
    """
    lowered = input.str.lower()
    return lowered

# with out:
#     display(fileUpBar)

# Load the bundled sample dataset and render the browser into `out`.
autoStartSession("allCensus_sample.csv")

# Start a new session whenever a file is uploaded through `fileUp`.
fileUp.observe(fileHandler, names=["value"])

# Last expression of the notebook cell: displays the shared Output widget.
out


# Output()