In [1]:
import html
import io

import ipywidgets as widgets
import jupyter
import pandas as pd
import voila
from IPython.display import display
from matplotlib import pyplot as plt

from tweet_browser_test import tweet_browser as tb

# Shared output area: the upload prompt is rendered into it below, and it is
# handed to the Browser created in startSession().
out = widgets.Output()

def startSession(file):
    """Load an uploaded file into a session and open a Browser on it.

    Args:
        file: One entry of the upload widget's ``value``. It must support
            ``file['type']`` and ``file.content`` (raw bytes); a ``'name'``
            key, when present, is used to pick the parser.

    Returns:
        The Browser that was created, so the caller can keep a reference.
    """
    # The 'type' entry of an upload is normally a MIME type, so comparing it
    # to 'xls' alone never selects the Excel reader; go by the file extension
    # and keep the original comparison as a fallback.
    fileName = str(file['name']).lower() if 'name' in file else ''
    _, dot, extension = fileName.rpartition('.')
    if not dot:
        extension = ''

    buffer = io.BytesIO(file.content)
    if extension in ('xls', 'xlsx') or file['type'] == 'xls':
        df = pd.read_excel(buffer)
    elif extension == 'tsv':
        # Tab-separated files would collapse into one column with the default ','.
        df = pd.read_csv(buffer, sep='\t')
    else:
        df = pd.read_csv(buffer)

    db = tb.DataBaseSim(df)
    s = tb.Session(db)
    browser = Browser(s, out)
    return browser
    

def selectColumns(row, colHeaders: list, headerDict=None):
    """Pick the entries of ``row`` that belong to the given column headers.

    Args:
        row: Indexable record; it is indexed with the values of ``headerDict``.
        colHeaders: Column names to extract, in the order they should appear.
        headerDict: Mapping from column name to the key used to index ``row``.
            When omitted, the ``headerDict`` of a module-level session ``s``
            is used, as before.

    Returns:
        A list with one entry of ``row`` per name in ``colHeaders``.

    Raises:
        NameError: If ``headerDict`` is omitted and no module-level ``s`` exists.
        KeyError: If a name in ``colHeaders`` is missing from the mapping.
    """
    if headerDict is None:
        # NOTE(review): no module-level `s` is defined in this file, so callers
        # should pass headerDict explicitly (e.g. session.headerDict).
        headerDict = s.headerDict
    return [row[headerDict[name]] for name in colHeaders]
    
class Browser:
    """Interactive ipywidgets front end over a tweet browser session.

    The browser renders a menu (set size, column picker, operation selector
    and navigation buttons) and a stack with one control bar per operation
    into ``out``. Sampling, searching, filtering and navigation update the
    session and then re-render the current subset as an HTML table;
    clustering shows the object returned by the session instead.

    Attributes:
        s: The session object all operations are delegated to.
        out: Output widget everything is rendered into.
        operators: The five operation control bars, in selector order.
        colHeaders: Column names taken from ``s.headerDict``.
    """

    # Columns pre-selected for display when the loaded data contains them.
    DEFAULT_COLUMNS = ("SenderScreenName", "CreatedTime", "Message")

    _TABLE_STYLE = "<style>table, th, td {border: 1px solid black;} th, td {padding: 10px;}</style>"
    # "When to cluster" choices that are always offered ...
    _CLUSTER_WHEN_OPTIONS = (("Before stage 1", "before_stage1"), ("Between 1 and 2", "btwn"))
    # ... and the one that only exists when a second reduction stage is shown.
    _CLUSTER_AFTER_STAGE2 = ("After stage 2", "after_stage2")

    def __init__(self, s, out):
        """Build all widgets for session ``s`` and draw the initial UI into ``out``."""
        self.s = s
        self.out = out
        self.operators = [None] * 5
        self.colHeaders = list(s.headerDict.keys())
        self.createWidgets()
        self.resetDisplay()

    def forward(self, b):
        """Button callback: move to the next working set and redraw it."""
        try:
            self.s.next()
            self.getTweets(b)
        except IndexError:
            # Deliberate best effort: the button is documented as acting
            # "if possible", so an IndexError here leaves the display as is.
            pass

    def back(self, b):
        """Button callback: move to the previous working set and redraw it."""
        try:
            self.s.back()
            self.getTweets(b)
        except IndexError:
            # Deliberate best effort, see forward().
            pass

    def resetDisplay(self, b=None):
        """Clear the output area and redraw only the menu and operation stack.

        Also refreshes the set-size label and caps the sample-size slider at
        the size of the current set. ``b`` is the clicked button when used as
        a callback and is ignored.
        """
        size = self.s.currentSet.size
        self.currentSetCount.value = str(size)
        self.sampleSelector.max = size
        self.out.clear_output(True)
        # Render into this browser's own output widget, not a module global.
        with self.out:
            display(self.menu)
            display(self.stack)

    @staticmethod
    def _renderTable(dataSet, columns):
        """Return an HTML table of ``dataSet`` restricted to ``columns``.

        Header names and cell values are HTML-escaped because the cell text
        comes from the loaded data and must not be interpreted as markup.
        """
        parts = [Browser._TABLE_STYLE, "<table> <tr> "]
        parts.extend(" <th> " + html.escape(str(header)) + " </th> " for header in columns)
        parts.append(" </tr> ")
        for _, row in dataSet.iterrows():
            parts.append(" <tr> ")
            parts.extend(" <td>" + html.escape(str(row.loc[col])) + "</td> " for col in columns)
            parts.append(" </tr> ")
        parts.append(" </table>")
        return "".join(parts)

    def getTweets(self, b):
        """Redraw the UI followed by a table of the session's current subset.

        Only the columns currently selected in the column picker are shown.
        ``b`` is the clicked button when used as a callback and is ignored.
        """
        self.resetDisplay()
        tableHtml = self._renderTable(self.s.getCurrentSubset(), self.columns.value)
        table = widgets.HTML(value=tableHtml)
        with self.out:
            print("Tweets displayed:", self.s.currentSet.size)
            display(table)

    def randomSample(self, b):
        """Button callback: take a simple random sample of the chosen size."""
        self.s.simpleRandomSample(self.sampleSelector.value)
        self.getTweets(b)

    def weightedSample(self, b):
        """Button callback: take a sample weighted by the typed column name."""
        self.s.weightedSample(self.sampleSelector.value, self.weightedSelector.value)
        self.getTweets(b)

    def searchKeyword(self, b):
        """Button callback: run the search selected by the search-type toggle."""
        mode = self.searchType.value
        query = self.searchBox.value
        if mode == 'Contains Any':
            self.s.searchKeyword(query.split(' '), True)
        elif mode == 'Contains All':
            self.s.searchKeyword(query.split(' '))
        elif mode == 'Advanced':
            self.s.advancedSearch(query)
        elif mode == 'Regular Expression':
            self.s.regexSearch(query)
        else:
            return
        self.getTweets(b)

    def filterBy(self, b):
        """Button callback: filter the working set by ``<column> <value>``.

        The filter box text is split at its first space; the part before it
        must be a key of ``s.headerDict``. Problems are reported inside the
        output widget so the user actually sees them.
        """
        columnName, separator, value = self.filterBox.value.partition(" ")
        if not separator:
            with self.out:
                print("Invalid input")
            return
        if columnName not in self.s.headerDict:
            with self.out:
                print("Invalid column")
            return
        self.s.filterBy(columnName, value)
        self.getTweets(b)

    def _clusteringWhenOptions(self, twoStage):
        """Return the "when to cluster" choices, with or without a second stage."""
        options = list(self._CLUSTER_WHEN_OPTIONS)
        if twoStage:
            options.append(self._CLUSTER_AFTER_STAGE2)
        return options

    def _clusterBarChildren(self, twoStage):
        """Return the clustering controls in display order.

        The second reduction method is only included when ``twoStage`` is true.
        """
        children = [self.min_df, self.dimRed1, self.dimensions]
        if twoStage:
            children.append(self.dimRed2)
        children.extend([self.clusteringWhen, self.clusteringMethod,
                         self.clusterStack, self.clusterButton])
        return children

    def dimensionHandler(self, change):
        """Observer for the dimensions input: show or hide second-stage controls.

        When the value crosses the threshold of 2, the "After stage 2" choice
        and the second reduction method are added (above 2) or removed (2 or
        below). The existing widgets are updated in place, so the operation
        stack and its link to the selector stay intact.
        """
        twoStage = change['new'] > 2
        if twoStage == (change['old'] > 2):
            # Same side of the threshold as before: nothing to rearrange.
            return
        self.clusteringWhen.options = self._clusteringWhenOptions(twoStage)
        self.clusteringWhen.value = "before_stage1"
        self.clusterBar.children = self._clusterBarChildren(twoStage)

    def cluster(self, b):
        """Button callback: run dimension reduction and clustering, show the result.

        The numeric input belonging to the selected clustering method is
        passed as ``num_clusters``, ``min_obs`` and ``num_neighbors`` alike
        (presumably the session only uses the one relevant to the method;
        verify against tweet_browser).

        Returns:
            Whatever ``s.dimRed_and_clustering`` returned.
        """
        # The inputs in clusterStack are ordered like the clustering methods,
        # so the method's index identifies the input the user is looking at.
        selectedInput = self.clusterStack.children[self.clusteringMethod.index]
        inputValue = selectedInput.value
        result = self.s.dimRed_and_clustering(
            dimRed1_method=self.dimRed1.value,
            dimRed1_dims=self.dimensions.value,
            dimRed2_method=self.dimRed2.value,
            clustering_when=self.clusteringWhen.value,
            clustering_method=self.clusteringMethod.value,
            num_clusters=inputValue,
            min_obs=inputValue,
            num_neighbors=inputValue)
        with self.out:
            result.show()
        return result

    def createWidgets(self):
        """Create every widget, wire the callbacks and fill ``self.operators``."""
        headers = list(self.s.headerDict.keys())
        setSize = self.s.currentSet.size

        ####### UI Widgets #######
        # Menu Buttons
        self.selector = widgets.Dropdown(
            options=['Random Sample', 'Weighted Sample', 'Search Keyword', 'Filter By', 'Clustering'],
            value='Random Sample', description='Operation:')
        self.clearButton = widgets.Button(description='Clear', tooltip='Clear current output display')
        self.refreshButton = widgets.Button(description= 'Refresh', tooltip= 'Display the current subset again')
        self.backButton = widgets.Button(description= '<-', tooltip= 'Go back to the previous working set, if possible')
        self.forwardButton = widgets.Button(description= '->', tooltip= 'Go forward to the next working set, if possible')
        self.currentSetCount = widgets.HTML(value = "0")
        self.currentSet = widgets.HBox([widgets.HTML(value = "Current Set Size: "), self.currentSetCount])
        # Pre-select only default columns that exist in this data set; fall
        # back to the first few columns so arbitrary files can still be loaded.
        defaultColumns = [col for col in self.DEFAULT_COLUMNS if col in headers] or headers[:3]
        self.columns = widgets.SelectMultiple(options=headers, layout={'width': 'max-content'},
                                              value=defaultColumns, description='Columns')
        self.columnsAccordian = widgets.Accordion(children = [self.columns], titles= ["Select Columns"])
        self.menuButtons = widgets.HBox([self.selector, self.clearButton, self.refreshButton,
                                         self.backButton, self.forwardButton])
        self.menu = widgets.VBox([self.currentSet, self.columnsAccordian, self.menuButtons])
        self.clearButton.on_click(self.resetDisplay)
        self.refreshButton.on_click(self.getTweets)
        self.backButton.on_click(self.back)
        self.forwardButton.on_click(self.forward)

        # Random Sample
        self.randomSampleButton = widgets.Button(description='Get Tweets', tooltip='Get Tweets')
        # Start at 300, or at the set size when the set is smaller than that.
        self.sampleSelector = widgets.IntSlider(value=min(300, setSize), min=0, max=setSize, step=1,
                        description='Sample size', continuous_update=False, orientation='horizontal', readout=True,
                        readout_format='d', tooltip= 'The number of tweets that will be selected in the sample')
        self.randomSampleBar = widgets.HBox([self.sampleSelector, self.randomSampleButton])
        self.randomSampleButton.on_click(self.randomSample)
        self.operators[0] = self.randomSampleBar

        # Weighted Sample (shares the sample-size slider with Random Sample)
        self.weightedButton = widgets.Button(description='Get Tweets', tooltip='Get Tweets')
        self.weightedSelector = widgets.Text(value='', placeholder='<Column_name>', description='Weight by')
        self.weightedBar = widgets.HBox([self.sampleSelector, self.weightedSelector, self.weightedButton])
        self.weightedButton.on_click(self.weightedSample)
        self.operators[1] = self.weightedBar

        # Search Bar
        self.searchBox = widgets.Text(value='', placeholder='Keyword', description='Input:')
        self.searchButton = widgets.Button(description='Search', tooltip='Run search on current input')
        ContainsAnyToolTip = 'Returns tweets with any typed keyword (seperated by spaces).'
        ContainsAllToolTip = 'Returns tweets containing all typed space-separated keywords.'
        AdvancedSearchToolTip = 'Search for tweets that match the inputted logical expression. Words must be in single quotes.'
        RegularExpressionToolTip = 'Returns tweets that match regular expression.'
        self.searchType = widgets.ToggleButtons(options=['Contains Any', 'Contains All', 'Advanced', 'Regular Expression'],
            tooltips=[ContainsAnyToolTip, ContainsAllToolTip, AdvancedSearchToolTip, RegularExpressionToolTip])
        self.searchBar = widgets.VBox([widgets.HBox([self.searchBox, self.searchButton]), self.searchType])
        self.searchButton.on_click(self.searchKeyword)
        self.operators[2] = self.searchBar

        # Filter By
        self.filterBox = widgets.Text(value='', placeholder='<Column_name> <value>', description='Filter:')
        self.filterButton = widgets.Button(description='Filter', tooltip='Filter by column on current input')
        self.filterBar = widgets.HBox([self.filterBox, self.filterButton])
        self.filterButton.on_click(self.filterBy)
        self.operators[3] = self.filterBar

        # Clustering
        self.min_df = widgets.IntText(value= 1, description= "Word Frequency", style={'description_width': 'initial'},
                                  tooltip= "Specifies the minum frequency required for a word to be considered during processing")
        self.dimRed1 = widgets.ToggleButtons(options=[('PCA', "pca"), ("UMAP", "umap")], value="pca",
                                        description="First dimension reduction method: ",
                                  tooltip= "The method that will reduce the dimensions represented by words with the min frequency")
        # Never let the upper bound fall below the lower bound of 2, which
        # would make the widget reject its own configuration on narrow data.
        self.dimensions = widgets.BoundedIntText(value= 2, min= 2, max= max(2, len(self.s.headerDict) - 2),
            description= "Dimensions: ", tooltip= "The number of dimensions the first dimension reduction method should reduce to")
        self.dimRed2 = widgets.ToggleButtons(options=[('PCA', "pca"), ("UMAP", "umap")], value="pca",
                                        description="Second dimension reduction method: ",
                                       tooltip= "The second method used to reduce dimensionality to 2")
        self.clusteringWhen = widgets.ToggleButtons(options=self._clusteringWhenOptions(False),
                                        value= "before_stage1", description= "When to cluster: ",
                                            tooltip= "When clustering will be performed relative to dimension reduction")
        self.clusteringMethod = widgets.ToggleButtons(options=[("GMM", "gmm"), ("K-Means", "k-means"), ("HDBScan", "hdbscan"),
                                                          ("Leiden", "leiden")], value="gmm", description="Clustering method ",
                                                tooltip= "The clustering algorithm that will be used")
        # One numeric input per clustering method, in the same order as the
        # method toggle so the two can be linked by index.
        self.clusteringInput = widgets.BoundedIntText(description= "Number of Clusters: ", value= 2, min= 2,
                                                 style={'description_width': 'initial'}, tooltip= "The number of clusters")
        self.clusteringInput1 = widgets.BoundedIntText(description= "Number of Clusters: ", value= 2, min= 2,
                                                  style={'description_width': 'initial'}, tooltip= "The number of clusters")
        self.clusteringInput2 = widgets.BoundedIntText(description= "Minimum Cluster Size: ", value= 2, min= 2,
                                                  style={'description_width': 'initial'},
                                                  tooltip= "Minimum objects required per cluster")
        self.clusteringInput3 = widgets.BoundedIntText(description= "Number of Neighbors: ", value= 2, min= 1,
                                                  style={'description_width': 'initial'},
                                                  tooltip= "The number of neighbors to compare with")
        self.clusterStack = widgets.Stack([self.clusteringInput, self.clusteringInput1,
                                           self.clusteringInput2, self.clusteringInput3], selected_index= 0)
        self.clusterButton = widgets.Button(description='Cluster', tooltip= 'Start clustering')
        self.clusterBar = widgets.VBox(self._clusterBarChildren(False))
        widgets.jslink((self.clusteringMethod, 'index'), (self.clusterStack, 'selected_index'))
        self.clusterButton.on_click(self.cluster)
        self.dimensions.observe(self.dimensionHandler, names='value')
        self.operators[4] = self.clusterBar

        # The operation selector chooses which control bar of the stack is shown.
        self.stack = widgets.Stack(self.operators, selected_index= 0)
        widgets.jslink((self.selector, 'index'), (self.stack, 'selected_index'))
    
# Upload control: a single-file picker limited to the listed extensions.
fileUp = widgets.FileUpload(accept='.csv, .txt, .xls, .tsv', multiple=False)
    
def fileHandler(change):
    """Start a browsing session for the file that was just uploaded.

    Intended as an observer of the upload widget's ``value`` trait.

    Args:
        change: Change notification; ``change['new']`` is the new sequence of
            uploaded files.
    """
    uploaded = change['new']
    if not uploaded:
        # The value can also change to "no files"; there is nothing to open then.
        return
    startSession(uploaded[0])

    
with out:
    # Initial screen: a prompt followed by the upload control.
    print("Select a file to analyze:")
    display(fileUp)

# Call fileHandler whenever the upload widget's value changes.
fileUp.observe(fileHandler, names="value")
# Trailing expression; presumably what makes the notebook render the output
# widget as the cell result (confirm when running outside a notebook).
out

Output()