In [3]:
import jupyter
from tweet_browser_test import tweet_browser as tb
import ipywidgets as widgets
import voila
from matplotlib import pyplot as plt
from ipysheet import sheet, cell, row, column, cell_range
from IPython.display import display
import pandas as pd
import io

# Default column headers; rebound a few lines below from the session's own headers.
colHeaders = ["SenderScreenName", "CreatedTime", "Message"]   
# One slot per operation panel; each UI section further down fills in its slot.
operators = [None] * 5
# Session used until the user uploads a file.
# NOTE(review): the captured cell output shows this step failing when the sample CSV is
# not present in the working directory - confirm the path before running.
s = tb.createSession("allCensus_sample.csv")
colHeaders = list(s.headerDict.keys())
# Output widget that the handlers below render into.
out = widgets.Output()

def startSession(file):
    """Replace the global session ``s`` with one built from an uploaded file.

    The parser is chosen from the file-name extension. The upload widget
    reports a MIME type in ``type`` (for example ``text/csv``), so comparing
    it against the bare string ``'xls'`` practically never selects the Excel
    reader. The legacy ``type == 'xls'`` check is still honoured for callers
    that pass such a value.

    Args:
        file: One entry of ``FileUpload.value``. Read via ``file['name']``
            (optional), ``file['type']`` and ``file.content``.
    """
    global s
    try:
        fileName = str(file['name']).lower()
    except KeyError:
        # Entries without a name fall back to the type check alone.
        fileName = ''
    _, dot, extension = fileName.rpartition('.')
    if not dot:
        extension = ''

    buffer = io.BytesIO(file.content)
    if extension in ('xls', 'xlsx') or file['type'] == 'xls':
        df = pd.read_excel(buffer)
    elif extension == 'tsv':
        # Tab-separated files would otherwise load as a single comma-less column.
        df = pd.read_csv(buffer, sep='\t')
    else:
        df = pd.read_csv(buffer)
    s = tb.Session(df)

def selectColumns(row, colHeaders: list):
    """Return the entries of ``row`` that correspond to the given headers.

    Args:
        row: Indexable record; it is indexed with the values stored in
            ``s.headerDict``.
        colHeaders: Header names to pick, in the order they should appear.

    Returns:
        A list with one value per requested header.
    """
    # Lazily translate each header into its key for ``row``, then index the row.
    keys = (s.headerDict[name] for name in colHeaders)
    return [row[key] for key in keys]

def resetDisplay(b = None):
    """Refresh the size read-outs and redraw the menu and operation panel.

    Args:
        b: Optional triggering button; unused, present so the function can be
            registered directly as an ``on_click`` callback.
    """
    setSize = s.currentSet.size
    currentSetCount.value = str(setSize)
    # The sample slider may never ask for more items than the current set holds.
    sampleSelector.max = setSize
    out.clear_output(True)
    with out:
        for widget in (menu, stack):
            display(widget)
        
def back(b):
    """Step the session back one working set and redraw the table.

    Args:
        b: Triggering button, forwarded to ``getTweets``.

    An ``IndexError`` raised while stepping or redrawing is ignored.
    """
    try:
        s.back()
        getTweets(b)
    except IndexError:
        # Presumably there is no earlier working set - leave the display as is.
        return
    
def forward(b):
    """Step the session forward one working set and redraw the table.

    Args:
        b: Triggering button, forwarded to ``getTweets``.

    An ``IndexError`` raised while stepping or redrawing is ignored.
    """
    try:
        s.next()
        getTweets(b)
    except IndexError:
        # Presumably there is no later working set - leave the display as is.
        return
    
def randomSample(b):
    """Draw a simple random sample of the slider-selected size and show it.

    Args:
        b: Triggering button, forwarded to ``getTweets``.
    """
    # Diagnostic output: current set size first, then the requested sample size.
    print(s.currentSet.size)
    requested = sampleSelector.value
    print(requested)
    s.simpleRandomSample(requested)
    getTweets(b)

def weightedSample(b):
    """Draw a weighted sample of the slider-selected size and show it.

    The text entered in ``weightedSelector`` is passed as the second argument
    of ``s.weightedSample``.

    Args:
        b: Triggering button, forwarded to ``getTweets``.
    """
    # Diagnostic output: current set size first, then the requested sample size.
    print(s.currentSet.size)
    requested = sampleSelector.value
    print(requested)
    s.weightedSample(requested, weightedSelector.value)
    getTweets(b)
    
def searchKeyword(b):
    """Run the search selected in ``searchType`` on the text in ``searchBox``.

    Args:
        b: Triggering button, forwarded to ``getTweets``.

    Nothing happens when ``searchType`` holds a value that is not one of the
    four known modes.
    """
    # One entry per search mode; each receives the raw text of the search box.
    runners = {
        'Contains Any': lambda text: s.searchKeyword(text.split(' '), True),
        'Contains All': lambda text: s.searchKeyword(text.split(' ')),
        'Advanced': lambda text: s.advancedSearch(text),
        'Regular Expression': lambda text: s.regexSearch(text),
    }
    runner = runners.get(searchType.value)
    if runner is None:
        return
    runner(searchBox.value)
    getTweets(b)
    
def filterBy(b):
    """Filter the current set using the ``<column> <value>`` text in ``filterBox``.

    The text is split at its first space: everything before it is the column
    name, everything after it is the value to filter on.

    Args:
        b: Triggering button, forwarded to ``getTweets``.
    """
    columnName, separator, value = filterBox.value.partition(" ")
    if not separator:
        # No space at all, so there is no value part.
        print("Invalid input")
        return
    if columnName not in s.headerDict:
        print("Invalid column")
        return
    s.filterBy(columnName, value)
    getTweets(b)

def dimensionHandler(change):
    """Rebuild the clustering panel when ``dimensions`` crosses the value 2.

    Above 2 dimensions the panel also offers the second reduction method
    (``dimRed2``) and the "After stage 2" clustering option; at 2 it does not.
    Nothing is rebuilt when the old and new values lie on the same side of
    that threshold.

    The rebuilt ``clusteringWhen`` and ``clusterBar`` are stored in the
    module-level names so that ``cluster`` reads the widget that is actually
    displayed, and the new ``stack`` is linked to ``selector`` again so the
    Operation dropdown keeps switching panels after a rebuild.

    Args:
        change: Trait-change dictionary; its ``'old'`` and ``'new'`` entries
            are compared with 2.
    """
    global clusteringWhen, clusterBar, stack
    needsSecondStage = change['new'] > 2
    if needsSecondStage == (change['old'] > 2):
        # Same side of the threshold: the current layout is already correct.
        return

    whenOptions = [("Before stage 1", "before_stage1"), ("Between 1 and 2", "btwn")]
    panelWidgets = [min_df, dimRed1, dimensions]
    if needsSecondStage:
        whenOptions.append(("After stage 2", "after_stage2"))
        panelWidgets.append(dimRed2)

    clusteringWhen = widgets.ToggleButtons(
        options=whenOptions,
        value="before_stage1",
        description="When to cluster: ",
        tooltip="When clustering will be performed relative to dimension reduction",
    )
    panelWidgets.extend([clusteringWhen, clusteringMethod, clusterStack, clusterButton])
    clusterBar = widgets.VBox(panelWidgets)

    # ``operators`` is mutated in place, so every holder of the list sees the new panel.
    operators[4] = clusterBar
    stack = widgets.Stack(operators, selected_index=4)
    # A fresh Stack carries no links; restore the dropdown -> panel coupling.
    widgets.jslink((selector, 'index'), (stack, 'selected_index'))
    resetDisplay()

def cluster(b):
    """Run dimension reduction and clustering with the settings in the panel.

    The value of the input currently selected in ``clusterStack`` is passed as
    ``num_clusters``, ``min_obs`` and ``num_neighbors`` alike.

    Args:
        b: Triggering button (unused).

    Returns:
        Whatever ``s.dimRed_and_clustering`` returns; its ``show()`` method is
        called inside the output widget before returning.
    """
    docWordMatrix, _words = s.make_full_docWordMatrix(min_df=min_df.value)
    activeInput = clusterStack.children[clusterStack.selected_index]
    methodParameter = activeInput.value
    outcome = s.dimRed_and_clustering(
        docWordMatrix_orig=docWordMatrix,
        dimRed1_method=dimRed1.value,
        dimRed1_dims=dimensions.value,
        dimRed2_method=dimRed2.value,
        clustering_when=clusteringWhen.value,
        clustering_method=clusteringMethod.value,
        num_clusters=methodParameter,
        min_obs=methodParameter,
        num_neighbors=methodParameter,
    )
    with out:
        outcome.show()
    return outcome
    

def getTweets(b):
    """Redraw the UI and render the current subset as an HTML table.

    Only the columns selected in ``columns`` are shown. Header and cell text
    is HTML-escaped, so markup contained in the data is displayed literally
    instead of being interpreted by the browser. The header row is now closed
    with ``</tr>``.

    Args:
        b: Triggering button (unused; present so the function can be
            registered as an ``on_click`` callback).
    """
    import html  # local import keeps the block self-contained

    resetDisplay()
    selected = list(columns.value)
    dataSet = s.getCurrentSubset()

    # Collect fragments and join once instead of growing a string with ``+=``.
    parts = [
        "<style>table, th, td {border: 1px solid black;} th, td {padding: 10px;}</style>",
        "<table> <tr> ",
    ]
    parts.extend(" <th> " + html.escape(str(header)) + " </th> " for header in selected)
    parts.append(" </tr> ")
    # assumes dataSet is a pandas DataFrame (the original indexed it via .iloc/.loc)
    for _, record in dataSet.iterrows():
        parts.append(" <tr> ")
        parts.extend(
            " <td>" + html.escape(str(record.loc[name])) + "</td> " for name in selected
        )
        parts.append(" </tr> ")
    parts.append(" </table>")

    table = widgets.HTML(value="".join(parts))
    with out:
        print("Tweets displayed:", s.currentSet.size)
        display(table)

####### UI Widgets #######

# File upload
# Upload control for the data file; restricted to a single file per upload.
fileUp = widgets.widgets.FileUpload(
    accept='.csv, .txt, .xls, .tsv',  # Accepted file extension e.g. '.txt', '.pdf', 'image/*', 'image/*,.pdf'
    multiple=False  # True to accept multiple files upload else False
)

# Menu Buttons
# Dropdown listing the five operations, one per slot of `operators`.
selector = widgets.Dropdown(
    options=['Random Sample', 'Weighted Sample', 'Search Keyword', 'Filter By', 'Clustering'],
    value='Random Sample', description='Operation:')
clearButton = widgets.Button(description='Clear', tooltip='Clear current output display')
refreshButton = widgets.Button(description= 'Refresh', tooltip= 'Display the current subset again')
backButton = widgets.Button(description= '<-', tooltip= 'Go back to the previous working set, if possible')
forwardButton = widgets.Button(description= '->', tooltip= 'Go forward to the next working set, if possible')
# currentSet = widgets.HTML(value = "Current Set Size: ")
# Read-out of the current set size; resetDisplay() rewrites its value.
currentSetCount = widgets.HTML(value = str(s.currentSet.size))
currentSet = widgets.HBox([widgets.HTML(value = "Current Set Size: "), currentSetCount])
# Multi-select deciding which columns getTweets() renders.
# NOTE(review): the default selection is hard-coded; presumably this fails if the loaded
# data lacks one of these three columns - confirm.
columns = widgets.SelectMultiple(options=[col for col in s.headerDict.keys()], layout={'width': 'max-content'},
                                 value=["SenderScreenName", "CreatedTime", "Message"], description='Columns')
columnsAccordian = widgets.Accordion(children = [columns], titles= ["Select Columns"])
menuButtons = widgets.HBox([selector, clearButton, refreshButton, backButton, forwardButton])
menu = widgets.VBox([currentSet, columnsAccordian, menuButtons])
# Wire each menu button to its handler.
clearButton.on_click(resetDisplay)
refreshButton.on_click(getTweets)
backButton.on_click(back)
forwardButton.on_click(forward)


# Random Sample
randomSampleButton = widgets.Button(description='Get Tweets', tooltip='Get Tweets')   
# Sample-size slider; its upper bound starts at the current set size and is
# updated by resetDisplay().
sampleSelector = widgets.IntSlider(value=300, min=0, max=s.currentSet.size, step=1, 
                description='Sample size', continuous_update=False, orientation='horizontal', readout=True, 
                readout_format='d', tooltip= 'The number of tweets that will be selected in the sample')
randomSampleBar = widgets.HBox([sampleSelector, randomSampleButton])
randomSampleButton.on_click(randomSample)
# Slot 0 matches the 'Random Sample' entry of the operation dropdown.
operators[0] = randomSampleBar

# Weighted Sample
weightedButton = widgets.Button(description='Get Tweets', tooltip='Get Tweets')  
# Free-text column name; weightedSample() passes it on to s.weightedSample.
weightedSelector = widgets.Text(value='', placeholder='<Column_name>', description='Weight by')
# Reuses the same sample-size slider as the random-sample panel.
weightedBar = widgets.HBox([sampleSelector, weightedSelector, weightedButton])
weightedButton.on_click(weightedSample)
# Slot 1 matches the 'Weighted Sample' entry of the operation dropdown.
operators[1] = weightedBar

# Search Bar
searchBox = widgets.Text(value='', placeholder='Keyword', description='Input:')
searchButton = widgets.Button(description='Search', tooltip='Run search on current input')
# Tooltip texts, listed in the same order as the search-type options below.
ContainsAnyToolTip = 'Returns tweets with any typed keyword (seperated by spaces)'
ContainsAllToolTip = 'Returns tweets containing all typed space-separated keywords'
AdvancedSearchToolTip = 'Search for tweets that match the inputted logical expression'
RegularExpressionToolTip = 'Returns tweets that match regular expression'
# The option labels are the exact strings searchKeyword() compares against.
searchType = widgets.ToggleButtons(options=['Contains Any', 'Contains All', 'Advanced', 'Regular Expression'],
    tooltips=[ContainsAnyToolTip, ContainsAllToolTip, AdvancedSearchToolTip, RegularExpressionToolTip])
searchBar = widgets.VBox([widgets.HBox([searchBox, searchButton]), searchType])
searchButton.on_click(searchKeyword)
# Slot 2 matches the 'Search Keyword' entry of the operation dropdown.
operators[2] = searchBar

# Filter By
# Expected input is "<column> <value>"; filterBy() splits it at the first space.
filterBox = widgets.Text(value='', placeholder='<Column_name> <value>', description='Filter:')
filterButton = widgets.Button(description='Filter', tooltip='Filter by column on current input')
filterBar = widgets.HBox([filterBox, filterButton])
filterButton.on_click(filterBy)
# Slot 3 matches the 'Filter By' entry of the operation dropdown.
operators[3] = filterBar

# Clustering
# Passed as min_df when cluster() builds the document-word matrix.
min_df = widgets.IntText(value= 1, description= "Word Frequency", style={'description_width': 'initial'},
                          tooltip= "Specifies the minum frequency required for a word to be considered during processing")
dimRed1 = widgets.ToggleButtons(options=[('PCA', "pca"), ("UMAP", "umap")], value="pca", 
                                description="First dimension reduction method: ",
                          tooltip= "The method that will reduce the dimensions represented by words with the min frequency")
# NOTE(review): the upper bound is derived from the number of headers minus 2 - confirm
# that this is the intended limit for the number of reduced dimensions.
dimensions = widgets.BoundedIntText(value= 2, min= 2, max= len(s.headerDict) - 2, description= "Dimensions: ",
                                   tooltip= "The number of dimensions the first dimension reduction method should reduce to")
# Only placed in the panel by dimensionHandler() when more than 2 dimensions are requested.
dimRed2 = widgets.ToggleButtons(options=[('PCA', "pca"), ("UMAP", "umap")], value="pca", 
                                description="Second dimension reduction method: ",
                               tooltip= "The second method used to reduce dimensionality to 2")
clusteringWhen = widgets.ToggleButtons(options=[("Before stage 1", "before_stage1"), ("Between 1 and 2", "btwn")], 
                                value= "before_stage1", description= "When to cluster: ",
                                    tooltip= "When clustering will be performed relative to dimension reduction")
clusteringMethod = widgets.ToggleButtons(options=[("GMM", "gmm"), ("K-Means", "k-means"), ("HDBScan", "hdbscan"), 
                                                  ("Leiden", "leiden")], value="gmm", description="Clustering method ",
                                        tooltip= "The clustering algorithm that will be used")
# One numeric input per clustering method, in the same order as the method options;
# the first two are configured identically.
clusteringInput = widgets.BoundedIntText(description= "Number of Clusters: ", value= 2, min= 2,
                                         style={'description_width': 'initial'}, tooltip= "The number of clusters")
clusteringInput1 = widgets.BoundedIntText(description= "Number of Clusters: ", value= 2, min= 2,
                                          style={'description_width': 'initial'}, tooltip= "The number of clusters")
clusteringInput2 = widgets.BoundedIntText(description= "Minimum Cluster Size: ", value= 2, min= 2, 
                                          style={'description_width': 'initial'}, 
                                          tooltip= "Minimum objects required per cluster")
clusteringInput3 = widgets.BoundedIntText(description= "Number of Neighbors: ", value= 2, min= 1, 
                                          style={'description_width': 'initial'}, 
                                          tooltip= "The number of neighbors to compare with")
clusterStack = widgets.Stack([clusteringInput, clusteringInput1, clusteringInput2, clusteringInput3], selected_index= 0)
clusterButton = widgets.Button(description='Cluster', tooltip= 'Start clustering')
# Initial layout for 2 dimensions: no second reduction method in the panel.
clusterBar = widgets.VBox([min_df, dimRed1, dimensions, clusteringWhen, clusteringMethod, clusterStack, clusterButton])
# Show the numeric input that belongs to the selected clustering method.
widgets.jslink((clusteringMethod, 'index'), (clusterStack, 'selected_index'))
clusterButton.on_click(cluster)
# Re-layout the panel whenever the requested number of dimensions changes.
dimensions.observe(dimensionHandler, names='value')
# Slot 4 matches the 'Clustering' entry of the operation dropdown.
operators[4] = clusterBar

# Stack of the five operation panels, starting on the first one.
stack = widgets.Stack(operators, selected_index= 0)
# Link the operation dropdown's index to the stack's selected panel.
widgets.jslink((selector, 'index'), (stack, 'selected_index'))    
    
    
def fileHandler(change):
    """Start a new session from the uploaded file and refresh the UI.

    Does nothing when the upload widget holds no file. After the session is
    replaced, the column selector is repopulated from the new session's
    headers; previously selected columns that still exist stay selected, so
    the table is never asked for a column the new data does not have.

    Args:
        change: Trait-change dictionary from the upload widget (unused; the
            current value is read from ``fileUp`` directly).
    """
    uploads = fileUp.value
    if not uploads:
        # Nothing to load, e.g. the widget's value was cleared.
        return
    startSession(uploads[0])

    previousSelection = columns.value
    columns.options = list(s.headerDict.keys())
    columns.value = [name for name in previousSelection if name in s.headerDict]
    resetDisplay()
    
# Initial view: only the upload prompt and the upload control are shown.
with out:
    #display(menu)
    #display(stack)
    print("Select a file to analyze:")
    display(fileUp)
    
# Run fileHandler whenever the upload widget's value changes.
fileUp.observe(fileHandler, names="value")
# Last expression of the cell: renders the output widget.
out

[Errno 2] No such file or directory: './allCensus_sample.csv'


AttributeError: 'NoneType' object has no attribute 'columns'