In [90]:
from pymongo import MongoClient
import pymongo
import json
import time

class ProjectCache:
    
    def __init__(self, database):
        self.cache = {}                                      #Cache
        self.minheap = []                                    #Minheap
        self.cache_limit_value = 200                         #The cache will be set up to only hold 200 tweets
        self.db = MongoClient(database)["TwitterDB"]
        self.tweets = self.db["tweets"]
        ktdm = list(self.db["keywords"].find())  #Holds all keywords and ids associated
        self.keyword_tdm = dict((x['_id'], x['tweets']) for x in ktdm)
        htdm = list(self.db["hashtags"].find())
        self.hashtag_tdm = dict((x['_id'], x['tweets']) for x in htdm)              #Holds all hashtags and ids associated
        self.recent_searches = []                            #Currently not in use
        self.total_searches_kept_in_cache = 5                #Currently not in use
        
        
    def __contains__(self, key):
        
        return key in self.cache
    
    def current_cache_size(self):
        
        return len(self.cache)
    
    def initial_population(self):
        
        #Read every tweet in collection and attempt to add them one by one to cache
        for i in self.tweets:
            add_update(i["id"], i)
           
    def add_update(self, key, value):
        
        #Add a section that first checks if the current key's relevance value is greater 
        #than the least relevant term in the cache
        if key not in self.cache:
            #If the cache limit is met get the least relevant in cache then compare current item with least relevant in cache
            if len(self.cache) >= self.cache_limit_value:  
                least_relevant_term()                                       
                if value["relevance"] > least_relevant_value_in_cache:      
                    
                    #then remove least relevant to make room in cache
                    self.remove_least_relevant(least_relevant_key, least_relevant_value_in_cache)          
                    
                    #Add key and relevance value to minheap list
                    minheap_sort(key, value["relevance"])
                    
                    #Then add key and all tweet elements to cache
                    self.cache[key] = value                  
        
            else:
                #Add key and relevance value to minheap list
                minheap_sort(key, value["relevance"])
                
                #Then add key and all tweet elements to cache
                self.cache[key] = value
        
    def remove_least_relevant(self, least_relevant_key, least_relevant_value_in_cache):
        
        #Remove least relevant from both the cache and the heaped list
        self.cache.pop(least_relevant)
        heappop(self.minheap, (least_relevant_value_in_cache, least_relevant))
        heapify(self.minheap)
        
    def least_relevant_term(self):
        
        #Get the least relevant term within the cache as well as key associated with it.      
        least_relevant_value_in_cache = self.minheap[0][0]
        least_relevant_key = self.minheap[0][1]
        return least_relevant_value_in_cache, least_relevant_key
    
    def minheap_sort(self, key, value):
        
        #The relevance will be the new key, and the key will be the value in the minheap.      
        heappush(self.minheap, (value, key))
        heapify(self.minheap)
    
    def keyword(self, word, querytype):
        
        #Make a blank dictionary
        l={}
        
        #Grab all the tweet ids associated with the word/hashtag
        if querytype == "Text":
            ids = self.keyword_tdm[word] #Could use .head(10) to get only the top 10
        elif querytype == "Hashtag":
            ids = self.hashtag_tdm[word]
        
        #Search through cache first for all tweet ids
        #If it's not in the cache find it in the database
        for i in ids:
            if i in self.cache:
                l[i] = self.cache[i]
            else:
                l[i] = self.tweets.find_one({"_id":i})
                
        l = pd.DataFrame.from_dict(l).transpose()
        
        #Return a dictionary of all of the ids and associated tweets
        return l
    



In [91]:
from dash import Dash, html, dcc, callback, Output, Input, dash_table
from dash.dependencies import State
import pandas as pd

# import dash_bootstrap_components as dbc

# Some default set of tweets (could be 1) - the relevant part is the column headers
#df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')

database = "mongodb://localhost:27017"

PC = ProjectCache(database)

In [None]:
app = Dash(__name__)

app.layout = html.Div([
    html.H1(children='Tweet Search', style={'textAlign':'center'}),
    dcc.RadioItems(['Text', 'Hashtag'], 'Text', id='search_selection', inline=True),
    dcc.Input(id="search_query", type="text", placeholder="search query"),
    html.Button('Search', id='search_submit'),
    html.Div(id='search_time'),
    html.H2(children='Output'),
    dash_table.DataTable(data=pd.DataFrame().to_dict('records'),
    columns=[{"name": i, "id": i} for i in ['user_name', 'created_at', 'text', 'reply_count', 'retweet_count']], id='search_output'),
    html.H2(children='Similar'),
    dash_table.DataTable(data=pd.DataFrame().to_dict('records'),
    columns=[{"name": i, "id": i} for i in pd.DataFrame().columns], id='similar_df')
])

@callback(
    Output('search_output', 'data'),
    Output('similar_df', 'data'),
    Output('search_time', 'children'),
    State('search_selection', 'value'),
    State('search_query', 'value'),
    Input('search_submit', 'n_clicks')
) # This function should be updated to refer to and call tweets as appropriate
def update_table(stype, value, n):
    start = time.time()
    
    dff = PC.keyword(value, stype)
    dff2 = dff
    
    end = time.time()
    return dff.reset_index().to_dict("records"), dff2.reset_index().to_dict("records"), "Search Time: {} s".format(end-start, 3)

if __name__ == '__main__':
    app.run_server(debug=False)

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:8050
[33mPress CTRL+C to quit[0m
127.0.0.1 - - [24/Apr/2023 17:04:46] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [24/Apr/2023 17:04:47] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [24/Apr/2023 17:04:47] "GET /_dash-dependencies HTTP/1.1" 200 -
[2023-04-24 17:04:47,114] ERROR in app: Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "/Users/mazinrafi/miniconda3/lib/python3.10/site-packages/flask/app.py", line 2528, in wsgi_app
    response = self.full_dispatch_request()
  File "/Users/mazinrafi/miniconda3/lib/python3.10/site-packages/flask/app.py", line 1825, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/Users/mazinrafi/miniconda3/lib/python3.10/site-packages/flask/app.py", line 1823, in full_dispatch_request
    rv = self.dispatch_request()
  File "/Users/mazinrafi/miniconda3/lib/python3.10/site-packages/flask/app.py", line 1799, in dispatch_request
    return self.ensure_sync(self.view_funct