In [1]:
import time
import pymongo
import psycopg2
from collections import OrderedDict

In [2]:
import time
from collections import OrderedDict

class SearchCache:
    """Size-bounded, least-recently-used cache whose entries expire after a TTL.

    Entries are kept in an ``OrderedDict`` as ``key -> (value, timestamp)``.
    The oldest position holds the least recently used entry; reads and writes
    move the touched key to the newest position.
    """

    def __init__(self, max_size=1000, ttl=3600):
        """
        Initializes the cache with a maximum size and time-to-live (ttl) values.

        Args:
        - max_size (int): Maximum number of items that the cache can hold.
        - ttl (int): Time-to-live of each cache entry in seconds.
        """
        self.max_size = max_size
        self.ttl = ttl
        # ordered dictionary mapping key -> (value, insertion timestamp)
        self.cache = OrderedDict()

    def _is_expired(self, timestamp):
        """Returns True when an entry stored at `timestamp` has outlived the ttl."""
        return time.time() - timestamp > self.ttl

    def __contains__(self, key):
        """
        Returns a boolean value indicating if a live (non-expired) entry exists.

        Expired entries are evicted and reported as absent, so a successful
        `key in cache` check is consistent with what `cache[key]` would do.

        Args:
        - key (str): Key to look up in the cache.

        Returns:
        - bool: True if the key is present and not expired, False otherwise.
        """
        entry = self.cache.get(key)
        if entry is None:
            return False
        _, timestamp = entry
        if self._is_expired(timestamp):
            # drop the stale entry so it stops occupying a cache slot
            del self.cache[key]
            return False
        return True

    def __getitem__(self, key):
        """
        Retrieves the value from the cache for a given key.

        A successful read marks the entry as most recently used.

        Args:
        - key (str): Key to look up in the cache.

        Returns:
        - Any: Value associated with the given key.

        Raises:
        - KeyError: If the key is not in the cache or its entry has expired.
        """
        value, timestamp = self.cache[key]
        if self._is_expired(timestamp):
            # remove the expired cache entry before signalling the miss
            self.cache.pop(key)
            raise KeyError('Cache entry has expired')
        # accessed entries move to the end so eviction removes the oldest one
        self.cache.move_to_end(key)
        return value

    def __setitem__(self, key, value):
        """
        Adds or updates a cache entry for the given key with a fresh timestamp.

        If the cache grows beyond `max_size`, the least recently used entry
        is evicted.

        Args:
        - key (str): Key to add or update in the cache.
        - value (Any): Value to associate with the given key in the cache.
        """
        if key in self.cache:
            # assignment alone keeps an existing key's position, so refresh it
            self.cache.move_to_end(key)
        self.cache[key] = (value, time.time())
        if len(self.cache) > self.max_size:
            # the first item in the ordered dictionary is the least recently used
            self.cache.popitem(last=False)


In [3]:
import pymongo
import psycopg2

class SearchEngine:
    """Runs cached aggregate queries over tweet data (MongoDB) and user data (PostgreSQL)."""

    def __init__(self, db_type, cache_size=100, cache_ttl=3600):
        """
        Initializes a SearchEngine object with a specified database type and cache settings.

        Args:
        - db_type (str): Either "mongodb" or "postgresql"
        - cache_size (int): Maximum number of items to store in cache
        - cache_ttl (int): Time-to-live (in seconds) for cached items

        Raises:
        - ValueError: If db_type is neither "mongodb" nor "postgresql".
        """
        self.db_type = db_type
        # result cache shared by all query methods of this engine
        self.cache = SearchCache(cache_size, cache_ttl)
        # establish a database connection based on the given database type;
        # only the attributes of the selected backend are created
        if self.db_type == 'mongodb':
            self.db_client = pymongo.MongoClient('mongodb+srv://<user>:<password>@cluster0.wkyhu.mongodb.net/?retryWrites=true&w=majority')
            self.tweets_collection = self.db_client['twitter_db']['tweets_data']
        elif self.db_type == 'postgresql':
            self.db_conn = psycopg2.connect(database="twitter_db", user="postgres", password="", host="localhost")
            self.users_cursor = self.db_conn.cursor()
        else:
            raise ValueError('Invalid database type')

    def _cached(self, key):
        """Returns the cached value for `key`, or None when it is missing or expired."""
        # A single lookup instead of "key in cache" followed by "cache[key]":
        # the lookup itself may raise KeyError (e.g. for an expired entry),
        # which must count as a cache miss rather than propagate to callers.
        try:
            return self.cache[key]
        except KeyError:
            return None

    def get_top_hashtags(self, n=10):
        """
        Retrieves the top N most frequently used hashtags from the database or cache.

        The aggregation is limited to 100 hashtags, and that full list is what
        gets cached, so at most 100 items can be returned.

        Args:
        - n (int): Number of top hashtags to retrieve

        Returns:
        - top_hashtags (list): A list of dictionaries representing the top N hashtags, sorted by frequency of use
        """
        cached = self._cached('top_hashtags')
        if cached is not None:
            return cached[:n]

        # pipeline for aggregating top hashtags from the tweets collection
        pipeline = [
            {"$unwind": "$entities.hashtags"},
            {"$group": {"_id": "$entities.hashtags.text", "count": {"$sum": 1}}},
            {"$sort": {"count": -1}},
            {"$limit": 100}
        ]
        top_hashtags = list(self.tweets_collection.aggregate(pipeline))
        self.cache['top_hashtags'] = top_hashtags
        return top_hashtags[:n]

    def get_popular_users(self, n=10):
        """
        Retrieves the top N most popular users (by number of tweets) from the database or cache.

        NOTE(review): this method reads both `self.users_cursor` and
        `self.tweets_collection`, but `__init__` creates only one of them
        depending on `db_type`; confirm how both connections are meant to be
        available on the same instance.

        Args:
        - n (int): Number of top users to retrieve

        Returns:
        - popular_users (list): A list of dictionaries representing the top N users, sorted by number of tweets
        """
        cached = self._cached('popular_users')
        if cached is not None:
            return cached[:n]

        # query Postgres for user data
        self.users_cursor.execute("SELECT id, name, screen_name, followers_count FROM twitter_users")
        users_data = self.users_cursor.fetchall()
        # get tweets count for each user from MongoDB (one count query per user row)
        users_tweets_count = {
            row[0]: self.tweets_collection.count_documents({'user_id': row[0]})
            for row in users_data
        }
        # sort users by number of tweets, most active first
        sorted_users = sorted(users_data, key=lambda row: users_tweets_count[row[0]], reverse=True)
        popular_users = [
            {
                "id": row[0],
                "name": row[1],
                "screen_name": row[2],
                "followers_count": row[3],
                "count": users_tweets_count[row[0]],
            }
            for row in sorted_users
        ]
        # the full sorted list is cached; callers get the first n entries
        self.cache['popular_users'] = popular_users
        return popular_users[:n]