In [None]:
import os
import time

import pandas as pd
import psycopg2
import pymongo

from cache import SearchCache

In [None]:
class SearchEngine:
    """
    Cached search front-end over one of two backends.

    - ``db_type='mongodb'`` opens the tweets collection used by
      ``most_engaging_tweets``.
    - ``db_type='postgresql'`` opens the cursor used by ``search_by_username``.

    Results of both methods are memoised in a ``SearchCache`` under string keys
    that include the method name and its arguments.
    """

    # Fallback connection string, kept byte-for-byte from the original notebook.
    # It only contains placeholders; set the MONGODB_URI environment variable to
    # supply a real one instead of editing credentials into the notebook.
    _DEFAULT_MONGO_URI = 'mongodb+srv://<user>:<password>@cluster0.wkyhu.mongodb.net/?retryWrites=true&w=majority'

    # Columns selected by search_by_username, in SELECT order; also used as the
    # keys of the returned dictionaries so the two can never drift apart.
    _USER_COLUMNS = (
        'user_id',
        'name',
        'twitter_join_date',
        'followers_count',
        'friends_count',
        'favourites_count',
        'verified',
    )

    # Count fields whose "effective" value is projected by most_engaging_tweets.
    _COUNT_FIELDS = ('retweet_count', 'reply_count', 'favorite_count', 'quote_count')

    def __init__(self, db_type, cache_size=100, cache_ttl=3600, user_table='users'):
        """
        Initializes a SearchEngine object with a specified database type and cache settings.

        Args:
        - db_type (str): Either "mongodb" or "postgresql"
        - cache_size (int): Maximum number of items to store in cache
        - cache_ttl (int): Time-to-live (in seconds) for cached items
        - user_table (str): Name of the PostgreSQL table queried by
          ``search_by_username`` (optionally schema-qualified).
          NOTE(review): the original code read ``self.user_table`` without ever
          assigning it; 'users' is an assumed default -- confirm the real name.

        Raises:
        - ValueError: If ``db_type`` is not supported or ``user_table`` is not a
          plain (optionally dotted) identifier.
        """
        # The table name is interpolated into SQL text (identifiers cannot be
        # bound as query parameters), so only accept identifier-shaped names.
        if not isinstance(user_table, str) or not all(
            part.isidentifier() for part in user_table.split('.')
        ):
            raise ValueError('Invalid user table name')

        self.db_type = db_type
        self.user_table = user_table
        # initialize a cache object for the search engine using the SearchCache class
        self.cache = SearchCache(cache_size, cache_ttl)

        # establish a database connection based on the given database type
        if self.db_type == 'mongodb':
            mongo_uri = os.environ.get('MONGODB_URI', self._DEFAULT_MONGO_URI)
            self.db_client = pymongo.MongoClient(mongo_uri)
            self.tweets_collection = self.db_client['twitter_db']['tweets_data']
        elif self.db_type == 'postgresql':
            self.db_conn = psycopg2.connect(
                database="postgres",
                user="postgres",
                # Same empty default as before, but overridable without editing code.
                password=os.environ.get('PGPASSWORD', ""),
                host="localhost",
            )
            self.users_cursor = self.db_conn.cursor()
        else:
            # raise a ValueError if an invalid database type is given
            raise ValueError('Invalid database type')

    def close(self):
        """Closes whichever database resources ``__init__`` opened (cursor, connection, client)."""
        for attr in ('users_cursor', 'db_conn', 'db_client'):
            resource = getattr(self, attr, None)
            if resource is not None:
                resource.close()

    @staticmethod
    def _effective_count(field):
        """
        Builds the projection expression for one count field.

        The expression picks ``quote.<field>`` when the document's ``quote`` is
        not null, otherwise ``retweet.<field>`` when ``retweet`` is not null,
        otherwise the document's own ``<field>``. The result is wrapped in
        ``$sum`` exactly as in the original pipeline.
        """
        return {
            '$sum': {
                '$cond': [
                    {'$eq': ['$quote', None]},
                    {
                        '$cond': [
                            {'$eq': ['$retweet', None]},
                            f'${field}',
                            f'$retweet.{field}',
                        ]
                    },
                    f'$quote.{field}',
                ]
            }
        }

    def most_engaging_tweets(self, n=10):
        """
        Returns the most engaging n tweets in the database, where engagement is defined as the sum of
        retweet count, reply count, favorite count, and quote count.

        Args:
        - n (int): Number of tweets to return.

        Returns:
        - list: List of the top n tweets, each represented as a dictionary.

        Raises:
        - RuntimeError: If the engine has no tweets collection (i.e. it was not
          created with ``db_type='mongodb'``).
        """
        # Key on n as well: otherwise a cached top-10 would be returned for any n.
        cache_key = f'most_engaging_tweets:{n}'
        if cache_key in self.cache:
            print("Retrieving 'most engaging tweets' from cache!")
            return self.cache[cache_key]

        collection = getattr(self, 'tweets_collection', None)
        if collection is None:
            raise RuntimeError(
                "most_engaging_tweets requires a SearchEngine created with db_type='mongodb'"
            )

        projection = {
            '_id': 0,
            'tweet_id': 1,
            'user': 1,
            'name': 1,
            'date': 1,
            'text': 1,
            # Only expose the embedded retweet / quote when the matching flag is set.
            'retweet': {
                '$cond': {
                    'if': {'$eq': ['$is_retweet', True]},
                    'then': '$retweet',
                    'else': None
                }
            },
            'quote': {
                '$cond': {
                    'if': {'$eq': ['$is_quote', True]},
                    'then': '$quote',
                    'else': None
                }
            },
        }
        for field in self._COUNT_FIELDS:
            projection[field] = self._effective_count(field)

        pipeline = [
            {'$project': projection},
            {
                '$addFields': {
                    'engagement': {
                        '$sum': [f'${field}' for field in self._COUNT_FIELDS]
                    }
                }
            },
            {'$sort': {'engagement': pymongo.DESCENDING}},
            {'$limit': n},
        ]

        tweets = list(collection.aggregate(pipeline))
        self.cache[cache_key] = tweets
        return tweets

    def search_by_username(self, username, n=10):
        """
        Returns the top n users whose name contains the given username, ordered by
        followers count and then verified status (both descending).

        Args:
        - username (str): The username to search for.
        - n (int): Number of users to return.

        Returns:
        - list: List of the top n users, each represented as a dictionary.

        Raises:
        - RuntimeError: If the engine has no users cursor (i.e. it was not
          created with ``db_type='postgresql'``).
        """
        # Namespaced key: avoids clashing with most_engaging_tweets entries and
        # with results cached for a different n.
        cache_key = f'search_by_username:{username}:{n}'
        if cache_key in self.cache:
            print(f"Retrieving {username} from cache!")
            return self.cache[cache_key]

        cursor = getattr(self, 'users_cursor', None)
        if cursor is None:
            raise RuntimeError(
                "search_by_username requires a SearchEngine created with db_type='postgresql'"
            )

        # username and n are bound as parameters so user input never becomes SQL
        # text; the table name was validated as an identifier in __init__.
        query = f"""
            SELECT {', '.join(self._USER_COLUMNS)}
            FROM {self.user_table}
            WHERE name LIKE %s
            ORDER BY followers_count DESC, verified DESC
            LIMIT %s
        """
        cursor.execute(query, (f'%{username}%', n))

        users = [dict(zip(self._USER_COLUMNS, row)) for row in cursor.fetchall()]

        self.cache[cache_key] = users
        return users

In [None]:
# create a SearchEngine object for MongoDB with cache size of 50 and cache TTL of 60 seconds
search_engine = SearchEngine(db_type='mongodb', cache_size=50, cache_ttl=60)

In [None]:
# Look up users whose name contains "juwelz v".
# NOTE(review): search_by_username runs its SQL through self.users_cursor, which
# SearchEngine.__init__ only creates for db_type='postgresql', while
# `search_engine` was built with db_type='mongodb' -- this call needs a
# PostgreSQL-backed engine. The tabular output recorded below also does not
# match the list of user dicts this method returns; presumably stale, re-run
# to confirm.
users = search_engine.search_by_username("juwelz v")
users

Unnamed: 0,name,text,reply_count,engagement_score
0,juwelz v,wishing death on people is weirdo behavior.,0,0


In [None]:
# Fetch the most engaging tweets with the method's default n=10; the result is
# stored in the engine's cache, so a repeated call may be served from there.
top_tweets = search_engine.most_engaging_tweets()
top_tweets

[{'tweet_id': 1254051230822944770,
  'user': 1039346340449452033,
  'name': 'Grace',
  'date': 'Sat Apr 25 14:14:47 +0000 2020',
  'text': 'But Joe, what if I WANT to drink bleach? What if I wanted to do that even before the orange man said to inject Lysol into our veins to stop corona? What if?',
  'retweet': None,
  'quote': {'tweet_id': 1253751812194070529,
   'user_id': 939091,
   'user_name': 'Joe Biden',
   'quote_count': 32237,
   'reply_count': 46159,
   'retweet_count': 263475,
   'favorite_count': 1280593,
   'media': {'hashtags': [], 'urls': [], 'mentions': []}},
  'retweet_count': 263475,
  'reply_count': 46159,
  'favorite_count': 1280593,
  'quote_count': 32237,
  'engagement': 1622464},
 {'tweet_id': 1254044290344521728,
  'user': 863214058845200384,
  'name': 'Control Roboto 🤖 💚 Cine y Series',
  'date': 'Sat Apr 25 13:47:12 +0000 2020',
  'text': 'Esto es terrible, si no los mata el corona, se matan entre ellos con rifles o tomando lavandina.\nDe pedo siguen vivos.',
 