In [1]:
# Import Libraries
import json
import os
import pickle
import time
from datetime import datetime
from getpass import getpass

import mysql.connector as cnx
import pymongo

In [2]:
# connect to mysql server
# Connection settings are read from the environment so that credentials are
# not stored in the notebook. Host, user and database fall back to the local
# development values; the password has no stored fallback and is prompted
# for when MYSQL_PASSWORD is not set.
mydb = cnx.connect(
    host=os.environ.get("MYSQL_HOST", "localhost"),
    user=os.environ.get("MYSQL_USER", "root"),
    password=os.environ.get("MYSQL_PASSWORD") or getpass("MySQL password: "),
    database=os.environ.get("MYSQL_DATABASE", "mydatabase"),
)

# Buffered cursor: rows are fetched when the query executes.
mycursor = mydb.cursor(buffered=True)

In [3]:
# Connect to the local MongoDB server and grab a handle on the tweets collection.
client = pymongo.MongoClient("mongodb://localhost:27017/")
db = client.get_database("Tweets_DB")
tweets_collec = db.get_collection("Tweets_data")

In [4]:
class Cache:
    """In-memory key/value cache with access counting, optional TTL and checkpoints.

    Keys are strings and are stored lower-cased. Lookups are case-insensitive:
    a key starting with a digit is matched exactly, any other key is matched
    as a substring of the stored keys.

    Args:
        max_size: Maximum number of entries kept after a ``put``.
        evict_strategy: ``'least_accessed'`` or ``'oldest'``. Any other value
            disables eviction (the cache may then grow past ``max_size``).
        checkpoint_interval: Seconds between automatic checkpoints, checked
            on every ``put``.
        ttl: Entry lifetime in seconds, or ``None`` for no expiry.
        checkpoint_file: Path used for the automatic checkpoints.
    """

    def __init__(self, max_size=1000, evict_strategy='least_accessed', checkpoint_interval=3600, ttl=None,
                 checkpoint_file='cache.checkpoint'):
        self.max_size = max_size
        self.evict_strategy = evict_strategy
        self.checkpoint_interval = checkpoint_interval
        self.ttl = ttl
        self.checkpoint_file = checkpoint_file
        self.cache = {}          # key -> {'value': ..., 'timestamp': insertion time}
        self.access_count = {}   # key -> number of successful get() hits
        self.last_checkpoint = time.time()

    def _remove(self, key):
        """Drop ``key`` from both bookkeeping dicts (no error if absent)."""
        self.cache.pop(key, None)
        self.access_count.pop(key, None)

    def _is_expired(self, key, now):
        """Return True when a TTL is set and the entry for ``key`` has outlived it."""
        return self.ttl is not None and (now - self.cache[key]['timestamp']) > self.ttl

    def _evict(self, protect=None):
        """Evict entries until the cache fits ``max_size``, sparing ``protect`` if possible."""
        while self.cache and len(self.cache) > self.max_size:
            # Never pick the entry that was just inserted unless it is the only one;
            # its access count is 0, so it would otherwise evict itself.
            candidates = [k for k in self.cache if k != protect] or list(self.cache)
            if self.evict_strategy == 'least_accessed':
                victim = min(candidates, key=lambda k: self.access_count.get(k, 0))
            elif self.evict_strategy == 'oldest':
                victim = min(candidates, key=lambda k: self.cache[k]['timestamp'])
            else:
                # Unknown strategy: no eviction is performed.
                return
            self._remove(victim)

    def load_from_checkpoint(self, checkpoint_file):
        """Replace the cache contents with the ones stored in ``checkpoint_file``.

        Raises whatever ``open``/``pickle.load`` raise (e.g. ``FileNotFoundError``).
        """
        # SECURITY: pickle.load can execute arbitrary code. Only load checkpoint
        # files written by this class from a trusted location.
        with open(checkpoint_file, 'rb') as f:
            self.cache, self.access_count = pickle.load(f)
        # A checkpoint written with a larger max_size must not overflow this cache.
        self._evict()

    def save_to_checkpoint(self, checkpoint_file):
        """Pickle the cache contents and access counters to ``checkpoint_file``."""
        with open(checkpoint_file, 'wb') as f:
            pickle.dump((self.cache, self.access_count), f)

    def get(self, key):
        """Return the list of cached values matching ``key``, or ``None`` on a miss.

        A key starting with a digit must match a stored key exactly; any other
        key matches every stored key that contains it. Expired entries are
        removed and do not count as matches. Each returned entry has its
        access count incremented.
        """
        if not key:
            return None
        # put() stores lower-cased keys, so the lookup must be lower-cased too.
        key = key.lower()

        if key[0].isdigit():
            matches = [key] if key in self.cache else []
        else:
            matches = [k for k in self.cache if key in k]

        # Apply the TTL to the stored keys that matched, not to the search term.
        now = time.time()
        live = []
        for k in matches:
            if self._is_expired(k, now):
                self._remove(k)
            else:
                live.append(k)

        if not live:
            return None

        for k in live:
            self.access_count[k] = self.access_count.get(k, 0) + 1

        return [self.cache[k]['value'] for k in live]

    def put(self, key, value):
        """Store ``value`` under the lower-cased ``key`` and enforce ``max_size``.

        Re-inserting an existing key refreshes its timestamp and resets its
        access count. A checkpoint is written to ``self.checkpoint_file`` when
        ``checkpoint_interval`` seconds have passed since the last one.
        """
        key = key.lower()
        self.cache[key] = {'value': value, 'timestamp': time.time()}
        self.access_count[key] = 0
        self._evict(protect=key)
        if (time.time() - self.last_checkpoint) > self.checkpoint_interval:
            self.save_to_checkpoint(self.checkpoint_file)
            self.last_checkpoint = time.time()

    def print_cache(self):
        """Print every cached key/value followed by the used and remaining capacity."""
        print('Cache:')
        for key, value in self.cache.items():
            print(f"{key}: {value['value']}")
        used_space = len(self.cache)
        remaining_space = self.max_size - used_space
        print(f"Cache size: {used_space}")
        print(f"Remaining space: {remaining_space}")

In [5]:
# Module-level cache instance (default settings) shared by UserSearch and get_user_tweets.
cache = Cache()

In [6]:
# check if the search term starts with '@'
def UserSearch(search_term):
    """Search users by name for an '@'-prefixed search term.

    The text after the '@' is looked up in the shared ``cache`` first; on a
    miss the ``users`` table is queried (top 5 by followers, tweets and
    verified status) and each returned row is cached under column 1 of the
    row (the user's name in the sample output).

    Args:
        search_term: User input, expected to look like ``"@name"``.

    Returns:
        A list of user rows. An empty list is returned when ``search_term``
        is empty or does not start with '@'.

    NOTE(review): a cache hit returns whatever entries match the term, so it
    may not be the same rows, order or count that the SQL query would give.
    """
    # Not a user search: return an empty result instead of falling through to
    # an unbound ``results`` variable.
    if not search_term or not search_term.startswith('@'):
        return []

    # remove the '@' symbol from the search term
    search_term = search_term[1:]

    # Look up once: every successful cache.get() bumps the access counters,
    # so a second call would count the same search twice.
    results = cache.get(search_term)
    if results:
        print("getting")
        return results

    print("putting")
    # execute the query to search for user details based on username
    query = """
        SELECT * FROM users 
        WHERE name LIKE %s 
        ORDER BY followers_count DESC, tweets_count DESC, verified DESC
        LIMIT 5
        """
    mycursor.execute(query, ('%' + search_term + '%',))
    results = mycursor.fetchall()
    for row in results:
        cache.put(row[1], row)

    return results

In [7]:
def get_user_tweets(user_id):
    """Return up to three of a user's most recent tweets, using the shared cache.

    Args:
        user_id: Value matched against the ``User_Id`` field of the tweets
            collection; also used as the cache key (so it must be a string).

    Returns:
        A list of dicts with the keys ``created_at``, ``text``, ``hashtags``,
        ``retweet_count`` and ``likes_count``, both on a cache hit and on a
        miss.
    """
    cached = cache.get(user_id)
    if cached:
        print("getting tweet")
        # cache.get() wraps matches in a list; unwrap it so a hit has the same
        # shape as a miss. Assumes user_id starts with a digit, so the lookup
        # is an exact match with a single entry -- TODO confirm.
        return cached[0]

    print("putting tweet")

    user_tweets = tweets_collec.find({'User_Id': user_id}).sort([('created_at', -1)]).limit(3)
    tweet_details = [
        {
            'created_at': tweet['created_at'],
            'text': tweet['Text'],
            'hashtags': tweet['Hashtag'],
            'retweet_count': tweet['Retweet_Count'],
            'likes_count': tweet['Likes_Count'],
        }
        for tweet in user_tweets
    ]

    cache.put(user_id, tweet_details)
    return tweet_details

In [8]:
# user_id -> tweets fetched for that user while printing search results
tweets_cache = {}


def UserPrint(results):
    """Print a summary for each user row and remember each user's tweets.

    For every row the tweets are fetched with ``get_user_tweets`` and stored
    in the module-level ``tweets_cache`` under the row's first column.

    Args:
        results: Iterable of user rows. Column positions used: 0 id, 1 name,
            3 verified flag, 4 followers, 6 creation date, 7 location,
            8 tweet count, 9 description.
    """
    for result in results:
        user_id = result[0]
        tweets_cache[user_id] = get_user_tweets(user_id)

        verified_status = "✅" if result[3] == 1 else "❌"

        line1 = "ID: {} | Name: {} | Verified: {}".format(result[0], result[1], verified_status)
        # format the remaining fields on separate lines
        line2 = "Followers: {} | Tweets: {}".format(result[4], result[8])
        line3 = "Description : {}".format(result[9])
        line4 = "Location : {} | Creation Date:{}".format(result[7], result[6])

        print(line1)
        print(line2)
        print(line3)
        print(line4)
        print("--------------------------------------------")

In [17]:
# Interactive user search: look up users, show them, then show one user's tweets.
search_term = input("Enter the search term: ")

start_sql = time.perf_counter()
results = UserSearch(search_term)
end_sql = time.perf_counter()

sql_time = end_sql - start_sql
print("Time for getting user info: ", sql_time)

if not results:
    # Nothing to display or select from; skip the remaining prompts.
    print("No users found for the given search term.")
else:
    start_mongo = time.perf_counter()
    UserPrint(results[:3])
    end_mongo = time.perf_counter()
    mongo_time = end_mongo - start_mongo
    print("Time for getting tweets info: ", mongo_time)

    # check if there are more results
    if len(results) > 3:
        # prompt the user to load more results
        load_more = input("Load more results? (yes/no) ")
        if load_more.lower().startswith('y'):
            UserPrint(results[3:5])

    try:
        user_choice = int(input("Enter the number of the user whose tweets you want to see: "))
    except ValueError:
        user_choice = None

    # Reject non-numeric input and anything outside 1..len(results); 0 or a
    # negative number would otherwise silently index from the end of the list.
    if user_choice is None or not 1 <= user_choice <= len(results):
        print("Invalid selection.")
    else:
        selected_user = results[user_choice - 1]

        # Get the user_id of the selected user
        user_id = selected_user[0]

        # Display the tweets of the selected user
        if user_id in tweets_cache:
            print(f"Tweets of {selected_user[1]}:")
            for tweet in tweets_cache[user_id]:
                print(tweet)
        else:
            print("No tweets found for the selected user.")

Enter the search term: @sai
{'rajdeep sardesai': 2, 'saint laurent don': 2, 'syed talat hussain': 2, 'syed shahnawaz hussain': 2, 'husain haqqani': 2, '56304605': 2, '260114837': 2, '286042531': 2, '350128848': 2, '80330381': 2}
inside:  rajdeep sardesai
inside:  saint laurent don
inside:  syed talat hussain
inside:  syed shahnawaz hussain
inside:  husain haqqani
getting
{'rajdeep sardesai': 3, 'saint laurent don': 3, 'syed talat hussain': 3, 'syed shahnawaz hussain': 3, 'husain haqqani': 3, '56304605': 2, '260114837': 2, '286042531': 2, '350128848': 2, '80330381': 2}
inside:  rajdeep sardesai
inside:  saint laurent don
inside:  syed talat hussain
inside:  syed shahnawaz hussain
inside:  husain haqqani
Time for getting user info:  0.0003042919561266899
{'rajdeep sardesai': 4, 'saint laurent don': 4, 'syed talat hussain': 4, 'syed shahnawaz hussain': 4, 'husain haqqani': 4, '56304605': 2, '260114837': 2, '286042531': 2, '350128848': 2, '80330381': 2}
inside:  56304605
getting tweet
{'ra

In [10]:
# Search by hashtag

In [11]:
# def get_top_hashtags(search_string, limit=5):
    
#     if search_string.startswith('#'):
#         search_string = search_string[1:]
        
#         hashtags = tweets_collec.aggregate([
#         { "$match": { "Hashtag": { "$regex": search_string, "$options": "i" } } },
#         { "$unwind": "$Hashtag" },
#         { "$group": { "_id": "$Hashtag", "count": { "$sum": 1 } } },
#         { "$sort": { "count": -1 } },
#         { "$limit": limit }
#         ])

#         hashtag_dict = {}
#         for hashtag in hashtags:
#             hashtag_dict[hashtag['_id']] = hashtag['count']
            
#         return hashtag_dict

In [12]:
# # level 2 display
# def tweets_of_hashtag(hashtag):
#     tweets = list(tweets_collec.find({'Hashtag': hashtag}).sort('created_at', -1).limit(3))
#     for tweet in tweets:
#         print(f"Created at: {tweet['created_at']}\nText: {tweet['Text']}\nUser ID: {tweet['User_Id']}\nRetweet Count: {tweet['Retweet_Count']}\nLikes Count: {tweet['Likes_Count']}\n")
#         print("----------------------------------------------------------------------------------")

In [13]:
# # level 1 display

# hashtags = get_top_hashtags("19")
# print("Top 5 hashtags matching the search string: ")
# print("------------------------------------------")
# for k,v in hashtags.items():
#     print("Hashtag: {}\nCount: {}\n".format(k, v))

In [14]:
# for hashtag in hashtags.keys():
#     tweets_of_hashtag(hashtag)

In [15]:
# Show the current cache entries together with the used and remaining capacity.
cache.print_cache()

Cache:
rajdeep sardesai: ('56304605', 'Rajdeep Sardesai', 'sardesairajdeep', 1, 8947331, 568, datetime.datetime(2009, 7, 13, 6, 14, 44), 'New Delhi', 133, "Citizen first. Only 'ism' is humanism. newsman, tv anchor, author, father, friend. New book: 2019: How Modi Won India. pre order here: http://bit.ly/HowModiWon")
saint laurent don: ('260114837', 'Saint Laurent Don', '21savage', 1, 3691645, 14, datetime.datetime(2011, 3, 3, 6, 42, 26), 'Atlanta, GA', 5, 'SavageMode')
syed talat hussain: ('286042531', 'Syed Talat Hussain', 'TalatHussain12', 1, 3348942, 0, datetime.datetime(2011, 4, 22, 7, 21, 57), 'Islamabad', 2, 'Journalist, writer, obsessive jogger, voracious reader, massive meat-eater')
syed shahnawaz hussain: ('350128848', 'Syed Shahnawaz Hussain', 'ShahnawazBJP', 1, 1731505, 887, datetime.datetime(2011, 8, 7, 7, 33, 35), 'New Delhi, India', 16, 'National Spokesperson, BJP & Former Union Minister, Government of India.')
husain haqqani: ('80330381', 'Husain Haqqani', 'husainhaqqani