In [19]:
"""Real-time analysis project - tweeter sentiment analysis""";

In [11]:
# NOTE(review): `!` hands the command to a separate shell, so activating the venv
# here presumably does not change the interpreter the running kernel uses -- confirm,
# and prefer launching Jupyter from (or registering a kernel for) the venv instead.
# The backslash path is also Windows-specific.
!venv\Scripts\activate

In [2]:
import requests
import json

In [15]:
# =============================================================
# CONFIGURATION
# =============================================================
# Read the whole configuration file as UTF-8 text and parse it as JSON.
# `config` is the parsed object used by the later cells.
with open("config.json", "r", encoding='utf-8') as conf:
    config = json.loads(conf.read())

In [23]:
# =============================================================
# TWEET SCRAPING
# =============================================================

In [4]:
class ApiConnector:
    """Fetch recent tweets that match any hashtag from a user-supplied list.

    The connector builds a Twitter API v2 "recent search" URL from the hashtags
    (combined with ``OR``) and a result cap, and authenticates every request
    with a bearer token.

    Attributes:
        hashtags: Hashtag names without the leading ``#``.
        max_results: Value sent as the ``max_results`` query parameter.
        headers: HTTP headers carrying the bearer-token authorization.
    """

    # URL template; the placeholders are the encoded query and the result cap.
    url_base = "https://api.twitter.com/2/tweets/search/recent?query={}&max_results={}"

    def __init__(self, hashtags: list, max_results: int, bearer_token: str):
        """Store the search parameters and build the authorization header.

        Args:
            hashtags: Hashtag names without the leading ``#``.
            max_results: Maximum number of tweets requested per call.
            bearer_token: API bearer token placed in the ``Authorization`` header.
        """
        self.hashtags = hashtags
        self.max_results = max_results
        self.headers = {"Authorization": f"Bearer {bearer_token}"}

    @property
    def query(self) -> str:
        """Pre-encoded search query of the form ``#tag1 OR #tag2 OR ...``.

        ``%23`` is the percent-encoded ``#`` and ``%20`` the encoded space.
        An empty hashtag list yields an empty string instead of raising
        ``IndexError``.
        """
        return "%20OR%20".join("%23" + tag for tag in self.hashtags)

    def get_hashtags(self) -> list:
        """Return the current hashtag list."""
        return self.hashtags

    def set_hashtags(self, hashtags: list) -> None:
        """Replace the hashtag list used to build the query."""
        self.hashtags = hashtags

    def get_max_results(self) -> int:
        """Return the current result cap."""
        return self.max_results

    def set_max_results(self, max_results: int) -> None:
        """Replace the result cap sent as ``max_results``."""
        # `self` was missing from the signature, so calling this on an
        # instance always raised a TypeError.
        self.max_results = max_results

    @property
    def api_url(self) -> str:
        """Full request URL built from ``url_base``, ``query`` and ``max_results``."""
        return self.url_base.format(self.query, self.max_results)

    def get_tweets(self) -> list:
        """Return the ``text`` field of every tweet in the search response.

        Returns:
            A list of tweet texts; empty when the response carries no
            ``"data"`` entry (e.g. nothing matched the query).

        Raises:
            requests.HTTPError: If the API answers with a 4xx/5xx status
                (bad token, rate limit, malformed query, ...).
        """
        # The context manager closes the session even if the request raises.
        with requests.Session() as session:
            response = session.get(self.api_url, headers=self.headers)

        # Surface API errors explicitly instead of failing later with an
        # unrelated KeyError on the missing "data" key.
        response.raise_for_status()

        payload = response.json()
        return [tweet["text"] for tweet in payload.get("data", [])]

In [5]:
_HASHTAGS = ["polskilad", "polskiwal", "nowylad", "nowywal", "drozyznapis"]
_MAX_RESULTS = 100

In [6]:
# Build the connector from the hashtag list, the result cap and the API token
# stored in the configuration.
conn = ApiConnector(
    hashtags=_HASHTAGS,
    max_results=_MAX_RESULTS,
    bearer_token=config["bearer_token"],
)

# Fetch the tweet texts and preview at most the first ten of them.
results = conn.get_tweets()
print(results[:10])

['Myślę, że od 2023 roku #PolskiLad zyska nowego patcha, który będzie miał za zadanie usunąć wszystkie bugi znalezione do tej pory przez userów.\n#PolskiWał #NowyWał #NowyŁad #drozyznapis #inflacja', 'CO O TYM SĄDZICIE?\n\n#PolskiLad #PolskiNieład #PolskiWał #PolskiŁad #NowyWał #NowyŁad #inflacja #drozyznapis https://t.co/y7JJLiz5dt', 'Panie @CzarnekP czy pokaże Pan konsekwencję i  odwoła kurator Barbarę Nowak ? Wiem , wiem za dużo wymagam - swoich nie wolno ruszać.\n#PolskiLad #PolskiNieład #PolskiWał #PolskiŁad #NowyWał #NowyŁad #inflacja #drozyznapis https://t.co/ZDiCrxqx5o']


In [10]:
# Display the request URL built from the connector's current hashtags and max_results.
conn.api_url

'https://api.twitter.com/2/tweets/search/recent?query=%23polskilad%20%23polskiwal%20%23nowylad%20%23nowywal%20%23drozyznapis&max_results=100'

In [24]:
# =============================================================
# SENTIMENT ANALYSIS
# =============================================================

In [22]:
# =============================================================
# MONGODB CONNECTION
# =============================================================

In [13]:
import pymongo
import certifi

In [16]:
# connect to mongodb
ca = certifi.where()  # path to certifi's CA bundle, passed on for TLS verification
user = config["mongodb"]["user"]
password = config["mongodb"]["password"]

# Credentials are passed as keyword arguments rather than interpolated into the
# URI: inside a URI they must be percent-escaped, so a user name or password
# containing characters such as '@', ':', '/' or '%' would corrupt the
# connection string. Keyword arguments are taken literally.
_MONGO_CLIENT = pymongo.MongoClient(
    "mongodb+srv://test.s8kmr.mongodb.net",
    username=user,
    password=password,
    tlsCAFile=ca,
)

In [17]:
# Look up the database and collection names in the "mongodb" section of the config.
db_name = config["mongodb"]["db"]
col_name = config["mongodb"]["collection"]

# Get the database handle from the client, then the collection handle from it.
db = _MONGO_CLIENT[db_name]
collection = db[col_name]

In [20]:
def save_result(result: dict, coll: pymongo.collection.Collection) -> None:
    """Insert ``result`` as a single document into the collection ``coll``.

    The write is best-effort: any exception raised by the insert is caught
    and printed; nothing is re-raised and nothing is returned.

    Args:
        result: Dictionary holding the sentiment result to store.
        coll: Target MongoDB collection.
    """
    try:
        coll.insert_one(result)
    except Exception as error:
        # Report the failure and carry on instead of propagating it.
        print(error)

In [21]:
# Smoke test: write a throwaway document through save_result.
# NOTE(review): this inserts into the configured collection every time the cell
# runs -- confirm the target is not a production collection.
r = {"id": "test"}
save_result(r, collection)