In [1]:
"""Real-time analysis project - Twitter sentiment analysis"""

'Real-time analysis project - Twitter sentiment analysis'

In [2]:
import requests
import json

In [3]:
# =============================================================
# CONFIGURATION
# =============================================================
# Read the notebook settings from config.json (UTF-8 encoded) into the
# `config` dict. A later cell reads config["bearer_token"] from it.
with open("config.json", mode="r", encoding="utf-8") as conf:
    config = json.loads(conf.read())

In [4]:
class ApiConnector:
    """Fetch the text of recent tweets matching a set of hashtags.

    The search query is built from ``mandatory_hashtags`` separated by
    URL-encoded spaces, followed by every optional hashtag introduced by an
    URL-encoded ``OR`` operator. Each hashtag is given WITHOUT the leading
    ``#``; the encoded ``#`` (``%23``) is added automatically.

    Args:
        mandatory_hashtags: Hashtags that are space-joined in the query.
        max_results: Value sent as the ``max_results`` query parameter.
        bearer_token: Token placed in the ``Authorization: Bearer`` header.
        *optional_hashtags: Extra hashtags appended with ``OR``.
    """

    # Endpoint template; filled with the encoded query and the result limit.
    url_base = "https://api.twitter.com/2/tweets/search/recent?query={}&max_results={}"

    def __init__(self, mandatory_hashtags: list, max_results: int, bearer_token: str, *optional_hashtags):
        self.hashtags = mandatory_hashtags
        self.optional_hashtags = optional_hashtags
        self.max_results = max_results
        self.headers = {"Authorization": f"Bearer {bearer_token}"}

    @property
    def query(self) -> str:
        """URL-encoded search query built from the current hashtags.

        Returns an empty string when neither mandatory nor optional hashtags
        are set (instead of failing on an empty mandatory list).
        """
        clauses = []
        if self.hashtags:
            # Mandatory hashtags form one space-separated clause.
            clauses.append("%20".join("%23" + tag for tag in self.hashtags))
        # Every optional hashtag is its own OR-alternative.
        clauses.extend("%23" + opt_tag for opt_tag in self.optional_hashtags)
        return "%20OR%20".join(clauses)

    def get_hashtags(self) -> list:
        """Return the mandatory hashtags."""
        return self.hashtags

    def set_hashtags(self, hashtags: list):
        """Replace the mandatory hashtags."""
        self.hashtags = hashtags

    def get_max_results(self) -> int:
        """Return the configured result limit."""
        return self.max_results

    def set_max_results(self, max_results: int):
        """Replace the configured result limit."""
        self.max_results = max_results

    @property
    def api_url(self) -> str:
        """Full request URL for the current query and result limit."""
        return self.url_base.format(self.query, self.max_results)

    def get_tweets(self) -> list:
        """Returns a list containing text attributes of scraped tweets.

        Returns:
            The ``text`` field of every tweet in the response; an empty list
            when the response carries no ``data`` entry.

        Raises:
            ValueError: If no hashtags are configured, so no query can be built.
            requests.HTTPError: If the API answers with an error status.
        """
        if not self.query:
            raise ValueError("At least one hashtag is required to search for tweets")

        # The context manager closes the session even if the request raises.
        with requests.Session() as session:
            response = session.get(self.api_url, headers=self.headers)

        # Surface authentication / rate-limit / bad-request errors explicitly
        # rather than as a KeyError on the missing "data" field below.
        response.raise_for_status()

        # "data" may be missing from the payload (e.g. no matching tweets).
        tweets = response.json().get("data", [])
        return [tweet["text"] for tweet in tweets]

In [5]:
# Hashtags (without the leading '#') handed to ApiConnector as the mandatory set.
_HASHTAGS = [
    "polskilad",
    "polskiwal",
    "nowylad",
    "nowywal",
    "drozyznapis",
]

# Value used for the search endpoint's max_results query parameter.
_MAX_RESULTS = 100

In [9]:
# Build the connector: mandatory hashtags, result limit, bearer token from the
# loaded config, then the optional hashtags ("pis", "tvp") as extra positionals.
# NOTE(review): ApiConnector joins the mandatory hashtags with spaces, which
# presumably means a tweet must contain all of them at once - confirm that this
# is intended, since the sample output only shows #PiS / #TVP matches.
conn = ApiConnector(
    _HASHTAGS,
    _MAX_RESULTS,
    config["bearer_token"],
    "pis",
    "tvp",
)

# Fetch the tweet texts and show a short preview.
results = conn.get_tweets()
print(results[:10])  # print first 10 results

['RT @news_psd: #PIS I co, fajnie jest?! Macie ruinƒô, w kt√≥rƒÖ nie wierzyli≈õcie. "Polski wa≈Ç", inflacja, dro≈ºyzna, covid https://t.co/P4KgP9xT‚Ä¶', 'RT @news_psd: #PIS I co, fajnie jest?! Macie ruinƒô, w kt√≥rƒÖ nie wierzyli≈õcie. "Polski wa≈Ç", inflacja, dro≈ºyzna, covid https://t.co/P4KgP9xT‚Ä¶', 'RT @Bart_Wielinski: Czy #Kaczy≈Ñski "p√≥jdzie siedzieƒá"? Sam prezes #PiS siƒô tego boi i przestrzega przed oddaniem w≈Çadzy \n\nWg. analizy prawn‚Ä¶', 'RT @pisorgpl: üí¨ Prezes #PiS, Wicepremier J. #Kaczy≈Ñski dla @Tygodnik_Sieci o ataku hybrydowym na polskiej granicy: Na dzisiaj mo≈ºna powiedz‚Ä¶', 'RT @Wiesci24pl: PiS pobi≈Ç Nixona. Stworzy≈Ç aferƒô wiƒôkszƒÖ ni≈º Watergate https://t.co/qMH2le1Qm5 #NigdywiecejPiS #Pegasus #PiS #SilniRazem', 'RT @esctoday: Poland: TVP unveils national final competing acts \nhttps://t.co/3jkXgPagXj #eurovision #Poland #TVP', '#PIS cieknie, przecieka, topi siƒô w oczach! Czy≈ºby to ju≈º koniec by≈Ç? #cieczkaWPis https://t.co/rHBy9oJKlk Czekam, kiedy i

'%23polskilad%20%23polskiwal%20%23nowylad%20%23nowywal%20%23drozyznapis%20OR%20%23pis%20OR%20%23tvp'