# Importing Libraries
### pandas: for data manipulation and analysis.
### ntscraper: for scraping tweets from Twitter via Nitter.
### re: for regular expressions, used for pattern matching.
### datetime and timedelta: for handling date and time.
### time: for time-related operations; in our case, to add delays between requests and between monitoring cycles.

In [54]:
import pandas as pd
from ntscraper import Nitter
import re
from datetime import datetime, timedelta
import time

### This line initializes the Nitter scraper for scraping tweets.

In [84]:
# Shared Nitter scraper instance used by get_tweets() below.
# In the recorded run, constructing it tested 77 Nitter instances and took
# roughly 90 seconds (see the "Testing instances" progress output).
scraper = Nitter()

Testing instances:  92%|██████████████████████████████████████████████████████████     | 71/77 [01:25<00:05,  1.15it/s]

03-May-24 17:56:15 - Certificate did not match expected hostname: nt.ggtyler.dev. Certificate: {'subject': ((('commonName', '4g.ggtyler.dev'),),), 'issuer': ((('countryName', 'US'),), (('organizationName', "Let's Encrypt"),), (('commonName', 'R3'),)), 'version': 3, 'serialNumber': '048FA3D68DCD65927A6C57C2A3C9C1F8CA14', 'notBefore': 'Mar 15 00:26:35 2024 GMT', 'notAfter': 'Jun 13 00:26:34 2024 GMT', 'subjectAltName': (('DNS', '4g.ggtyler.dev'),), 'OCSP': ('http://r3.o.lencr.org',), 'caIssuers': ('http://r3.i.lencr.org/',)}


Testing instances:  94%|██████████████████████████████████████████████████████████▉    | 72/77 [01:25<00:03,  1.31it/s]

03-May-24 17:56:16 - Certificate did not match expected hostname: nitter.uni-sonia.com. Certificate: {'subject': ((('commonName', '*.xserver.jp'),),), 'issuer': ((('countryName', 'JP'),), (('organizationName', 'CloudSecure Corporation'),), (('commonName', 'CloudSecure RSA Domain Validation Secure Server CA 2'),)), 'version': 3, 'serialNumber': 'ACA67AD2030638EE2DCE8E845B8299A6', 'notBefore': 'Mar 11 00:00:00 2024 GMT', 'notAfter': 'Apr 11 23:59:59 2025 GMT', 'subjectAltName': (('DNS', '*.xserver.jp'), ('DNS', 'xserver.jp')), 'OCSP': ('http://ocsp.sectigo.com',), 'caIssuers': ('http://crt.sectigo.com/CloudSecureRSADomainValidationSecureServerCA2.crt',)}


Testing instances:  99%|██████████████████████████████████████████████████████████████▏| 76/77 [01:30<00:01,  1.08s/it]

03-May-24 17:56:20 - Certificate did not match expected hostname: nitter.tinfoil-hat.net. Certificate: {'subject': ((('commonName', 'jelly.tinfoil-hat.de'),),), 'issuer': ((('countryName', 'US'),), (('organizationName', "Let's Encrypt"),), (('commonName', 'R3'),)), 'version': 3, 'serialNumber': '03F338CE809E122DC2875C50A27A840DD7A4', 'notBefore': 'Mar 15 22:40:16 2024 GMT', 'notAfter': 'Jun 13 22:40:15 2024 GMT', 'subjectAltName': (('DNS', 'jelly.tinfoil-hat.de'),), 'OCSP': ('http://r3.o.lencr.org',), 'caIssuers': ('http://r3.i.lencr.org/',)}


Testing instances: 100%|███████████████████████████████████████████████████████████████| 77/77 [01:30<00:00,  1.18s/it]


### This Function extracts Twitter usernames from profile URLs. It employs regular expressions to extract the username from the URLs.

In [7]:
# Matches the handle in a twitter.com / x.com profile URL.
# The lookbehind stops look-alike hosts (e.g. "nottwitter.com", "dropbox.com")
# from matching, and the dot is escaped so it only matches a literal ".".
_PROFILE_LINK_RE = re.compile(
    r'(?<![A-Za-z0-9-])(?:twitter|x)\.com/([A-Za-z0-9_]+)'
)


def extract_username(profile_links):
    """Extract Twitter usernames from profile URLs.

    Args:
        profile_links: Iterable of profile URL strings, e.g.
            "https://twitter.com/Barchart" or "https://x.com/Barchart".

    Returns:
        A list with the username of every link that matched, in input
        order. Links that do not look like a Twitter profile URL are
        reported on stdout and skipped.
    """
    usernames = []
    for link in profile_links:
        match = _PROFILE_LINK_RE.search(link)
        if match is None:
            print("Invalid Twitter profile link:", link)
            continue
        usernames.append(match.group(1))
    return usernames

### The Nitter scraper is used in this function to retrieve tweets from specified Twitter accounts. It then parses the tweets to extract important information and counts the occurrences of specific tickers within a defined time range. Finally, it produces an output message that summarises the ticker mentions and the total number of dollar signs (stocks) stated.

In [158]:
def get_tweets(accounts, tickers, no_tweets, time_period_minutes):
    """Fetch recent tweets for each account and count ticker mentions.

    Args:
        accounts: Iterable of Twitter usernames to scrape.
        tickers: Iterable of ticker symbols (without the leading '$').
        no_tweets: Number of tweets to request per account.
        time_period_minutes: Size of the look-back window, in minutes, used
            when counting ticker mentions.

    Returns:
        A tuple ``(data, filtered_data, output_message)`` where ``data`` is a
        DataFrame of all fetched tweets (columns: username, text, date, link),
        ``filtered_data`` holds only the tweets inside the look-back window,
        and ``output_message`` is a printable summary.
    """
    final_tweets = []
    total_dollar_signs = 0
    for index, account in enumerate(accounts):
        if index:
            # Pause between requests (presumably to avoid rate limiting);
            # there is no need to wait again after the last account.
            time.sleep(10)
        tweets = scraper.get_tweets(account, mode="user", number=no_tweets)
        for tweet in tweets['tweets']:
            final_tweets.append([tweet['user']['username'], tweet['text'], tweet['date'], tweet['link']])
            total_dollar_signs += tweet['text'].count('$')

    data = pd.DataFrame(final_tweets, columns=['username', 'text', 'date', 'link'])

    # utc=True guarantees a timezone-aware column even when no tweets were
    # fetched, so the comparison against the threshold below cannot mix
    # naive and aware timestamps.
    data['date'] = pd.to_datetime(data['date'], format='%b %d, %Y · %I:%M %p %Z', utc=True)

    current_time = pd.Timestamp.now(tz='UTC')
    time_threshold = current_time - timedelta(minutes=time_period_minutes)
    filtered_data = data[data['date'] >= time_threshold]

    ticker_counts = {}
    for ticker in tickers:
        # (?!\w) stops '$SPX' from also counting '$SPXL'; cashtags are matched
        # case-insensitively so 'Ibit' counts '$IBIT' too.
        pattern = r'\$' + re.escape(ticker) + r'(?!\w)'
        ticker_counts[ticker] = filtered_data['text'].str.count(pattern, flags=re.IGNORECASE).sum()

    lines = [
        f"'${ticker}' was mentioned '{count}' times in the last '{time_period_minutes}' minutes."
        for ticker, count in ticker_counts.items()
    ]
    # The '$' total covers every fetched tweet, not just the filtered window.
    lines.append(f"Total '$' signs mentioned across all tweets: {total_dollar_signs}")
    output_message = "\n".join(lines)

    return data, filtered_data, output_message

### This function saves a DataFrame to a CSV file. The caller supplies the filename, which includes the date and time of the iteration.

In [163]:
def save_to_csv(data, filename):
    """Write the DataFrame *data* to *filename* as CSV, omitting the index column."""
    csv_options = {"index": False}
    data.to_csv(filename, **csv_options)

### The input parameters for the get_tweets() function are defined here: Twitter profile URLs, tickers to search for, the time period for filtering tweets, and the number of tweets to obtain for each account.

In [153]:
# Profile pages of the accounts to monitor.
profile_links = [
    "https://twitter.com/Mr_Derivatives",
    "https://twitter.com/warrior_0719",
    "https://twitter.com/yuriymatso",
    "https://twitter.com/ChartingProdigy",
    "https://twitter.com/allstarcharts",
    "https://twitter.com/TriggerTrades",
    "https://twitter.com/AdamMancini4",
    "https://twitter.com/CordovaTrades",
    "https://twitter.com/Barchart",
    "https://twitter.com/RoyLMattox",
]

# Search parameters passed to get_tweets().
tickers = ["TSLA", "SPX", "Ibit", "PYPL"]
no_tweets = 15  # tweets requested per account
time_period_minutes = 5  # last 5 minutes

# Reduce the profile URLs to bare usernames for the scraper.
usernames = extract_username(profile_links)
print("Usernames extracted:", usernames)

Usernames extracted: ['Mr_Derivatives', 'warrior_0719', 'yuriymatso', 'ChartingProdigy', 'allstarcharts', 'TriggerTrades', 'AdamMancini4', 'CordovaTrades', 'Barchart', 'RoyLMattox']


### This section of the code generates a loop that continually monitors tweets, retrieves data, and saves it to a CSV file. It asks the user at the end of the loop if they want to stop it or not, and it waits 90 seconds for the following cycle. If stopped, it exits the loop and terminates the monitoring process.

In [164]:
# Monitoring loop: fetch tweets, print the summary, save every fetched tweet to
# a timestamped CSV, then ask whether to stop. Runs until the user answers 'y'.
while True:
    # Timestamp taken at the start of the cycle; used in the CSV file name.
    current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    tweets_df, filtered_tweets_df, output_message = get_tweets(usernames, tickers, no_tweets, time_period_minutes)
    print(output_message)

    save_to_csv(tweets_df, f"tweets_{current_time}.csv")

    stop_command = input("Do you want to stop? (Enter 'N' for NO, 'Y' for YES): ")
    # Tolerate surrounding whitespace and either letter case in the answer.
    if stop_command.strip().lower() == 'y':
        print("Task finished")
        break

    print("wait for 90 seconds for the next iteration")
    time.sleep(90)

03-May-24 20:56:35 - No instance specified, using random instance https://nitter.privacydev.net
03-May-24 20:56:41 - Current stats for Mr_Derivatives: 15 tweets, 0 threads...
03-May-24 20:56:51 - No instance specified, using random instance https://nitter.privacydev.net
03-May-24 20:56:55 - Current stats for warrior_0719: 15 tweets, 0 threads...
03-May-24 20:57:05 - No instance specified, using random instance https://nitter.esmailelbob.xyz
03-May-24 20:57:06 - Fetching error: Instance has been rate limited.Use another instance or try again later.
03-May-24 20:57:18 - No instance specified, using random instance https://nitter.privacydev.net
03-May-24 20:57:23 - Current stats for ChartingProdigy: 15 tweets, 0 threads...
03-May-24 20:57:33 - No instance specified, using random instance https://nitter.privacydev.net
03-May-24 20:57:39 - Current stats for allstarcharts: 15 tweets, 0 threads...
03-May-24 20:57:49 - No instance specified, using random instance https://nitter.privacydev.net
