In [2]:
import os
import time
from datetime import datetime, timedelta
from itertools import islice

import pandas as pd
import snscrape.modules.twitter as sntwitter

In [6]:
def get_tweets(query, limit, columns):
    """Scrape up to `limit` tweets matching `query` into a DataFrame.

    Args:
        query: Search string handed to ``sntwitter.TwitterSearchScraper``.
        limit: Maximum number of tweets to collect. ``None`` or a negative
            value means "no cap": the scraper is drained completely.
        columns: Two column labels for the resulting frame, in the order
            (tweet date, tweet text).

    Returns:
        pandas.DataFrame with one row per tweet holding ``tweet.date`` and
        ``tweet.content``; newlines and tabs in the text are replaced by the
        two-character sequences ``\\n`` and ``\\t``.
    """
    # A None/negative limit never matched the previous `len(tweets) == limit`
    # test, i.e. it already meant "unbounded"; keep that contract.
    cap = limit if limit is not None and limit >= 0 else None
    scraper = sntwitter.TwitterSearchScraper(query)

    rows = []
    # islice stops pulling from the scraper once `cap` items were taken, so no
    # extra tweet is fetched (and counted) just to discover the cap was hit.
    for count, tweet in enumerate(islice(scraper.get_items(), cap), start=1):
        if count % 500 == 0:
            print(str(count) + " tweets downloaded...")
        text = tweet.content.replace('\n', '\\n').replace('\t', '\\t')
        rows.append([tweet.date, text])

    return pd.DataFrame(rows, columns=columns)

def save_tweets_to_csv(tweet_df, path, filename):
    """Write `tweet_df` to `<path>/<filename>` as CSV, creating `path` if needed.

    Args:
        tweet_df: DataFrame to serialise (its index is written as the first
            CSV column, as `DataFrame.to_csv` does by default).
        path: Target directory. An empty string means the current working
            directory.
        filename: Name of the CSV file inside `path`.
    """
    # exist_ok avoids the check-then-create race; the guard keeps an empty
    # `path` from reaching os.makedirs(''), which raises.
    if path:
        os.makedirs(path, exist_ok=True)
    # os.path.join('', filename) is just `filename`, whereas plain string
    # concatenation would have produced '/filename' (the filesystem root).
    target = os.path.join(path, filename)
    tweet_df.to_csv(target)
    print('Saved tweets to ' + target)
    
def collect_tweets(start_date, end_date, limit, columns, query, dir_path, filename):
    """Scrape tweets one day at a time over [start_date, end_date) and save them.

    For every day in the window a search query is built from `query` plus
    ``-filter:retweets -filter:links`` and that day's ``since:``/``until:``
    bounds, and passed to `get_tweets`. The daily results are combined, rows
    with a repeated tweet text are dropped (first occurrence kept), and the
    result is written with `save_tweets_to_csv`.

    Args:
        start_date: First day to query (date or datetime).
        end_date: Exclusive end of the window; the loop stops once the
            current day is no longer earlier than this.
        limit: Per-day cap forwarded to `get_tweets`.
        columns: Column labels forwarded to `get_tweets`; the second label is
            the tweet-text column used for de-duplication.
        query: Base search string.
        dir_path: Output directory forwarded to `save_tweets_to_csv`.
        filename: Output file name forwarded to `save_tweets_to_csv`.
    """
    started_at = time.time()
    daily_frames = []

    current_date = start_date
    while current_date < end_date:
        next_day = current_date + timedelta(days=1)
        search_query = query + f" -filter:retweets -filter:links until:{next_day:%Y-%m-%d}  since:{current_date:%Y-%m-%d}"
        tweets = get_tweets(search_query, limit, columns)
        # Collect frames and concatenate once after the loop; days without
        # tweets are skipped so no empty frame takes part in the concat.
        if len(tweets) > 0:
            daily_frames.append(tweets)
        print('Appended ' + str(len(tweets)) + ' tweets from ' + str(current_date))
        print('Time elapsed: ' + str(time.time() - started_at) + ' seconds.')
        current_date = next_day

    if daily_frames:
        # ignore_index gives the combined frame one unique running index
        # instead of repeating each day's 0..n labels.
        result_df = pd.concat(daily_frames, ignore_index=True)
    else:
        result_df = pd.DataFrame(columns=columns)

    init_length = len(result_df)
    # get_tweets stores the tweet text under the second label of `columns`.
    text_column = columns[1]
    result_df = result_df.drop_duplicates(subset=[text_column]).reset_index(drop=True)
    print(f"{init_length - len(result_df)} duplicates dropped")

    save_tweets_to_csv(result_df, dir_path, filename)
    
    

In [7]:
# Collection window used by the collect_tweets calls in this notebook; the
# window is walked one day at a time and end_date itself is not queried.
start_date = datetime(year=2021, month=12, day=24)
end_date = datetime(year=2022, month=5, day=24)

# Output column labels, in the order get_tweets fills them: tweet date, tweet text.
columns = ["Date", "text"]

## Question 1

In [8]:
# Question 1: at most 500 tweets per day (before de-duplication),
# written to output/q1/all_tweets.csv.
collect_tweets(
    start_date=start_date,
    end_date=end_date,
    limit=500,
    columns=columns,
    query="russia ukraine lang:en",
    dir_path='output/q1',
    filename='all_tweets.csv',
)

500 tweets downloaded...
Appended 500 tweets from 2021-12-24 00:00:00
Time elapsed: 18.326768159866333 seconds.
500 tweets downloaded...
Appended 500 tweets from 2021-12-25 00:00:00
Time elapsed: 36.37900424003601 seconds.
500 tweets downloaded...
Appended 500 tweets from 2021-12-26 00:00:00
Time elapsed: 54.0063111782074 seconds.
500 tweets downloaded...
Appended 500 tweets from 2021-12-27 00:00:00
Time elapsed: 72.29681515693665 seconds.
500 tweets downloaded...
Appended 500 tweets from 2021-12-28 00:00:00
Time elapsed: 91.5130820274353 seconds.
500 tweets downloaded...
Appended 500 tweets from 2021-12-29 00:00:00
Time elapsed: 110.26443791389465 seconds.
500 tweets downloaded...
Appended 500 tweets from 2021-12-30 00:00:00
Time elapsed: 128.1427140235901 seconds.
500 tweets downloaded...
Appended 500 tweets from 2021-12-31 00:00:00
Time elapsed: 147.05149102210999 seconds.
500 tweets downloaded...
Appended 500 tweets from 2022-01-01 00:00:00
Time elapsed: 164.40773510932922 seconds.

## Question 2

### English

In [10]:
# English "putin" query with "zelensky" excluded: at most 250 tweets per day,
# written to output/q2/putin_english.csv.
collect_tweets(
    start_date=start_date,
    end_date=end_date,
    limit=250,
    columns=columns,
    query="putin -zelensky lang:en",
    dir_path='output/q2',
    filename='putin_english.csv',
)

Appended 250 tweets from 2021-12-24 00:00:00
Time elapsed: 8.73956298828125 seconds.
Appended 250 tweets from 2021-12-25 00:00:00
Time elapsed: 18.12817907333374 seconds.
Appended 250 tweets from 2021-12-26 00:00:00
Time elapsed: 27.091392993927002 seconds.
Appended 250 tweets from 2021-12-27 00:00:00
Time elapsed: 36.06103825569153 seconds.
Appended 250 tweets from 2021-12-28 00:00:00
Time elapsed: 44.90437602996826 seconds.
Appended 250 tweets from 2021-12-29 00:00:00
Time elapsed: 53.79856514930725 seconds.
Appended 250 tweets from 2021-12-30 00:00:00
Time elapsed: 62.38889026641846 seconds.
Appended 250 tweets from 2021-12-31 00:00:00
Time elapsed: 71.47194504737854 seconds.
Appended 250 tweets from 2022-01-01 00:00:00
Time elapsed: 80.68848896026611 seconds.
Appended 250 tweets from 2022-01-02 00:00:00
Time elapsed: 89.92658424377441 seconds.
Appended 250 tweets from 2022-01-03 00:00:00
Time elapsed: 98.86288404464722 seconds.
Appended 250 tweets from 2022-01-04 00:00:00
Time elap

In [11]:
# English "zelensky" query with "putin" excluded: at most 250 tweets per day,
# written to output/q2/zelensky_english.csv.
collect_tweets(
    start_date=start_date,
    end_date=end_date,
    limit=250,
    columns=columns,
    query="zelensky -putin lang:en",
    dir_path='output/q2',
    filename='zelensky_english.csv',
)

Appended 60 tweets from 2021-12-24 00:00:00
Time elapsed: 3.5430400371551514 seconds.
Appended 32 tweets from 2021-12-25 00:00:00
Time elapsed: 6.577473878860474 seconds.
Appended 32 tweets from 2021-12-26 00:00:00
Time elapsed: 9.444244146347046 seconds.
Appended 56 tweets from 2021-12-27 00:00:00
Time elapsed: 12.968096017837524 seconds.
Appended 63 tweets from 2021-12-28 00:00:00
Time elapsed: 16.73518705368042 seconds.
Appended 171 tweets from 2021-12-29 00:00:00
Time elapsed: 23.774842023849487 seconds.
Appended 250 tweets from 2021-12-30 00:00:00
Time elapsed: 32.29652500152588 seconds.
Appended 112 tweets from 2021-12-31 00:00:00
Time elapsed: 37.67359209060669 seconds.
Appended 50 tweets from 2022-01-01 00:00:00
Time elapsed: 41.109508991241455 seconds.
Appended 109 tweets from 2022-01-02 00:00:00
Time elapsed: 46.35728096961975 seconds.
Appended 140 tweets from 2022-01-03 00:00:00
Time elapsed: 52.49887824058533 seconds.
Appended 36 tweets from 2022-01-04 00:00:00
Time elapsed

In [12]:
# English "nato" query with both leaders' names excluded: at most 250 tweets
# per day, written to output/q2/nato_english.csv.
collect_tweets(
    start_date=start_date,
    end_date=end_date,
    limit=250,
    columns=columns,
    query="nato -putin -zelensky lang:en",
    dir_path='output/q2',
    filename='nato_english.csv',
)

Appended 250 tweets from 2021-12-24 00:00:00
Time elapsed: 10.235174179077148 seconds.
Appended 250 tweets from 2021-12-25 00:00:00
Time elapsed: 20.058878898620605 seconds.
Appended 250 tweets from 2021-12-26 00:00:00
Time elapsed: 30.74800205230713 seconds.
Appended 250 tweets from 2021-12-27 00:00:00
Time elapsed: 40.163536071777344 seconds.
Appended 250 tweets from 2021-12-28 00:00:00
Time elapsed: 49.56087613105774 seconds.
Appended 250 tweets from 2021-12-29 00:00:00
Time elapsed: 59.60997414588928 seconds.
Appended 250 tweets from 2021-12-30 00:00:00
Time elapsed: 69.06878805160522 seconds.
Appended 250 tweets from 2021-12-31 00:00:00
Time elapsed: 79.54242610931396 seconds.
Appended 250 tweets from 2022-01-01 00:00:00
Time elapsed: 88.35621190071106 seconds.
Appended 250 tweets from 2022-01-02 00:00:00
Time elapsed: 98.08598804473877 seconds.
Appended 250 tweets from 2022-01-03 00:00:00
Time elapsed: 107.94519805908203 seconds.
Appended 250 tweets from 2022-01-04 00:00:00
Time 

### Russian

In [13]:
# Russian-language counterpart of the "putin" query: at most 250 tweets per
# day, written to output/q2/putin_russian.csv.
collect_tweets(
    start_date=start_date,
    end_date=end_date,
    limit=250,
    columns=columns,
    query="Путин -Зеленский lang:ru",
    dir_path='output/q2',
    filename='putin_russian.csv',
)

Appended 250 tweets from 2021-12-24 00:00:00
Time elapsed: 9.104002952575684 seconds.
Appended 250 tweets from 2021-12-25 00:00:00
Time elapsed: 18.419491052627563 seconds.
Appended 250 tweets from 2021-12-26 00:00:00
Time elapsed: 27.568722248077393 seconds.
Appended 250 tweets from 2021-12-27 00:00:00
Time elapsed: 37.08230400085449 seconds.
Appended 250 tweets from 2021-12-28 00:00:00
Time elapsed: 45.88728594779968 seconds.
Appended 250 tweets from 2021-12-29 00:00:00
Time elapsed: 55.27373719215393 seconds.
Appended 250 tweets from 2021-12-30 00:00:00
Time elapsed: 64.18189787864685 seconds.
Appended 250 tweets from 2021-12-31 00:00:00
Time elapsed: 73.34803009033203 seconds.
Appended 250 tweets from 2022-01-01 00:00:00
Time elapsed: 82.63632416725159 seconds.
Appended 250 tweets from 2022-01-02 00:00:00
Time elapsed: 91.5897970199585 seconds.
Appended 250 tweets from 2022-01-03 00:00:00
Time elapsed: 101.14511609077454 seconds.
Appended 250 tweets from 2022-01-04 00:00:00
Time el

In [14]:
# Russian-language counterpart of the "zelensky" query: at most 250 tweets per
# day, written to output/q2/zelensky_russian.csv.
collect_tweets(
    start_date=start_date,
    end_date=end_date,
    limit=250,
    columns=columns,
    query="Зеленский -Путин lang:ru",
    dir_path='output/q2',
    filename='zelensky_russian.csv',
)

Appended 210 tweets from 2021-12-24 00:00:00
Time elapsed: 7.992153167724609 seconds.
Appended 250 tweets from 2021-12-25 00:00:00
Time elapsed: 16.147468090057373 seconds.
Appended 154 tweets from 2021-12-26 00:00:00
Time elapsed: 22.662899255752563 seconds.
Appended 204 tweets from 2021-12-27 00:00:00
Time elapsed: 30.656819343566895 seconds.
Appended 136 tweets from 2021-12-28 00:00:00
Time elapsed: 36.63778305053711 seconds.
Appended 203 tweets from 2021-12-29 00:00:00
Time elapsed: 44.659802198410034 seconds.
Appended 219 tweets from 2021-12-30 00:00:00
Time elapsed: 52.746793270111084 seconds.
Appended 250 tweets from 2021-12-31 00:00:00
Time elapsed: 60.61864423751831 seconds.
Appended 250 tweets from 2022-01-01 00:00:00
Time elapsed: 68.46469235420227 seconds.
Appended 186 tweets from 2022-01-02 00:00:00
Time elapsed: 76.32776808738708 seconds.
Appended 242 tweets from 2022-01-03 00:00:00
Time elapsed: 85.41327118873596 seconds.
Appended 193 tweets from 2022-01-04 00:00:00
Time

In [15]:
# Russian-language counterpart of the "nato" query: at most 250 tweets per
# day, written to output/q2/nato_russian.csv.
collect_tweets(
    start_date=start_date,
    end_date=end_date,
    limit=250,
    columns=columns,
    query="НАТО -Зеленский -Путин lang:ru",
    dir_path='output/q2',
    filename='nato_russian.csv',
)

Appended 250 tweets from 2021-12-24 00:00:00
Time elapsed: 8.13637113571167 seconds.
Appended 250 tweets from 2021-12-25 00:00:00
Time elapsed: 16.604462146759033 seconds.
Appended 250 tweets from 2021-12-26 00:00:00
Time elapsed: 25.020915985107422 seconds.
Appended 250 tweets from 2021-12-27 00:00:00
Time elapsed: 33.5451922416687 seconds.
Appended 250 tweets from 2021-12-28 00:00:00
Time elapsed: 43.32107424736023 seconds.
Appended 250 tweets from 2021-12-29 00:00:00
Time elapsed: 51.795109033584595 seconds.
Appended 250 tweets from 2021-12-30 00:00:00
Time elapsed: 60.20516085624695 seconds.
Appended 250 tweets from 2021-12-31 00:00:00
Time elapsed: 68.05986309051514 seconds.
Appended 250 tweets from 2022-01-01 00:00:00
Time elapsed: 76.404226064682 seconds.
Appended 250 tweets from 2022-01-02 00:00:00
Time elapsed: 84.42023229598999 seconds.
Appended 250 tweets from 2022-01-03 00:00:00
Time elapsed: 92.98229813575745 seconds.
Appended 250 tweets from 2022-01-04 00:00:00
Time elaps

## Question 3

In [None]:
# Question 3, `to:FoxNews` query: at most 190 tweets per day,
# written to output/q3/fox_news.csv.
collect_tweets(
    start_date=start_date,
    end_date=end_date,
    limit=190,
    columns=columns,
    query="(russia OR ukraine) (to:FoxNews) lang:en",
    dir_path='output/q3',
    filename='fox_news.csv',
)

In [None]:
# Question 3, `to:nytimes` query: at most 190 tweets per day,
# written to output/q3/new_york_times.csv.
collect_tweets(
    start_date=start_date,
    end_date=end_date,
    limit=190,
    columns=columns,
    query="(russia OR ukraine) (to:nytimes) lang:en",
    dir_path='output/q3',
    filename='new_york_times.csv',
)

In [None]:
# `df` was never assigned in this notebook (collect_tweets only writes CSV
# files and returns nothing), so the original `print(df)` raised NameError on a
# fresh kernel. Load a saved result before inspecting it.
# NOTE(review): the most recently written file is assumed to be the one meant
# here -- adjust the path if another result set was intended.
df = pd.read_csv('output/q3/new_york_times.csv', index_col=0)
df.head()