# Tweet Collector
### Collects tweets by using the `Twitter API`
#### `Python 3.9.8`

In [3]:
from time import sleep
import os
import tweepy as tw
import csv

`Twitter API` keys. [*Retrieve the keys from `API Keys.zip` using the password provided, and use them in place of the placeholder values below.*]

In [4]:
# Credentials are taken from environment variables when they are set, so real keys
# never have to be saved inside the notebook. The quoted placeholders are the
# fallback values and can still be replaced by hand.
consumer_key = os.environ.get('TWITTER_CONSUMER_KEY', 'consumer_key')
consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET', 'consumer_secret')
access_token = os.environ.get('TWITTER_ACCESS_TOKEN', 'access_token')
access_token_secret = os.environ.get('TWITTER_ACCESS_TOKEN_SECRET', 'access_token_secret')

`Twitter API` Setup.

In [5]:
# Build the OAuth handler from the consumer key pair, then attach the user's access token pair.
auth = tw.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# `api` is the shared client used by the cells below.
# NOTE(review): wait_on_rate_limit / wait_on_rate_limit_notify presumably make the client
# pause (and print a notice) when a rate limit is hit instead of raising — confirm against
# the installed tweepy version before upgrading it.
api = tw.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

Select `United Kingdom` as query constraint.

In [6]:
# Look up places matching "United Kingdom" at country granularity.
places = api.geo_search(query="United Kingdom", granularity="country")
# The third result is selected by position; this raises IndexError if fewer than three
# places come back.
# NOTE(review): presumably index 2 was the United Kingdom entry when this was written —
# confirm, since nothing here checks which place was actually selected.
place_id = places[2].id

## File I/O method definitions
>### - `save_tweet_data(tweets)`:
>Takes a list of tweet objects (*`tweets`*), casts the JSON data of each English-language tweet to a string, and appends the ones not already saved to `collected_tweet_data.txt`, one per line (separated with a new line character, `\n`).
***
>### - `remove_duplicate_tweets()`:
>Reads through `collected_tweet_data.txt` and removes any duplicate tweet data.

In [7]:
def save_tweet_data(tweets, path='../Data/collected_tweet_data.txt'):
    """Append previously unseen English tweets to the collection file.

    Each saved tweet occupies one line: the string form of its ``_json`` payload.
    A tweet counts as already saved when the first 100 characters of that string
    match a line already in the file or a tweet written earlier in the same call.

    Args:
        tweets: Iterable of tweet objects exposing ``lang`` and ``_json``.
        path: File to append to. Its parent directory and the file itself are
            created when missing.

    Returns:
        None.
    """
    key_length = 100  # number of leading characters used as the duplicate key

    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # Collect the keys of everything already stored. Lines are read directly so
    # that blank lines are skipped and quote characters are taken literally.
    existing = set()
    if os.path.exists(path):
        with open(path, 'r', encoding="utf-8") as f:
            for line in f:
                line_data = line.rstrip('\n')
                if line_data:
                    existing.add(line_data[:key_length])

    # Append mode creates the file on first use.
    with open(path, 'a', encoding="utf-8") as f:
        for tweet in tweets:
            if tweet.lang != 'en':
                continue
            tweet_data = str(tweet._json)
            key = tweet_data[:key_length]
            if key in existing:
                continue
            # Remember the key so a repeat within this batch is not written twice.
            existing.add(key)
            f.write(tweet_data)
            f.write('\n')
                    
def remove_duplicate_tweets(path='../Data/collected_tweet_data.txt'):
    """Rewrite the collection file keeping only the first copy of each tweet.

    Two lines are treated as the same tweet when their first 100 characters
    match. The first occurrence is kept and the original order is preserved.
    Blank lines are dropped. Nothing happens when the file does not exist.

    Args:
        path: File to de-duplicate in place.

    Returns:
        None.
    """
    key_length = 100  # number of leading characters used as the duplicate key

    if not os.path.exists(path):
        return

    seen = set()
    unique_lines = []
    with open(path, 'r', encoding="utf-8") as f:
        for line in f:
            tweet = line.rstrip('\n')
            if not tweet:
                continue
            key = tweet[:key_length]
            if key not in seen:
                seen.add(key)
                unique_lines.append(tweet)

    # 'w' truncates the file first, so no stale tail is left behind when the
    # de-duplicated content is shorter than what was there before.
    with open(path, 'w', encoding="utf-8") as f:
        for tweet in unique_lines:
            f.write(tweet)
            f.write('\n')

### `get_tweets()`:
Retrieves tweets and then saves them.

In [8]:
def get_tweets():
    """Search for non-retweet tweets from the selected place and save them.

    Uses the module-level ``api`` client and ``place_id``. If the search raises,
    the error is printed and nothing is saved for this call, so a surrounding
    polling loop can simply try again later.
    """
    # Retrieve tweets
    try:
        tweets = api.search(q="place:%s -filter:retweets" % place_id)
    except Exception as e:
        # Best-effort collection: report the failure and skip this round.
        # Continuing would reference ``tweets`` before it was ever assigned.
        print(e)
        return

    # Save tweets
    save_tweet_data(tweets)

Run this cell to start collecting tweets automatically, within the API rate limits.

In [9]:
# Pause, in seconds, between two consecutive collection rounds.
POLL_INTERVAL_SECONDS = 15

try:
    # Keep collecting until the kernel is interrupted.
    while True:
        get_tweets()
        sleep(POLL_INTERVAL_SECONDS)
except KeyboardInterrupt:
    print('Collection halted.')

Collection halted.
