<a href="https://colab.research.google.com/github/Subarashiihibi/geosearch/blob/main/geosearch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# created on Dec 24, 2020
# modified on April 22, 2022
# @author:          Steven Chen
# @email:           chenj274@uw.edu
# @organization:    Department of Geography, University of Washington, Seattle
# @description:     Search geo-tagged tweets within the U.S. This script is modified from https://github.com/shawn-terryah/Twitter_Geolocation

In [None]:
import tweepy, json, time
import pandas as pd
from google.colab import files
# Create data on to Google Drive
from google.colab import drive
# Mount your Drive to the Colab VM.
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [None]:
class StreamListener(tweepy.StreamListener):
    """tweepy.StreamListener is a class provided by tweepy used to access
    the Twitter Streaming API to collect tweets in real-time.
    """

    def __init__(self, time_limit=60, file=""):
        """class initialization"""
        self.start_time = time.time()
        self.limit = time_limit
        self.result = []
        self.f = file
        super(StreamListener, self).__init__()

    def on_data(self, data):
        """This is called when data are streamed in."""
        if (time.time() - self.start_time) < self.limit:
            datajson = json.loads(data)
            # print(datajson, "\n")
            if 'id' not in datajson.keys():
                time.sleep(10)
            else:
                id = datajson['id']
                username = datajson['user']['screen_name']
                created_at = datajson['created_at']
                text = datajson['text'].strip().replace("\n", "")

                # process the geo-tags
                if datajson['coordinates'] == None:
                    try:
                        bbox = datajson['place']['bounding_box']['coordinates'][0]
                        lng = (bbox[0][0] + bbox[2][0]) / 2.0
                        lat = (bbox[0][1] + bbox[1][1]) / 2.0
                    except:
                        lat = 0
                        lng = 0
                else:
                    lng = datajson['coordinates']['coordinates'][0]
                    lat = datajson['coordinates']['coordinates'][1]

                if lat != 0:
                    row = {
                        'id': id,
                        'username': username,
                        'created_at': created_at,
                        'lng': lng,
                        'lat': lat,
                        'text': text
                    }
                    print(row)
                    self.result.append(row)
                else:
                    pass
        else:
            df = pd.DataFrame(self.result)
            df.to_csv(self.f, index=False)
            # download the csv to your local computer
            files.download(self.f)
            print("the csv has been downloaded to your local computer. The program has been completed successfully.")
            return False

In [20]:
if __name__ == "__main__":
    # These are provided to you through the Twitter API after you create a account
    # register a Twitter App to get the keys and access tokens.
    output_file1 = '/gdrive/My Drive/geotweets1.csv'
    output_file2 = '/gdrive/My Drive/geotweets2.csv'
    # Apply for your own Twitter API keys at https://developer.twitter.com/en/apply-for-access
    consumer_key = "consumer_key"
    consumer_secret = "consumer_secret"
    access_token = "access_token"
    access_token_secret = "access_token_secret"

    myauth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    myauth.set_access_token(access_token, access_token_secret)

    # LOCATIONS are the longitude, latitude coordinate corners for a box that restricts the
    # geographic area from which you will stream tweets. The first two define the southwest
    # corner of the box and the second two define the northeast corner of the box.
    LOCATION1 = [-124.7771694, 24.520833, -66.947028, 49.384472] # Contiguous US
    LOCATION2 = [ -5.688361, 50.004462, -0.834439, 60.861307] # Britain

    stream_listener1 = StreamListener(time_limit=60, file=output_file1)
    stream1 = tweepy.Stream(auth=myauth, listener=stream_listener1)
    stream1.filter(locations=LOCATION1, languages=['en'], encoding="utf-8", track=["Cats"])

    stream_listener2 = StreamListener(time_limit=60, file=output_file2)
    stream2 = tweepy.Stream(auth=myauth, listener=stream_listener2)
    stream2.filter(locations=LOCATION1, languages=['en'], encoding="utf-8", track=["Dogs"])

{'id': 1517726155197095938, 'username': 'bellawasson', 'created_at': 'Sat Apr 23 04:44:48 +0000 2022', 'lng': -98.319925, 'lat': 38.498196, 'text': 'was anyone else obsessed with that disney movie called mickey‘s house of villains or was that just me'}
{'id': 1517726156384083973, 'username': 'JoshuaBaer', 'created_at': 'Sat Apr 23 04:44:48 +0000 2022', 'lng': -97.754724, 'lat': 30.323345500000002, 'text': 'I don’t think I truly understood @join_royal until tonight.'}
{'id': 1517726156631515136, 'username': '511NY', 'created_at': 'Sat Apr 23 04:44:48 +0000 2022', 'lng': -72.653956, 'lat': 41.567614, 'text': 'Cleared: Construction on #CT66 EB at Arrigoni Bridge'}
{'id': 1517726157046640640, 'username': 'tysonxdiego', 'created_at': 'Sat Apr 23 04:44:48 +0000 2022', 'lng': -80.2333285, 'lat': 25.7823535, 'text': 'tired of being a joe want a girl i slut out every night 😭😍😭⁉️😭⁉️😭⁉️'}
{'id': 1517726157533335552, 'username': 'AuroraMoonrise', 'created_at': 'Sat Apr 23 04:44:48 +0000 2022', 'ln

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

the csv has been downloaded to your local computer. The program has been completed successfully.
{'id': 1517726410323996674, 'username': 'rileycoffman10', 'created_at': 'Sat Apr 23 04:45:48 +0000 2022', 'lng': -102.344087, 'lat': 31.873838499999998, 'text': '@yessiii_d Mr. MLB himself'}
{'id': 1517726410336456704, 'username': 'nurseandnewf', 'created_at': 'Sat Apr 23 04:45:48 +0000 2022', 'lng': -111.8808735, 'lat': 33.282450499999996, 'text': 'Ready to watch chugging vids 😂'}
{'id': 1517726411460464640, 'username': 'cambridgemike', 'created_at': 'Sat Apr 23 04:45:49 +0000 2022', 'lng': -120.664965, 'lat': 35.274809000000005, 'text': '@scooperhoops what happened to their threes 😫 Mikal doesn’t even take the shots anymore, that was his jam last year'}
{'id': 1517726412647526400, 'username': 'alex_harmon', 'created_at': 'Sat Apr 23 04:45:49 +0000 2022', 'lng': -121.9145285, 'lat': 37.715531, 'text': '@cassmoni_ It’s a beauty in its own way.'}
{'id': 1517726412949622784, 'username': 'Hern

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

the csv has been downloaded to your local computer. The program has been completed successfully.
