# Tutorial


In [1]:
# Importing our own models
import helper
from analyzer import Analyzer
from crawler import Crawler
from plotter import Plotter
from model import TextBlob, TrainedSentimentModel, Vader

# Additional imports
import datetime

In [2]:
# User-editable settings for the crawler, the analyzer and the plotter.
config = {
    # Whether to authenticate as a user or as an application.
    "user_auth": False,
    # Path to the auth data.
    # NOTE(review): machine-specific absolute path; adjust it to your own checkout.
    "auth_path": "/home/maxi/Documents/UNI/Ethics/Project/repo/Corona_Sentinent/",
    "search": {
        # Based on the helper.GEOCODES dictionary.
        "location": "darmstadt",
        # Optional; defaults to 100.
        "radius": 100,
        # Search query (list of str). Either query or location has to be non-empty.
        "query": ["die", "der", "das"],
        # Maximum number of searches; default is 1000.
        "max_searches": 1000,
        # Number of results with the filter options below applied.
        "num_results": 10,
        # Default True; controls the rate-limit prints (False presumably silences them).
        "rate_limit": False,
        # The filter applies to the search.
        "filter": {
            # Removes replies when True, does nothing when False.
            "not_reply": True,
            # Removes retweets when True, does nothing when False.
            "not_retweet": True,
            # None or a datetime (e.g. datetime.datetime(2020, 5, 20));
            # filters tweets until the specified time.
            "until": datetime.datetime(2020, 3, 1),
        },
    },
    "analyze_sentiment": {
        # Currently disabled options:
        # "pos_boundary": 0.5,  # boundary for classifying tweets as "extremely" positive
        # "neg_boundary": 0.5,  # boundary for classifying tweets as "extremely" negative
        # Where sentiment analysis files are stored for future use.
        "users_dir": "saved_data/full_scan_both/results/all/",
    },
    # Optional section; mandatory only when querying for users.
    "get_user": {
        # Mandatory: filters users with too many / too few tweets.
        "good_user": True,
        # Either 'recent_user' or 'recent_retweeted_user'.
        "search_type": "recent_user",
        # Mandatory: how many users to return in a search.
        "num_users": 40,
        # If True, user ids are remembered in the session across several scans.
        "unique_ids": True,
    },
    "plot": {
        "title": "Testing",
    },
}


In [3]:
# Build the full config from the user-supplied one via helper.init_config.
# Important: re-run this cell every time something inside the config has changed.
# NOTE(review): `config` is overwritten here, so re-running this cell without first
# re-running the config cell feeds an already-initialized config back into
# init_config -- confirm that init_config tolerates that.
config = helper.init_config(config)

# Create the crawler and the plotter objects from the initialized config.
crawler = Crawler(config)
plotter = Plotter(config)

In [4]:
""" The first and easiest way to get to data is to use this call. Based on your configurations it will 
give you different tweets. By changing the config file and realoading the crawler (previous cell), you can
quickly change the results """
tweets = crawler.get_tweets()




In [5]:
""" tweets is now a list of tweepy Status objects """
print(len(tweets))
print(type(tweets[0]))

""" Using the properties of this object one can get information about the tweet, e.g. """
print(tweets[0].text)
print(tweets[0].id)

10
<class 'tweepy.models.Status'>
Ende November soll in #Kefenrod ein Flurbereinigungsverfahren eingeleitet werden. Der Kreis-Anzeiger erklärt, was d… https://t.co/NmO4vJ9meQ
1320395975123476480


In [None]:
""" For a more advanced scraping of data, we crawl random users which have
tweeted recently and add those user ids to our list. This step can take some time, 
dependent on the """
users = crawler.get_users()

In [None]:
""" With the list of those users, we can look at the timeline of each user."""
timeline = crawler.get_timeline(users[0])

In [None]:
""" Timeline is a list of tweets only from this user """
print("Number of tweets on timeline: ", len(timeline))
print(type(timeline[0]))

In [None]:
"""
In order to analyze our scraped data we load different models to run the analyzation on.
We chose 2 pre trained models: TextBlob and Vader. TextBlob is the most
simple model, Vader a little bit more advanced and then our model, loaded
here with TrainedSentimentModel, which is optimized on German Tweets.
"""

# Load 3 different models
blob_model = TextBlob()
vader_model = Vader()
our_model = TrainedSentimentModel()

# Configure which model should be used for analyzation
analyzer_blob = Analyzer(config, blob_model)
analyzer_vader = Analyzer(config, blob_model)
analyzer_our = Analyzer(config, blob_model)

In [None]:
""" With this simple script we analyze the timeline of all users and plot those on 
a simple histogram to analyze the overall sentiment """
sentis = analyzer_blob.analyze_sentiment(timeline)
plotter.simple_hist(sentis)

sentis = analyzer_vader.analyze_sentiment(timeline)
plotter.simple_hist(sentis)

sentis = analyzer_our.analyze_sentiment(timeline)
plotter.simple_hist(sentis)