# TVDB - Getting Tweets about TV Shows

Twitter implements OAuth 1.0A as its standard authentication mechanism, and in order to use it to make requests to Twitter's API, you'll need to go to https://dev.twitter.com/apps and create a sample application. 

Twitter examples from the python-twitter API  [https://github.com/ideoforms/python-twitter-examples](https://github.com/ideoforms/python-twitter-examples)  

# Authorizing an application to access Twitter account data

In [2]:
import os

import pandas as pd
import twitter # pip install twitter

# Go to http://dev.twitter.com/apps/new to create an app and get values
# for these credentials.
# See https://dev.twitter.com/docs/auth/oauth for more information
# on Twitter's OAuth implementation.
#
# The credentials are read from environment variables rather than being
# written into the notebook: anything hardcoded in a cell is visible to
# everyone who can read the file or its history. Set the four variables
# named below before starting the kernel. Any key that was ever pasted
# directly into a shared notebook should be treated as compromised and
# regenerated in the Twitter developer console.

def _require_env(name):
    """Return the value of environment variable `name`.

    Raises RuntimeError with an actionable message when the variable is
    unset or empty, so a missing credential fails here instead of as an
    opaque authentication error on the first API call.
    """
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(
            "Environment variable %s is not set; export your Twitter "
            "credentials before running this notebook." % name
        )
    return value

CONSUMER_KEY = _require_env('TWITTER_CONSUMER_KEY')
CONSUMER_SECRET = _require_env('TWITTER_CONSUMER_SECRET')
OAUTH_TOKEN = _require_env('TWITTER_OAUTH_TOKEN')
OAUTH_TOKEN_SECRET = _require_env('TWITTER_OAUTH_TOKEN_SECRET')

auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,
                           CONSUMER_KEY, CONSUMER_SECRET)

twitter_api = twitter.Twitter(auth=auth)

# Nothing to see by displaying twitter_api except that it's now a
# defined variable

print(twitter_api)

<twitter.api.Twitter object at 0x0000020322C07A58>


## Get the list of TV shows (which we will search Twitter for) using BeautifulSoup

In [5]:
from requests import get
from bs4 import BeautifulSoup as bs

# Scrape the titles listed on IMDb's TV-series search page.
url = "https://www.imdb.com/search/title?title_type=tv_series"

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as results.
response = get(url, timeout=30)
response.raise_for_status()

html_soup = bs(response.text, 'html.parser')

# The result cards are looked up inside the element with id="main".
id_check = html_soup.find(id="main")
if id_check is None:
    raise RuntimeError(
        'Could not find the element with id="main" at %s; '
        'the page layout may have changed.' % url
    )

tv_show_container = id_check.find_all(class_="lister-item mode-advanced")
if not tv_show_container:
    raise RuntimeError(
        'No "lister-item mode-advanced" entries found at %s; '
        'the page layout may have changed.' % url
    )

#list to store scraped value data in:
tv_show_names = []

for container in tv_show_container:
    # Only entries that have a "ratings-bar" div are kept.
    if container.find("div", class_="ratings-bar") is not None:
        name = container.h3.a.text
        tv_show_names.append(name)

#Store the list of TV Shows in a data frame
TVShowDF = pd.DataFrame({"tv_show_names": tv_show_names})
print(TVShowDF)

                        tv_show_names
0                        The Punisher
1                       Sex Education
2                                 You
3                     Game of Thrones
4                      True Detective
5                              Titans
6                   Grace and Frankie
7                             Vikings
8                         The Orville
9                Star Trek: Discovery
10                       Black Mirror
11                          Outlander
12                      American Gods
13                     Grey's Anatomy
14                 Brooklyn Nine-Nine
15                       Supernatural
16               American Crime Story
17                        The Passage
18                          Riverdale
19                          Shameless
20                     Peaky Blinders
21                     The Good Place
22                             Gotham
23                   The Walking Dead
24                         The Office
25          

## Create a new column in the TVShowDF to hold hashtags which we will search on Twitter

In [7]:
#Create a new column called "hashtag" in the dataframe: strip every character
#that is not an ASCII letter (this also removes the spaces) and prefix "#",
#e.g. "Brooklyn Nine-Nine" -> "#BrooklynNineNine".
#regex=True must be passed explicitly: pandas >= 2.0 treats the pattern as a
#literal string by default, which would leave punctuation in the hashtags.
TVShowDF['hashtag'] = '#' + TVShowDF.tv_show_names.str.replace('[^a-zA-Z]', '', regex=True)
TVShowDF.hashtag

0                    #ThePunisher
1                   #SexEducation
2                            #You
3                  #GameofThrones
4                  #TrueDetective
5                         #Titans
6                #GraceandFrankie
7                        #Vikings
8                     #TheOrville
9              #StarTrekDiscovery
10                   #BlackMirror
11                     #Outlander
12                  #AmericanGods
13                  #GreysAnatomy
14              #BrooklynNineNine
15                  #Supernatural
16            #AmericanCrimeStory
17                    #ThePassage
18                     #Riverdale
19                     #Shameless
20                 #PeakyBlinders
21                  #TheGoodPlace
22                        #Gotham
23                #TheWalkingDead
24                     #TheOffice
25              #TheBigBangTheory
26                       #Lucifer
27            #FriendsfromCollege
28         #TheMarvelousMrsMaisel
29            

## Getting Tweets

In [11]:
from urllib.parse import unquote

#Set the number of Tweets that we want per hashtag
n = 5000
# The search endpoint caps `count` at 100 tweets per request, so asking for
# more does not return more. Clamp explicitly so the real page size is
# visible here. Collecting more than this per hashtag would require paging.
# See https://dev.twitter.com/rest/reference/get/search/tweets
MAX_TWEETS_PER_REQUEST = 100
tweets_per_request = min(n, MAX_TWEETS_PER_REQUEST)

#create an empty list to hold the tweets (one API response per hashtag)
tweet_results = []

#loop through the hashtags and search twitter for each one
for hashtag in TVShowDF.hashtag:
    tweet_results.append(
        twitter_api.search.tweets(q=hashtag, count=tweets_per_request, lang='en')
    )

# Preview the first response; skipped when there were no hashtags to search.
if tweet_results:
    print(tweet_results[0])

{'statuses': [{'created_at': 'Wed Jan 30 23:53:56 +0000 2019', 'id': 1090760052053544960, 'id_str': '1090760052053544960', 'text': 'RT @JasonMooreENT: This is the second time you pulled a gun on Curtis. There will not be a third Billy #punisher #ThePunisher #netflix http…', 'truncated': False, 'entities': {'hashtags': [{'text': 'punisher', 'indices': [103, 112]}, {'text': 'ThePunisher', 'indices': [113, 125]}, {'text': 'netflix', 'indices': [126, 134]}], 'symbols': [], 'user_mentions': [{'screen_name': 'JasonMooreENT', 'name': 'Jason R. Moore', 'id': 773721649971601408, 'id_str': '773721649971601408', 'indices': [3, 17]}], 'urls': []}, 'metadata': {'iso_language_code': 'en', 'result_type': 'recent'}, 'source': '<a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 536941347, 'id_s

## Flatten the tweets, extract the fields we are interested in, load them into a DataFrame, and export the data to a CSV file

In [12]:
def GetTweetFields(tr):
    """Pick the fields of interest out of one tweet dict.

    Parameters
    ----------
    tr : dict
        A single tweet (status) as returned in a search response. It must
        have the keys "text", "created_at" and "user", and tr["user"] must
        have the key "favourites_count".

    Returns
    -------
    dict
        A new dict with the keys "text", "created_at" and
        "favourites_count", in that order.

    Raises
    ------
    KeyError
        If any of the required keys is missing.

    Note: "favourites_count" is read from the tweet's "user" sub-dict, not
    from the tweet itself.
    """
    fields = {}
    fields["text"] = tr["text"]
    fields["created_at"] = tr["created_at"]
    author = tr["user"]
    fields["favourites_count"] = author["favourites_count"]
    return fields
# One list of extracted field dicts per API response (i.e. per hashtag).
alltweets = [[GetTweetFields(ttrr) for ttrr in tr["statuses"]] for tr in tweet_results]

# Preview the tweets of one response; guarded so that a shorter list of
# responses does not raise IndexError.
PREVIEW_INDEX = 11
if len(alltweets) > PREVIEW_INDEX:
    print(alltweets[PREVIEW_INDEX])

# Flatten the per-response lists into a single list of records.
tweet_records = [item for sublist in alltweets for item in sublist]

# The records carry the key 'created_at'. Selecting a non-existent
# 'created_date' column would silently yield an all-NaN column and drop the
# timestamps, so select 'created_at' and then rename it to keep the
# exported column name 'created_date'.
df = pd.DataFrame(
    tweet_records, columns=['text', 'created_at', 'favourites_count']
).rename(columns={'created_at': 'created_date'})

#export the dataframe to csv
df.to_csv('tweets.csv')

[{'text': "RT @outlander_clips: What this girl has done this season is beyond amazing. @SkeltonSophie I'm eternally grateful for your performance, you…", 'created_at': 'Wed Jan 30 23:54:08 +0000 2019', 'favourites_count': 3036}, {'text': "yeah... I'm not buying that Roger decided to not come back to Bree #Outlander https://t.co/NbuKGiPrV0", 'created_at': 'Wed Jan 30 23:54:07 +0000 2019', 'favourites_count': 8451}, {'text': 'RT @Outlander_STARZ: A man of worth he truly is. ❤️ #Outlander https://t.co/2S5Y7go2CO', 'created_at': 'Wed Jan 30 23:54:03 +0000 2019', 'favourites_count': 932}, {'text': "RT @Outlander_STARZ: We're not crying, you're crying.....J/K we're crying. 😭💔 #Outlander https://t.co/QuGhEb2oyY", 'created_at': 'Wed Jan 30 23:53:29 +0000 2019', 'favourites_count': 28475}, {'text': "RT @Outlander_STARZ: We're not crying, you're crying.....J/K we're crying. 😭💔 #Outlander https://t.co/QuGhEb2oyY", 'created_at': 'Wed Jan 30 23:53:10 +0000 2019', 'favourites_count': 2289}, {'text':