### Prepare the Workstation

In [1]:
import yaml 
from yaml.loader import SafeLoader

In [2]:
# Load the Twitter credentials from the local YAML file. The context manager
# closes the file handle as soon as parsing is done instead of leaving it open.
with open('twitter_tmp.yaml', 'r') as creds_file:
    twitter_creds = yaml.safe_load(creds_file)

In [3]:
# Show only the key names, so the secret values never appear in the saved output
twitter_creds.keys()

dict_keys(['api_key', 'api_secret_key', 'access_token', 'access_token_secret'])

In [4]:
!pip install twitter



In [5]:
from twitter import *

In [6]:
# Build the authenticated API client. The credentials are passed by keyword so
# each value is visibly paired with the OAuth field it belongs to.
twitter_api = Twitter(
    auth=OAuth(
        token=twitter_creds['access_token'],
        token_secret=twitter_creds['access_token_secret'],
        consumer_key=twitter_creds['api_key'],
        consumer_secret=twitter_creds['api_secret_key'],
    )
)

In [7]:
# Print the client object to confirm that it was constructed.
# NOTE(review): this only shows the object's repr; it presumably does not prove
# that the credentials are valid or the API is reachable — confirm with a real request.
print(twitter_api)

<twitter.api.Twitter object at 0x0000019697CDBC40>


### Worldwide trends

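We start by listing every location for which Twitter provides trending topics. Each location carries a `woeid`, which we use later to request the trends for that place.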

In [10]:
# Fetch the places for which trend information is available (one dict per place)
trends_worldwide = twitter_api.trends.available()

# How many places are available?
print(len(trends_worldwide))

# Show the first record as an example of the fields each place carries
trends_worldwide[0]

467


{'name': 'Worldwide',
 'placeType': {'code': 19, 'name': 'Supername'},
 'url': 'http://where.yahooapis.com/v1/place/1',
 'parentid': 0,
 'country': '',
 'woeid': 1,
 'countryCode': None}

In [11]:
# The call above returned 467 places (not 467 trends). Collect just the place
# names so we can look for a specific city.

list_of_names = [place['name'] for place in trends_worldwide]

# Preview the first 10 place names
list_of_names[:10]

['Worldwide',
 'Winnipeg',
 'Ottawa',
 'Quebec',
 'Montreal',
 'Toronto',
 'Edmonton',
 'Calgary',
 'Vancouver',
 'Birmingham']

In [14]:
# City we want to look up
our_city = 'Seoul'

# Keep every place record whose name matches the city exactly
list_of_names_our_city = [
    place
    for place in trends_worldwide
    if place['name'] == our_city
]

# Number of matching records (1 means the name is unambiguous)
print(len(list_of_names_our_city))

# Show the first (here: the only) matching record
list_of_names_our_city[0]

1


{'name': 'Seoul',
 'placeType': {'code': 7, 'name': 'Town'},
 'url': 'http://where.yahooapis.com/v1/place/1132599',
 'parentid': 23424868,
 'country': 'Korea',
 'woeid': 1132599,
 'countryCode': 'KR'}

In [15]:
# Trends for a single place are requested by its woeid (per the API documentation),
# so read that field from the Seoul record found above
list_of_names_our_city[0]['woeid']

1132599

In [22]:
# Seoul's woeid is 1132599; look up the woeid of a second city the same way
home_city = 'Berlin'

list_of_names_home = [
    place
    for place in trends_worldwide
    if place['name'] == home_city
]

print(len(list_of_names_home))

list_of_names_home[0]['woeid']

1


638242

### Identify common trends between two cities

Now that we have listed the available locations and learned how to get the woeid of a city, let's fetch the trends for Berlin and Seoul so that we can compare them.

In [23]:
# Request the current trends for Seoul, identified by the woeid found earlier

seoul_trends = twitter_api.trends.place(_id = list_of_names_our_city[0]['woeid'])

# View the raw response: a list whose first element holds the 'trends' records
seoul_trends

[{'trends': [{'name': '#jhope_MORE',
    'url': 'http://twitter.com/search?q=%23jhope_MORE',
    'promoted_content': None,
    'query': '%23jhope_MORE',
    'tweet_volume': 2450658},
   {'name': '#누구보다_빠른_7월_최애_선점',
    'url': 'http://twitter.com/search?q=%23%EB%88%84%EA%B5%AC%EB%B3%B4%EB%8B%A4_%EB%B9%A0%EB%A5%B8_7%EC%9B%94_%EC%B5%9C%EC%95%A0_%EC%84%A0%EC%A0%90',
    'promoted_content': None,
    'query': '%23%EB%88%84%EA%B5%AC%EB%B3%B4%EB%8B%A4_%EB%B9%A0%EB%A5%B8_7%EC%9B%94_%EC%B5%9C%EC%95%A0_%EC%84%A0%EC%A0%90',
    'tweet_volume': None},
   {'name': '커밍아웃',
    'url': 'http://twitter.com/search?q=%EC%BB%A4%EB%B0%8D%EC%95%84%EC%9B%83',
    'promoted_content': None,
    'query': '%EC%BB%A4%EB%B0%8D%EC%95%84%EC%9B%83',
    'tweet_volume': 13260},
   {'name': '트친 메뉴판',
    'url': 'http://twitter.com/search?q=%22%ED%8A%B8%EC%B9%9C+%EB%A9%94%EB%89%B4%ED%8C%90%22',
    'promoted_content': None,
    'query': '%22%ED%8A%B8%EC%B9%9C+%EB%A9%94%EB%89%B4%ED%8C%90%22',
    'tweet_volume': None},


In [24]:
# The raw response is hard to read, so view it as a pandas DataFrame instead
# Import pandas
import pandas as pd

# Build the DataFrame from the list of trend dicts: one row per trend,
# one column per field (name, url, promoted_content, query, tweet_volume)
seoul_trends_pd = pd.DataFrame(seoul_trends[0]['trends'])

# View the DataFrame
seoul_trends_pd

Unnamed: 0,name,url,promoted_content,query,tweet_volume
0,#jhope_MORE,http://twitter.com/search?q=%23jhope_MORE,,%23jhope_MORE,2450658.0
1,#누구보다_빠른_7월_최애_선점,http://twitter.com/search?q=%23%EB%88%84%EA%B5...,,%23%EB%88%84%EA%B5%AC%EB%B3%B4%EB%8B%A4_%EB%B9...,
2,커밍아웃,http://twitter.com/search?q=%EC%BB%A4%EB%B0%8D...,,%EC%BB%A4%EB%B0%8D%EC%95%84%EC%9B%83,13260.0
3,트친 메뉴판,http://twitter.com/search?q=%22%ED%8A%B8%EC%B9...,,%22%ED%8A%B8%EC%B9%9C+%EB%A9%94%EB%89%B4%ED%8C...,
4,문송안함,http://twitter.com/search?q=%EB%AC%B8%EC%86%A1...,,%EB%AC%B8%EC%86%A1%EC%95%88%ED%95%A8,
5,#모어_다운후스밍해,http://twitter.com/search?q=%23%EB%AA%A8%EC%96...,,%23%EB%AA%A8%EC%96%B4_%EB%8B%A4%EC%9A%B4%ED%9B...,23044.0
6,사망원인,http://twitter.com/search?q=%EC%82%AC%EB%A7%9D...,,%EC%82%AC%EB%A7%9D%EC%9B%90%EC%9D%B8,
7,나의 전생,http://twitter.com/search?q=%22%EB%82%98%EC%9D...,,%22%EB%82%98%EC%9D%98+%EC%A0%84%EC%83%9D%22,
8,주류박람회,http://twitter.com/search?q=%EC%A3%BC%EB%A5%98...,,%EC%A3%BC%EB%A5%98%EB%B0%95%EB%9E%8C%ED%9A%8C,
9,#더_킹이홉스럽게_모어,http://twitter.com/search?q=%23%EB%8D%94_%ED%8...,,%23%EB%8D%94_%ED%82%B9%EC%9D%B4%ED%99%89%EC%8A...,10965.0


In [27]:
# There are a lot of trends, so keep only those with a reported tweet volume
# above 10,000, sorted from largest to smallest. Rows with a missing
# tweet_volume fail the comparison and are dropped.
# NOTE: despite "over100k" in the variable name, the threshold used is 10,000.

seoul_trends_over100k = (
    seoul_trends_pd
    .loc[lambda trends: trends['tweet_volume'] > 10000]
    .sort_values('tweet_volume', ascending=False)
)

print(seoul_trends_over100k.shape)
seoul_trends_over100k

(8, 5)


Unnamed: 0,name,url,promoted_content,query,tweet_volume
0,#jhope_MORE,http://twitter.com/search?q=%23jhope_MORE,,%23jhope_MORE,2450658.0
5,#모어_다운후스밍해,http://twitter.com/search?q=%23%EB%AA%A8%EC%96...,,%23%EB%AA%A8%EC%96%B4_%EB%8B%A4%EC%9A%B4%ED%9B...,23044.0
23,지하철 사고,http://twitter.com/search?q=%22%EC%A7%80%ED%95...,,%22%EC%A7%80%ED%95%98%EC%B2%A0+%EC%82%AC%EA%B3...,19490.0
30,srt 탈선,http://twitter.com/search?q=%22srt+%ED%83%88%E...,,%22srt+%ED%83%88%EC%84%A0%22,18691.0
39,청소노동자,http://twitter.com/search?q=%EC%B2%AD%EC%86%8C...,,%EC%B2%AD%EC%86%8C%EB%85%B8%EB%8F%99%EC%9E%90,17021.0
12,아기대통령,http://twitter.com/search?q=%EC%95%84%EA%B8%B0...,,%EC%95%84%EA%B8%B0%EB%8C%80%ED%86%B5%EB%A0%B9,16445.0
2,커밍아웃,http://twitter.com/search?q=%EC%BB%A4%EB%B0%8D...,,%EC%BB%A4%EB%B0%8D%EC%95%84%EC%9B%83,13260.0
9,#더_킹이홉스럽게_모어,http://twitter.com/search?q=%23%EB%8D%94_%ED%8...,,%23%EB%8D%94_%ED%82%B9%EC%9D%B4%ED%99%89%EC%8A...,10965.0


In [28]:
# We now have a table of 8 trends; save it as a CSV file without the index column

seoul_trends_over100k.to_csv('seoul_tweets.csv', index = False)

### Trends for Berlin

Now that we are familiar with the process of looking up a city and fetching its trends, let's do the same for Berlin.

In [29]:
# Look up Berlin in the list of available places to get its woeid

our_city = 'Berlin'

list_of_names_berlin = [place for place in trends_worldwide if place['name'] == our_city]

print(len(list_of_names_berlin))

# Read the woeid from the list built in this cell, so the result does not
# depend on a variable that happens to be left over from an earlier cell
list_of_names_berlin[0]['woeid']

1


638242

In [31]:
# Request the current trends for Berlin, using the woeid from the Berlin lookup
# (list_of_names_berlin) rather than an unrelated variable from an earlier cell

berlin_trends = twitter_api.trends.place(_id=list_of_names_berlin[0]['woeid'])

berlin_trends

[{'trends': [{'name': '#KulFaZ',
    'url': 'http://twitter.com/search?q=%23KulFaZ',
    'promoted_content': None,
    'query': '%23KulFaZ',
    'tweet_volume': None},
   {'name': '#BluesBrothers',
    'url': 'http://twitter.com/search?q=%23BluesBrothers',
    'promoted_content': None,
    'query': '%23BluesBrothers',
    'tweet_volume': None},
   {'name': 'Willkür',
    'url': 'http://twitter.com/search?q=Willk%C3%BCr',
    'promoted_content': None,
    'query': 'Willk%C3%BCr',
    'tweet_volume': None},
   {'name': '#StrangerThings4',
    'url': 'http://twitter.com/search?q=%23StrangerThings4',
    'promoted_content': None,
    'query': '%23StrangerThings4',
    'tweet_volume': 372614},
   {'name': '#DankeWieler',
    'url': 'http://twitter.com/search?q=%23DankeWieler',
    'promoted_content': None,
    'query': '%23DankeWieler',
    'tweet_volume': None},
   {'name': 'Maßnahmen',
    'url': 'http://twitter.com/search?q=Ma%C3%9Fnahmen',
    'promoted_content': None,
    'query': 'Ma%

In [32]:
# Build a DataFrame from the trend dicts in the first element of the raw response
berlin_trends_df = pd.DataFrame(berlin_trends[0]['trends'])

# View the DataFrame
berlin_trends_df

Unnamed: 0,name,url,promoted_content,query,tweet_volume
0,#KulFaZ,http://twitter.com/search?q=%23KulFaZ,,%23KulFaZ,
1,#BluesBrothers,http://twitter.com/search?q=%23BluesBrothers,,%23BluesBrothers,
2,Willkür,http://twitter.com/search?q=Willk%C3%BCr,,Willk%C3%BCr,
3,#StrangerThings4,http://twitter.com/search?q=%23StrangerThings4,,%23StrangerThings4,372614.0
4,#DankeWieler,http://twitter.com/search?q=%23DankeWieler,,%23DankeWieler,
5,Maßnahmen,http://twitter.com/search?q=Ma%C3%9Fnahmen,,Ma%C3%9Fnahmen,32783.0
6,#LauterbachRuecktrittSofort,http://twitter.com/search?q=%23LauterbachRueck...,,%23LauterbachRuecktrittSofort,
7,Bremsklotz,http://twitter.com/search?q=Bremsklotz,,Bremsklotz,
8,Evidenz,http://twitter.com/search?q=Evidenz,,Evidenz,
9,Treten Sie,http://twitter.com/search?q=%22Treten+Sie%22,,%22Treten+Sie%22,


In [34]:
# Keep only the Berlin trends with a reported tweet volume above 50,000,
# sorted from largest to smallest
berlin_top_tweets = (
    berlin_trends_df
    .loc[lambda trends: trends['tweet_volume'] > 50000]
    .sort_values('tweet_volume', ascending=False)
)

berlin_top_tweets

Unnamed: 0,name,url,promoted_content,query,tweet_volume
21,Eddie,http://twitter.com/search?q=Eddie,,Eddie,395637.0
16,Canada,http://twitter.com/search?q=Canada,,Canada,387393.0
3,#StrangerThings4,http://twitter.com/search?q=%23StrangerThings4,,%23StrangerThings4,372614.0
22,Mike,http://twitter.com/search?q=Mike,,Mike,253630.0
39,#BritishGP,http://twitter.com/search?q=%23BritishGP,,%23BritishGP,56977.0
40,#TheBoys,http://twitter.com/search?q=%23TheBoys,,%23TheBoys,56708.0


### Common trends

Because Seoul and Berlin are very unlikely to share trends, let's try two other cities first: New York and London.

In [37]:
# Look up New York in the list of available places to get its woeid

our_city = 'New York'

list_of_names_ny = [
    place
    for place in trends_worldwide
    if place['name'] == our_city
]

print(len(list_of_names_ny))

list_of_names_ny[0]['woeid']

1


2459115

In [38]:
# Fetch the trends for New York; this time the raw response is shown as JSON

# Import json
import json

# Use the woeid found by the lookup instead of a hard-coded copy of it,
# so the request stays correct if the looked-up city changes
ny_trends = twitter_api.trends.place(_id=list_of_names_ny[0]['woeid'])

# Pretty-print the response as indented JSON
print(json.dumps(ny_trends, indent=4))

[
    {
        "trends": [
            {
                "name": "Pennsylvania",
                "url": "http://twitter.com/search?q=Pennsylvania",
                "promoted_content": null,
                "query": "Pennsylvania",
                "tweet_volume": 30579
            },
            {
                "name": "#TheChi",
                "url": "http://twitter.com/search?q=%23TheChi",
                "promoted_content": null,
                "query": "%23TheChi",
                "tweet_volume": 34163
            },
            {
                "name": "byers",
                "url": "http://twitter.com/search?q=byers",
                "promoted_content": null,
                "query": "byers",
                "tweet_volume": 28983
            },
            {
                "name": "Rinna",
                "url": "http://twitter.com/search?q=Rinna",
                "promoted_content": null,
                "query": "Rinna",
                "tweet_volume": 10943
            

In [45]:
# Keep the New York trends with more than 100,000 tweets and save them as a CSV file.
# The response fetched earlier (ny_trends) is reused, so the CSV is built from the
# same data as the JSON shown above and no second API request is made.

ny_trends_pd = pd.DataFrame(ny_trends[0]['trends'])

ny_top_tweets = (
    ny_trends_pd
    .loc[lambda trends: trends['tweet_volume'] > 100000]
    .sort_values('tweet_volume', ascending=False)
)

ny_top_tweets.to_csv('ny_top_tweets.csv', index=False)

In [46]:
# To compare against London, look up London's woeid the same way

other_city = 'London'

list_of_names_other = [
    place
    for place in trends_worldwide
    if place['name'] == other_city
]

print(len(list_of_names_other))

list_of_names_other[0]['woeid']

1


44418

In [47]:
# Request the current trends for London, identified by the woeid found above
london_trends = twitter_api.trends.place(_id = list_of_names_other[0]['woeid'])

# Build a DataFrame from the trend dicts in the first element of the response
london_trends_pd = pd.DataFrame(london_trends[0]['trends'])

# View the DataFrame
london_trends_pd

Unnamed: 0,name,url,promoted_content,query,tweet_volume
0,Evelyn,http://twitter.com/search?q=Evelyn,,Evelyn,14181.0
1,#BritishGP,http://twitter.com/search?q=%23BritishGP,,%23BritishGP,55975.0
2,Gini,http://twitter.com/search?q=Gini,,Gini,85409.0
3,yes mo,http://twitter.com/search?q=%22yes+mo%22,,%22yes+mo%22,
4,Danica and Charlie,http://twitter.com/search?q=%22Danica+and+Char...,,%22Danica+and+Charlie%22,
5,#SilverstoneGP,http://twitter.com/search?q=%23SilverstoneGP,,%23SilverstoneGP,
6,#RLCS,http://twitter.com/search?q=%23RLCS,,%23RLCS,
7,Ramy,http://twitter.com/search?q=Ramy,,Ramy,
8,Hopper,http://twitter.com/search?q=Hopper,,Hopper,46006.0
9,#EarpFest,http://twitter.com/search?q=%23EarpFest,,%23EarpFest,


### Find common topics

In [48]:
# Collect the names of the New York trends into a plain list.
# The list can contain repeats (the output below shows 'Pat Bev' twice).

ny_trend_list = [trend['name'] for trend in ny_trends[0]['trends']]

# View the output
print(ny_trend_list)

['Pennsylvania', '#TheChi', 'byers', 'Rinna', '#RHODubai', 'Canadians', '#fridaymorning', 'Eddie', 'hopper', "They're a 10", 'Zion', 'Gobert', 'Pat Bev', 'Pat Bev', 'Timberwolves', 'Brogdon', '#samsummerfridays', '#AskCardi', 'Mitchell', 'Celtics', 'Canada', 'Danny Ainge', 'Brad Stevens', 'Pacers', 'Patrick Beverly', '4th of July', 'stranger things', 'KAT and Rudy', 'Brian Windhorst', 'Walker Kessler', 'Brit', 'Vando', 'Rudy and KAT', 'Wyoming', 'Tim Connelly', 'Liz Cheney', 'Windy', 'Spida', 'Bobby Bonilla', 'Independence Day', 'Beasley', 'Theis', 'Voyager', 'Splash Mountain', 'Rich Hill', 'Minny', 'McDaniels', 'Liberal World Order', 'Huerter', 'Tiana']


In [49]:
# Collect the names of the London trends into a plain list

london_trend_list = [trend['name'] for trend in london_trends[0]['trends']]

# View the output
print(london_trend_list)

['Evelyn', '#BritishGP', 'Gini', 'yes mo', 'Danica and Charlie', '#SilverstoneGP', '#RLCS', 'Ramy', 'Hopper', '#EarpFest', 'Season 5', 'Shane', 'Copenhagen', 'Heather Watson', 'Iain', 'Neil Parish', 'Adele', 'Eddie Munson', 'Camara', 'BloJo', 'Williamson', 'Claudia Webbe', 'Butter', 'British Grand Prix', 'Somme', 'Moshiri', 'Debbie Harry', 'Tamworth', 'Steve Wright', 'Scott Mills', 'Radio 2', 'Gobert', 'Rams', 'Spider', 'Ken Bruce', 'Pat Bev', 'Martinez', 'Conor Burns', 'Granit Xhaka', 'Lickey End', 'David Clowes', 'Toyah', 'Derby County', 'Pant', 'Timberwolves', 'Mimi', 'Venus', 'Mo Salah', 'Israel']


In [50]:
# With both lists of trend names in hand, convert them to sets so that
# repeated names collapse and the two cities can be compared directly
ny_trends_sets = set(ny_trend_list)
london_trend_set = set(london_trend_list)

# Trends present in both cities. Matching is exact, so names that differ only
# in capitalisation (e.g. 'hopper' vs 'Hopper') are not counted as common.
common_trends = ny_trends_sets & london_trend_set

# View the output
print(common_trends)

{'Gobert', 'Pat Bev', 'Timberwolves'}
