# A quantitative approach to improving the content quality of my Twitter feed

A few observations:
- Finding and choosing people to follow is overwhelming.
- It is very hard to balance between quantity and quality.
- Negative tweets tend to make twitter less enjoyable for me.

Let's fix that with data.

In [None]:
import tweepy  
import time
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Credentials are read from a local module named `twittercreds`, which must
# define consumer_key, consumer_secret, access_token and access_token_secret.
import twittercreds

API_KEY = twittercreds.consumer_key
API_SECRET = twittercreds.consumer_secret
ACCESS_TOKEN = twittercreds.access_token
ACCESS_TOKEN_SECRET = twittercreds.access_token_secret

In [None]:
# Screen name of the account whose tweets and likes are fetched below.
MY_TWITTER_HANDLE = 'pierre_ge'

## Login to twitter

In [None]:
# Authenticate with the application key/secret plus the user's access token.
auth = tweepy.OAuthHandler(API_KEY, API_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
# wait_on_rate_limit makes tweepy sleep until the rate-limit window resets
# instead of raising, so long bulk queries in later cells are not aborted
# halfway through.
api = tweepy.API(auth, wait_on_rate_limit=True)

## Tweets

In [None]:
def get_tweets(name, n=10, exclude_replies=False):
    '''Get the last n tweets from someone.

    Parameters
    ----------
    name : str
        Screen name of the account to read.
    n : int
        Maximum number of tweets to return; a value <= 0 yields an empty list.
    exclude_replies : bool
        Forwarded unchanged to ``api.user_timeline``.

    Returns
    -------
    list
        At most ``n`` status objects, in the order the cursor yields them.
    '''
    tweets = []
    if n <= 0:
        return tweets
    timeline = tweepy.Cursor(api.user_timeline, screen_name=name,
                             exclude_replies=exclude_replies)
    for tweet in timeline.items():
        tweets.append(tweet)
        # Compare the collected count (not a zero-based index) against n so
        # that exactly n items are returned rather than n + 1.
        if len(tweets) >= n:
            break
    return tweets

In [None]:
def get_tweets_as_df(name, n=10, exclude_replies=False):
    '''Fetch tweets with get_tweets and return their raw JSON payloads as a
    dataframe, one row per tweet.'''
    statuses = get_tweets(name, n, exclude_replies)
    records = [status._json for status in statuses]
    return pd.DataFrame(records)

In [None]:
# My own most recent tweets (default sample size of get_tweets_as_df).
df_tweets = get_tweets_as_df(name=MY_TWITTER_HANDLE)

Get my last two tweets

In [None]:
# Display the first two rows of the fetched tweets.
df_tweets.iloc[:2]

## Following / friends

Collect the data on the people I'm following

In [None]:
def get_friends(name):
    '''Return the ids of every account that `name` follows, gathered from all
    result pages of api.friends_ids.'''
    id_pages = tweepy.Cursor(api.friends_ids, screen_name=name).pages()
    return [friend_id for id_page in id_pages for friend_id in id_page]

In [None]:
def get_name_from_id(idtwitter):
    '''Look up a user by id and return the screen name from its JSON payload.'''
    user = api.get_user(idtwitter)
    payload = user._json
    return payload['screen_name']

In [None]:
# Use the shared handle constant instead of repeating the literal, so that
# changing MY_TWITTER_HANDLE retargets the whole notebook.
friends_id = get_friends(MY_TWITTER_HANDLE)
# Resolve each id to a screen name (one get_user request per id).
friends_names = [get_name_from_id(i) for i in friends_id]

## Collect friends' tweets

In [None]:
# Download up to 10000 tweets for every followed account and stack them into
# a single dataframe, tagging each row with the account's screen name.
MAX_ATTEMPTS = 5       # give up on an account after this many failed queries
RETRY_DELAY = 60 * 5   # seconds to wait between attempts

friend_frames = []
for friend in friends_names:
    friend_df = None
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            friend_df = get_tweets_as_df(friend, n=10000)
            break
        except Exception as exc:
            # `except Exception` (not a bare except) so Ctrl-C still
            # interrupts the download.
            print('Query failed, possible rate limiting')
            print('  {}: {!r}'.format(friend, exc))
            if attempt < MAX_ATTEMPTS:
                time.sleep(RETRY_DELAY)
    if friend_df is None:
        # The error persisted across every attempt; skip this account rather
        # than retrying forever.
        print('Giving up on {}'.format(friend))
        continue
    friend_df['name'] = friend
    friend_frames.append(friend_df)

# Concatenate once at the end: growing a dataframe inside the loop copies all
# previously collected rows on every iteration.
if friend_frames:
    df_friendstweets = pd.concat(friend_frames, sort=False)
else:
    df_friendstweets = pd.DataFrame()

In [None]:
# Number of distinct account names present in the collected tweets.
distinct_names = df_friendstweets['name'].unique()
len(distinct_names)

## Find silent/noisy accounts

In [None]:
# For every followed account, average the creation dates of its first 40
# collected tweets; the mean date is used below to rate how active it is.
silent_accounts = []
for name in friends_names:
    belongs_to_friend = df_friendstweets['name'] == name
    sampled = df_friendstweets[belongs_to_friend].head(40)
    mean_date = pd.to_datetime(sampled['created_at']).mean()
    silent_accounts.append(dict(name=name, mean_date=mean_date))

In [None]:
# Activity thresholds applied to each account's mean tweet date:
# an account is listed as "silent" when that date is more than threshold_low
# days in the past, and as "noisy" when it is less than threshold_high days
# in the past.
threshold_low = 360 # days
threshold_high = 1 # days

#### Consider unfollowing:

Silent

In [None]:
# List accounts whose mean tweet date is older than threshold_low days.
# pd.Timestamp replaces pd.datetime, which no longer exists in pandas >= 2.0.
# "Now" is taken in UTC and then made naive so it is comparable with the
# tz-stripped mean dates (NOTE(review): assumes created_at is expressed in
# UTC - confirm).
silent_cutoff = (pd.Timestamp.now(tz='UTC').tz_localize(None)
                 - pd.Timedelta(days=threshold_low))
for account in silent_accounts:
    if account['mean_date'].tz_localize(None) < silent_cutoff:
        print('https://twitter.com/{}'.format(account['name']))

Noisy

In [None]:
# List accounts whose mean tweet date is more recent than threshold_high days.
# pd.Timestamp replaces pd.datetime, which no longer exists in pandas >= 2.0.
# "Now" is taken in UTC and then made naive so it is comparable with the
# tz-stripped mean dates (NOTE(review): assumes created_at is expressed in
# UTC - confirm).
noisy_cutoff = (pd.Timestamp.now(tz='UTC').tz_localize(None)
                - pd.Timedelta(days=threshold_high))
for account in silent_accounts:
    if account['mean_date'].tz_localize(None) > noisy_cutoff:
        print('https://twitter.com/{}'.format(account['name']))

## Find new accounts to follow from my likes

In [None]:
def get_likes(name, n=100):
    '''Get the last n tweets liked by someone.

    Parameters
    ----------
    name : str
        Screen name of the account whose likes are read.
    n : int
        Maximum number of liked tweets to return; a value <= 0 yields an
        empty list.

    Returns
    -------
    list
        At most ``n`` status objects, in the order the cursor yields them.
    '''
    tweets = []
    if n <= 0:
        return tweets
    for tweet in tweepy.Cursor(api.favorites, screen_name=name).items():
        tweets.append(tweet)
        # Compare the collected count (not a zero-based index) against n so
        # that exactly n items are returned rather than n + 1.
        if len(tweets) >= n:
            break
    return tweets

In [None]:
def get_likes_as_df(name, n=100):
    '''Fetch liked tweets with get_likes and return their raw JSON payloads
    as a dataframe, one row per liked tweet.'''
    liked = get_likes(name, n)
    records = [status._json for status in liked]
    return pd.DataFrame(records)

In [None]:
# Tweets I liked most recently (default sample size of get_likes_as_df).
df_likes = get_likes_as_df(name=MY_TWITTER_HANDLE)

In [None]:
# Count how many of the liked tweets were written by each author.
liked_authors = df_likes['user'].apply(lambda user: user['screen_name'])
liked_user = liked_authors.value_counts()
liked_user.iloc[:3]

In [None]:
# Screen names of the four authors that appear most often among the likes.
liked_user.index[:4].tolist()

In [None]:
# Maps a mentioned screen name to the number of times it has been mentioned.
score = {}


def upgrade_score(row):
    '''Add one point in `score` for every user mentioned in a tweet row.

    `row` must provide row['entities']['user_mentions'], an iterable of
    mappings that each carry a 'screen_name' key.
    '''
    for mention in row['entities']['user_mentions']:
        mentioned = mention['screen_name']
        score[mentioned] = score.get(mentioned, 0) + 1
# Tally the mentions found in the collected tweets of the four most-liked
# authors.
top_liked = list(liked_user.index.values[:4])
from_top_liked = df_friendstweets.name.isin(top_liked)
_ = df_friendstweets[from_top_liked].apply(upgrade_score, axis=1)

Consider following

In [None]:
# The 20 most-mentioned accounts as (screen_name, count), highest count first.
ranked_mentions = sorted(score.items(), key=lambda item: item[1], reverse=True)
ranked_mentions[:20]

## Unfollow the negative content

In [None]:
# One sentiment analyzer instance, reused for every tweet scored below.
analyzer = SentimentIntensityAnalyzer()

In [None]:
# Score every tweet once and fan the resulting dict out into four columns;
# running polarity_scores separately for each column would repeat the same
# analysis four times per tweet.
polarity = [analyzer.polarity_scores(sentence) for sentence in df_friendstweets.text]
df_friendstweets['sentiment_compound'] = [scores['compound'] for scores in polarity]
for key in ('neu', 'pos', 'neg'):
    # Lists are assigned positionally, so the non-unique row index left by
    # concatenating the per-account frames does not matter here.
    df_friendstweets[key] = [scores[key] for scores in polarity]

In [None]:
# Keep only the sentiment scores and the account name.
sentiment_columns = ['sentiment_compound', 'neu', 'pos', 'neg', 'name']
df_friendstweets_sub = df_friendstweets[sentiment_columns]

In [None]:
# Average each score per account, then order accounts by mean compound score.
per_account = df_friendstweets_sub.groupby(['name'])
mean_scores = per_account.mean()
df_friendstweets_sentiment = mean_scores.sort_values(by='sentiment_compound')

In [None]:
# Display one randomly chosen account as a sanity check.
df_friendstweets_sentiment.sample(n=1)

In [None]:
# Halve the mean neutral score in place. Each execution of this cell halves
# it again. NOTE(review): presumably done to keep the neutral bars from
# dominating the chart below - confirm.
df_friendstweets_sentiment['neu'] = df_friendstweets_sentiment['neu'].div(2)

In [None]:
# Bar chart of all mean scores, accounts ordered by increasing negativity.
df_friendstweets_sentiment.sort_values(by='neg').plot.bar(figsize=(40, 9))

Consider unfollowing

In [None]:
# The 20 accounts with the highest mean negative score (most negative last).
by_negativity = df_friendstweets_sentiment.sort_values(by='neg')
by_negativity.iloc[-20:]