In [2]:
# load in relevant packages
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# load dataset
df_art = pd.read_csv('articles.csv', sep=',')
df_art

Unnamed: 0.1,Unnamed: 0,id,title,publication,content,political_bias
0,0,17283,House Republicans Fret About Winning Their Hea...,New York Times,WASHINGTON — Congressional Republicans have...,-2.2
1,1,17284,Rift Between Officers and Residents as Killing...,New York Times,"After the bullet shells get counted, the blood...",-2.2
2,2,17285,"Tyrus Wong, ‘Bambi’ Artist Thwarted by Racial ...",New York Times,"When Walt Disney’s “Bambi” opened in 1942, cri...",-2.2
3,3,17286,"Among Deaths in 2016, a Heavy Toll in Pop Musi...",New York Times,"Death may be the great equalizer, but it isn’t...",-2.2
4,4,17287,Kim Jong-un Says North Korea Is Preparing to T...,New York Times,"SEOUL, South Korea — North Korea’s leader, ...",-2.2
...,...,...,...,...,...,...
55995,138914,209012,9 mistakes you’re probably making while dining...,Washington Post,That thing you do all the time when you e...,-2.2
55996,138915,209013,These fashion tribes use personal style to rag...,Washington Post,Tembisa Revolution Is a group of five ...,-2.2
55997,138916,209017,Chaos in Chinese stock market sends economic w...,Washington Post,"BEIJING — Just a week ago, Chinese Presid...",-2.2
55998,138917,209018,"As Obama tries to bridge divide on guns, it se...",Washington Post,Ahead of President Obama’s event on guns T...,-2.2


In [4]:

# vectorize articles based on their title
vectorizer = TfidfVectorizer(strip_accents='unicode', lowercase=True, stop_words='english')
articles_vectors = vectorizer.fit_transform(df_art['title'])

# calculate cosine similarity between article vectors and create similarity matrix
sim_matrix = cosine_similarity(articles_vectors, articles_vectors)

In [5]:
# turn similarity matrix into Pandas dataframe
sim_df = pd.DataFrame(sim_matrix, index=df_art.index, columns=df_art.index)

# create function that gets similarity scores for a specific article
def get_similarities(article_id):
    similarities = sim_df.loc[article_id]
    # drop similarity with the same article
    similarities.drop(article_id, inplace=True)
    return similarities.rename('similarity').to_frame()

# create function that adds similarity scores to complete dataframe
def recommend_articles(article_id):
    similarities = get_similarities(article_id)
    # drop similarity with the same article
    art_df = df_art.drop(article_id, axis='rows').join(similarities)
    return art_df.sort_values('similarity', ascending=False)



In [6]:
# create filter function that removes articles that are politically polarizing to user
def filter_higher(user_rating, articles):
    # if user has a right political score
    if user_rating > 1:
        filtered_df = articles[articles['political_bias'] <= user_rating]
    # if user has a left political score
    elif user_rating < -1:
        filtered_df = articles[articles['political_bias'] >= user_rating]
    # if user has centre political score
    else:
        filtered_df = articles 
    return filtered_df

# create filter function that removes politically distant articles to user
def filter_buffer(user_rating, articles):
    filtered_df = articles[abs(articles['political_bias'] - user_rating) < 3]
    return filtered_df


In [7]:
# create function that adds polarization score and accountability score to dataframe
def accountability_score(articles):
    # polarization score of an article is higher the closer to zero
    articles['polarization_score'] = 1 - (abs(articles['political_bias']) / 6)
    # accountability score of an article is weighted score of similarity and polarization
    articles['accountability_score'] = articles['similarity'] * articles['polarization_score']
    return articles


In [11]:
# create mock users with political bias scores of their nine last read articles
users = {
    'User1': [5, 3.2, 3.2, 1.8, 5, 1.8, 2.5, 3.2, 2.5], 
    'User2': [-2.2, -2.2, -3.5, -4, -2.6, -2.2, -2, -4, -4],
    'User3': [-1.3, -1.3, -0.7, 1.8, 1.8, -2, -2.2, -0.7, -1.3],
}

# create function that gets average political bias of user
def user_average(user):
    average = sum(users[user]) / len(users[user])
    return average

# for every mock user get top 10 recommendations based on last read article
for user in users.keys():
    print()
    # create (random) article which user has last read
    random_article = df_art.sample(1)
    article_id = random_article.index[0]

    # create average political rating of user, including polarization score of last read article
    article_score = df_art['political_bias'][article_id]
    users[user].append(article_score)
    user_rating = user_average(user)
    
    # create recommender score of all articles based on article id
    # recommender score is cosine similarity
    articles = recommend_articles(article_id)

    # filter recommendations so they are not politically polarizing for the user
    filtered_articles = filter_higher(user_rating, articles)
    # filter recommendatiosn so they are in range of political spectrum of user
    filtered_articles = filter_buffer(user_rating, filtered_articles)

    # create accountability score for all remaining recommendations
    accountable_articles = accountability_score(filtered_articles)

    # display top 10 articles 
    accountable_articles = accountable_articles.sort_values('accountability_score', ascending=False).head(10)
    recommendations = []
    recommendations = accountable_articles[['title', 'political_bias', 'accountability_score']].values.tolist()
    
    print(users[user])
    print(user_rating)
    print('Recommendations for article "{}":\n'.format(random_article.title.values[0]))
    print(*recommendations, sep='\n')



[5, 3.2, 3.2, 1.8, 5, 1.8, 2.5, 3.2, 2.5, -2.2]
2.6
Recommendations for article "This is what it’s like answering all those phone calls to Congress":

['Beware of answering with this one word in new phone\xa0scam', 1.8, 0.34091846314618973]
['Keurig is answering your beer\xa0prayers', 1.8, 0.21643589307655114]
['Asking for someone’s phone number is\xa0over', 1.8, 0.18201457622998635]
['Your phone might one day detect if you have\xa0Alzheimer’s', 1.8, 0.1472105821491023]
['Unlock the Terrorist’s Phone', 2.5, 0.1469619301131902]
['Donald Trump has Lindsey Graham’s phone number\xa0again', 1.8, 0.14303540561855746]
['Man admits he used phone to record sex with\xa0women', 1.8, 0.1410513320962058]
['How to protect your phone messages from CIA\xa0spying', 1.8, 0.14076367973624712]
['I Like You, Ben Carson — Just Not like That', 2.5, 0.1360125166882399]
['Calls to Arms, Part I', 2.5, 0.13548605704310765]

[-2.2, -2.2, -3.5, -4, -2.6, -2.2, -2, -4, -4, -2.6]
-2.93
Recommendations for article "

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  articles['polarization_score'] = 1 - (abs(articles['political_bias']) / 6)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  articles['accountability_score'] = articles['similarity'] * articles['polarization_score']
