# Calculate page growth, per engagement

This notebook calculates each page's growth, measured as the change in monthly engagement between March 2016 and March 2017.

In [1]:
import pandas as pd
import datetime

## Load the aggregated data

In [2]:
# Read page_id as a string so the long numeric IDs are never parsed as numbers.
monthly_engagement = pd.read_csv(
    "../output/page-engagement-by-month.csv",
    dtype={"page_id": str},
)

## Merge the two comparison months

In [3]:
# Pair each page's March 2016 engagement with its March 2017 engagement.
# The outer join keeps pages that appear in only one of the two months;
# their engagement for the missing month is left as NaN.
comparison = monthly_engagement.loc[
    monthly_engagement["status_published"] == "2016-03-01",
    ["page_id", "total_engagement"],
].merge(
    monthly_engagement.loc[
        monthly_engagement["status_published"] == "2017-03-01",
        ["page_id", "total_engagement"],
    ],
    on="page_id",
    how="outer",
    suffixes=[".march2016", ".march2017"],
)
comparison.head()

Unnamed: 0,page_id,total_engagement.march2016,total_engagement.march2017
0,100434040001314,52080.0,55029.0
1,1019871961378419,519875.0,1950091.0
2,103971336305218,96008.0,126475.0
3,104037195986,3905.0,3673.0
4,10498053716,419447.0,601169.0


## Add political category and page name information

In [4]:
# Per-page metadata (political category, page name, fan count).
# page_id is read as a string, matching how it is read in the engagement data.
page_info = pd.read_csv("../output/fb-page-info-summary.csv", dtype={"page_id": str})
page_info.head()

Unnamed: 0,page_id,political_category,page_name,fan_count
0,100434040001314,left,Wonkette,96322
1,1014803551921469,right,SourcesNews,59306
2,1019871961378419,right,Rare America,2982929
3,1035617169863710,right,I DID NOT Vote For Hillary in 2016,36269
4,1036253643101134,left,Proud Liberal,510991


In [5]:
def categorize_growth(page):
    """Label a page as grown, flat or shrunken, suffixed with its political category.

    Parameters
    ----------
    page : mapping (e.g. a DataFrame row or a dict)
        Must provide "total_engagement.march2016",
        "total_engagement.march2017" and "political_category".

    Returns
    -------
    str
        "grown_<category>", "flat_<category>" or "shrunken_<category>",
        depending on whether March 2017 engagement is above, equal to or
        below March 2016 engagement.

    Notes
    -----
    A missing (NaN/None) engagement value is treated as 0, so a page with no
    March 2016 data but some March 2017 engagement counts as "grown". Without
    this, a NaN difference fails both the `> 0` and `== 0` comparisons and the
    page would silently be labelled "shrunken".
    """
    def _engagement(column):
        # Missing month -> no recorded engagement -> count it as zero.
        value = page[column]
        return 0 if pd.isna(value) else value

    diff = (
        _engagement("total_engagement.march2017")
        - _engagement("total_engagement.march2016")
    )
    if diff > 0:
        direction = "grown"
    elif diff == 0:
        direction = "flat"
    else:
        direction = "shrunken"
    return "{}_{}".format(direction, page["political_category"])

In [6]:
# Attach page metadata to every row of the comparison (left join), then label
# each page's growth. Missing values are filled with 0 only in the copy handed
# to categorize_growth; the stored columns keep their NaNs.
comparison_with_fb_info = (
    comparison
    .merge(page_info, on="page_id", how="left")
    .assign(
        growth_category=lambda merged: merged.fillna(0).apply(categorize_growth, axis=1)
    )
)
comparison_with_fb_info.head()

Unnamed: 0,page_id,total_engagement.march2016,total_engagement.march2017,political_category,page_name,fan_count,growth_category
0,100434040001314,52080.0,55029.0,left,Wonkette,96322,grown_left
1,1019871961378419,519875.0,1950091.0,right,Rare America,2982929,grown_right
2,103971336305218,96008.0,126475.0,right,Hot Air,820318,grown_right
3,104037195986,3905.0,3673.0,left,Black Agenda Report,23332,shrunken_left
4,10498053716,419447.0,601169.0,right,Media Research Center,1728105,grown_right


In [7]:
# Every page must have a partisanship categorization; fail here if any is missing.
assert comparison_with_fb_info["political_category"].notnull().all()

In [8]:
# Save the full growth table; the row index is not written.
comparison_with_fb_info.to_csv(
    "../output/page-engagement-growth.csv",
    index=False,
)

## Pare down growth to the top 20 pages on the left and right for graphics
This part of the notebook selects the 20 pages with the largest fan counts in each political category (left and right) and pares down the growth spreadsheet to those pages.

In [9]:
# Within each political category, keep the 20 pages with the largest fan counts.
top_20s = (
    comparison_with_fb_info
    .groupby("political_category")
    .apply(lambda pages: pages.nlargest(20, "fan_count"))
)
top_20s

Unnamed: 0_level_0,Unnamed: 1_level_0,page_id,total_engagement.march2016,total_engagement.march2017,political_category,page_name,fan_count,growth_category
political_category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
left,212,341163402640457,22751278.0,18783790.0,left,NowThis,11664950,shrunken_left
left,218,354522044588660,8231964.0,5459343.0,left,Upworthy,11255052,shrunken_left
left,215,346937065399354,26394599.0,36673027.0,left,Occupy Democrats,6665043,grown_left
left,19,114517875225866,12244729.0,19753039.0,left,The Other 98%,4986386,grown_left
left,95,160389977329803,4421326.0,5793239.0,left,ATTN:,4770076,grown_left
left,357,1340186812659728,,10240180.0,left,Truth Examiner,3573203,grown_left
left,328,979613892126968,1470734.0,5961902.0,left,Proud Liberals,2413702,grown_left
left,175,24674986856,258183.0,282455.0,left,Michael Moore,2265031,grown_left
left,428,610045389164725,,2371347.0,left,Liberal American,2111339,grown_left
left,184,273864989376427,2292659.0,2075205.0,left,MSNBC,1964609,shrunken_left


In [10]:
# Write one CSV per political category, selecting that category's rows from
# the first level of the grouped index.
for category, csv_path in (
    ("left", "../output/page-engagement-growth-top-20-pages-left.csv"),
    ("right", "../output/page-engagement-growth-top-20-pages-right.csv"),
):
    top_20s.loc[category].to_csv(csv_path, index=False)

---

---

---