# Political success, May 2021

In [1]:
import pandas
import numpy
from numpy import std, correlate, sqrt
from scipy.stats import pearsonr
import statsmodels.api as sm
from linearmodels import IV2SLS
%run Table_Summary.ipynb

In [2]:
data = pandas.read_csv(r'D:/BP/Data/APA_ASA.txt',sep='\t')
data['MAR'] = pandas.to_numeric(data.MAR, errors='coerce')
data['MRR'] = 100 - 100*data['MAR']
data['Subscriptions'] = data['Printed_Subsc_Institutional']
data['Journal'] = data.groupby(['ID'])[['Journal']].fillna(method='ffill')
data['ASA'] = data.groupby(['ID'])[['ASA']].fillna(method='ffill')
data = data[~((data.Journal=='Contexts') | (data.Journal=='Rose'))]
data = data[['Journal','Year','MRR','JIF','Subscriptions','Citable_Items','JIF_Percentile','ASA']]
data = data.set_index(['Journal','Year']).sort_index()
data

Unnamed: 0_level_0,Unnamed: 1_level_0,MRR,JIF,Subscriptions,Citable_Items,JIF_Percentile,ASA
Journal,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
American Psychologist,2004,85.2,5.494,1938.0,36.0,95.500,0.0
American Psychologist,2005,76.4,6.460,1599.0,46.0,96.535,0.0
American Psychologist,2006,74.8,7.829,1532.0,46.0,95.455,0.0
American Psychologist,2007,84.5,6.967,1330.0,48.0,96.569,0.0
American Psychologist,2008,77.8,7.106,1219.0,34.0,96.535,0.0
...,...,...,...,...,...,...,...
Teaching Sociology,2012,80.9,,2176.0,,,1.0
Teaching Sociology,2013,84.0,0.980,2892.0,33.0,57.609,1.0
Teaching Sociology,2014,85.3,0.464,2995.0,26.0,26.408,1.0
Teaching Sociology,2015,,0.559,3049.0,24.0,29.930,1.0


## Variables

In [3]:
mydf = data.copy()
mydf = mydf.reset_index()
mydf['SA']=mydf.groupby(['Year']).JIF.transform('mean')
mydf['SA']=mydf['SA'].rolling(window=5,min_periods=1).mean()
mydf['JIF']=mydf.groupby(['Journal'])['JIF'].rolling(window=5,min_periods=1).mean().reset_index(0,drop=True)
mydf['distance']=mydf['JIF']-mydf['SA']
mydf.loc[mydf.distance<0,'Political Success']=1.05*mydf['SA']
mydf.loc[mydf.distance>0,'Political Success']=mydf['JIF']
mydf['Subscriptions']=mydf['Subscriptions']/1000
mydf['JIF Percentile'] = mydf.groupby('Year')['JIF_Percentile'].transform(pandas.qcut,2,labels=False,duplicates='drop')
mydf['Citable Items'] = mydf.groupby('Year')['Citable_Items'].transform(pandas.qcut,2,labels=False,duplicates='drop')
mydf

Unnamed: 0,Journal,Year,MRR,JIF,Subscriptions,Citable_Items,JIF_Percentile,ASA,SA,distance,Political Success,JIF Percentile,Citable Items
0,American Psychologist,2004,85.2,5.494000,1.938,36.0,95.500,0.0,2.907630,2.586370,5.494000,1.0,0.0
1,American Psychologist,2005,76.4,5.977000,1.599,46.0,96.535,0.0,2.829190,3.147810,5.977000,1.0,0.0
2,American Psychologist,2006,74.8,6.594333,1.532,46.0,95.455,0.0,2.890892,3.703441,6.594333,1.0,0.0
3,American Psychologist,2007,84.5,6.687500,1.330,48.0,96.569,0.0,2.899298,3.788202,6.687500,1.0,0.0
4,American Psychologist,2008,77.8,6.771200,1.219,34.0,96.535,0.0,2.984611,3.786589,6.771200,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
511,Teaching Sociology,2012,80.9,0.663500,2.176,,,1.0,3.300109,-2.636609,3.465115,,
512,Teaching Sociology,2013,84.0,0.781000,2.892,33.0,57.609,1.0,3.319557,-2.538557,3.485535,0.0,0.0
513,Teaching Sociology,2014,85.3,0.722000,2.995,26.0,26.408,1.0,3.383563,-2.661563,3.552741,0.0,0.0
514,Teaching Sociology,2015,,0.667667,3.049,24.0,29.930,1.0,3.398727,-2.731061,3.568664,0.0,0.0


## Preparing data for regression

In [4]:
XY = mydf.copy()
XY = XY[['MRR','Political Success','Subscriptions','JIF Percentile','Citable Items','Journal','Year']]
XY = XY.dropna()
X1 = XY[['Political Success','Subscriptions','JIF Percentile','Citable Items','Journal','Year']]
X1 = pandas.get_dummies(X1,columns=['Journal','Year'])
X1 = X1.drop(['Journal_American Psychologist','Year_2004'],axis=1)
X1['Political Success'] = X1['Political Success']-X1['Political Success'].mean()
X1['Subscriptions'] = X1['Subscriptions']-X1['Subscriptions'].mean()
X1['Political Success*Subscriptions'] = X1['Political Success'] * X1['Subscriptions']
X1['Intercept'] = 1
X0 = X1.drop(['Political Success*Subscriptions'],axis=1)

## Regression

In [5]:
r0 = []
r1 = []
window = 5
reg0 = sm.OLS(XY['MRR'],X0).fit(cov_type='hac-groupsum',cov_kwds={'time':XY.set_index('Year').index,'maxlags':window**2})
r0.append((reg0.params,reg0.bse,reg0.pvalues,reg0.rsquared_adj))
print(reg0.summary())
reg1 = sm.OLS(XY['MRR'],X1).fit(cov_type='hac-groupsum',cov_kwds={'time':XY.set_index('Year').index,'maxlags':window**2})
r1.append((reg1.params,reg1.bse,reg1.pvalues,reg1.rsquared_adj))
reg1.summary()

                            OLS Regression Results                            
Dep. Variable:                    MRR   R-squared:                       0.760
Model:                            OLS   Adj. R-squared:                  0.726
Method:                 Least Squares   F-statistic:                 4.591e+13
Date:                Wed, 28 Apr 2021   Prob (F-statistic):          1.67e-227
Time:                        06:38:54   Log-Likelihood:                -1431.2
No. Observations:                 434   AIC:                             2970.
Df Residuals:                     380   BIC:                             3190.
Df Model:                          53                                         
Covariance Type:         hac-groupsum                                         
                                                             coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------



0,1,2,3
Dep. Variable:,MRR,R-squared:,0.76
Model:,OLS,Adj. R-squared:,0.726
Method:,Least Squares,F-statistic:,169100000000000.0
Date:,"Wed, 28 Apr 2021",Prob (F-statistic):,2.05e-237
Time:,06:38:54,Log-Likelihood:,-1430.9
No. Observations:,434,AIC:,2972.0
Df Residuals:,379,BIC:,3196.0
Df Model:,54,,
Covariance Type:,hac-groupsum,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Political Success,0.5413,0.175,3.101,0.002,0.199,0.884
Subscriptions,0.0523,0.511,0.102,0.918,-0.949,1.053
JIF Percentile,0.6059,0.487,1.244,0.214,-0.349,1.561
Citable Items,-1.8740,0.580,-3.231,0.001,-3.011,-0.737
Journal_American Sociological Review,15.0947,2.353,6.416,0.000,10.484,19.706
Journal_Behavioral Neuroscience,-17.6255,2.892,-6.094,0.000,-23.294,-11.957
Journal_Contemporary Sociology,-11.2652,2.471,-4.558,0.000,-16.109,-6.421
Journal_Developmental Psychology,0.0361,0.951,0.038,0.970,-1.827,1.899
Journal_Emotion,-0.7357,2.865,-0.257,0.797,-6.351,4.879

0,1,2,3
Omnibus:,42.582,Durbin-Watson:,2.001
Prob(Omnibus):,0.0,Jarque-Bera (JB):,217.819
Skew:,0.158,Prob(JB):,5.03e-48
Kurtosis:,6.456,Cond. No.,118.0


In [6]:
table0 = Table_Summary(r0)
table0 = table0.reindex(['Political Success','Subscriptions','JIF Percentile','Citable Items','Intercept','Adjusted R2'],level=0)
table0 = table0.reindex(['Coeff','CSE','p-val',''],level=1)
table0.columns = ['Base Model']

table1 = Table_Summary(r1)
table1 = table1.reindex(['Political Success','Subscriptions','Political Success*Subscriptions','Citable Items','JIF Percentile','Intercept','Adjusted R2'],level=0)
table1 = table1.reindex(['Coeff','CSE','p-val',''],level=1)
table1.columns = ['Full Model']

table = pandas.merge(table0,table1,on=['Variables','Statistics'],how='outer')
table = table.reindex(['Political Success','Subscriptions','Political Success*Subscriptions','Citable Items','JIF Percentile','Intercept','Adjusted R2'],level=0)
table.round(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Base Model,Full Model
Variables,Statistics,Unnamed: 2_level_1,Unnamed: 3_level_1
Political Success,Coeff,0.948,0.541
Political Success,CSE,0.146,0.175
Political Success,p-val,0.0,0.002
Subscriptions,Coeff,-0.007,0.052
Subscriptions,CSE,0.544,0.511
Subscriptions,p-val,0.989,0.918
Political Success*Subscriptions,Coeff,,-0.429
Political Success*Subscriptions,CSE,,0.089
Political Success*Subscriptions,p-val,,0.0
Citable Items,Coeff,-1.73,-1.874


In [7]:
table.to_excel('Table-PS_robust2.xlsx',float_format="%.3f")