# Political success, May 2021

In [38]:
import pandas
import numpy
from numpy import std, correlate, sqrt
from scipy.stats import pearsonr
import statsmodels.api as sm
from linearmodels import IV2SLS
%run Table_Summary.ipynb

In [39]:
data = pandas.read_csv(r'D:/BP/Data/APA_ASA.txt',sep='\t')
data['MAR'] = pandas.to_numeric(data.MAR, errors='coerce')
data['MRR'] = 100 - 100*data['MAR']
data['Subscriptions'] = data['Printed_Subsc_Institutional']
data['Journal'] = data.groupby(['ID'])[['Journal']].fillna(method='ffill')
data['ASA'] = data.groupby(['ID'])[['ASA']].fillna(method='ffill')
data = data[~((data.Journal=='Contexts') | (data.Journal=='Rose'))]
data = data[['Journal','Year','MRR','JIF','Subscriptions','Citable_Items','JIF_Percentile','ASA']]
data = data.set_index(['Journal','Year']).sort_index()
data

Unnamed: 0_level_0,Unnamed: 1_level_0,MRR,JIF,Subscriptions,Citable_Items,JIF_Percentile,ASA
Journal,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
American Psychologist,2004,85.2,5.494,1938.0,36.0,95.500,0.0
American Psychologist,2005,76.4,6.460,1599.0,46.0,96.535,0.0
American Psychologist,2006,74.8,7.829,1532.0,46.0,95.455,0.0
American Psychologist,2007,84.5,6.967,1330.0,48.0,96.569,0.0
American Psychologist,2008,77.8,7.106,1219.0,34.0,96.535,0.0
...,...,...,...,...,...,...,...
Teaching Sociology,2012,80.9,,2176.0,,,1.0
Teaching Sociology,2013,84.0,0.980,2892.0,33.0,57.609,1.0
Teaching Sociology,2014,85.3,0.464,2995.0,26.0,26.408,1.0
Teaching Sociology,2015,,0.559,3049.0,24.0,29.930,1.0


## Variables

In [40]:
mydf = data.copy()
mydf = mydf.reset_index()
mydf['SA']=mydf.groupby(['Year']).JIF.transform('mean')
mydf['SA']=mydf['SA'].rolling(window=5,min_periods=1).mean()
mydf['JIF']=mydf.groupby(['Journal'])['JIF'].rolling(window=5,min_periods=1).mean().reset_index(0,drop=True)
mydf['distance']=mydf['JIF']-mydf['SA']
mydf.loc[mydf.distance<0,'Political Success']=1.05*mydf['SA']
mydf.loc[mydf.distance>0,'Political Success']=mydf['JIF']
mydf['Subscriptions']=numpy.log(mydf['Subscriptions'])
mydf['JIF Percentile'] = mydf.groupby('Year')['JIF_Percentile'].transform(pandas.qcut,3,labels=False,duplicates='drop')
mydf['Citable Items'] = mydf.groupby('Year')['Citable_Items'].transform(pandas.qcut,3,labels=False,duplicates='drop')
mydf

Unnamed: 0,Journal,Year,MRR,JIF,Subscriptions,Citable_Items,JIF_Percentile,ASA,SA,distance,Political Success,JIF Percentile,Citable Items
0,American Psychologist,2004,85.2,5.494000,7.569412,36.0,95.500,0.0,2.907630,2.586370,5.494000,2.0,0.0
1,American Psychologist,2005,76.4,5.977000,7.377134,46.0,96.535,0.0,2.829190,3.147810,5.977000,2.0,1.0
2,American Psychologist,2006,74.8,6.594333,7.334329,46.0,95.455,0.0,2.890892,3.703441,6.594333,2.0,1.0
3,American Psychologist,2007,84.5,6.687500,7.192934,48.0,96.569,0.0,2.899298,3.788202,6.687500,2.0,1.0
4,American Psychologist,2008,77.8,6.771200,7.105786,34.0,96.535,0.0,2.984611,3.786589,6.771200,2.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
511,Teaching Sociology,2012,80.9,0.663500,7.685244,,,1.0,3.300109,-2.636609,3.465115,,
512,Teaching Sociology,2013,84.0,0.781000,7.969704,33.0,57.609,1.0,3.319557,-2.538557,3.485535,0.0,0.0
513,Teaching Sociology,2014,85.3,0.722000,8.004700,26.0,26.408,1.0,3.383563,-2.661563,3.552741,0.0,0.0
514,Teaching Sociology,2015,,0.667667,8.022569,24.0,29.930,1.0,3.398727,-2.731061,3.568664,0.0,0.0


## Preparing data for regression

In [41]:
XY = mydf.copy()
XY = XY[['MRR','Political Success','Subscriptions','JIF Percentile','Citable Items','Journal','Year']]
XY = XY.dropna()
X1 = XY[['Political Success','Subscriptions','JIF Percentile','Citable Items','Journal','Year']]
X1 = pandas.get_dummies(X1,columns=['Journal','Year'])
X1 = X1.drop(['Journal_American Psychologist','Year_2004'],axis=1)
X1['Political Success'] = X1['Political Success']-X1['Political Success'].mean()
X1['Subscriptions'] = X1['Subscriptions']-X1['Subscriptions'].mean()
X1['Political Success*Subscriptions'] = X1['Political Success'] * X1['Subscriptions']
X1['Intercept'] = 1
X0 = X1.drop(['Political Success*Subscriptions'],axis=1)

## Regression

In [44]:
r0 = []
r1 = []
window = 5
reg0 = sm.OLS(XY['MRR'],X0).fit(cov_type='hac-groupsum',cov_kwds={'time':XY.set_index('Year').index,'maxlags':window**2})
r0.append((reg0.params,reg0.bse,reg0.pvalues,reg0.rsquared_adj))
print(reg0.summary())
reg1 = sm.OLS(XY['MRR'],X1).fit(cov_type='hac-groupsum',cov_kwds={'time':XY.set_index('Year').index,'maxlags':window**2})
r1.append((reg1.params,reg1.bse,reg1.pvalues,reg1.rsquared_adj))
reg1.summary()

                            OLS Regression Results                            
Dep. Variable:                    MRR   R-squared:                       0.759
Model:                            OLS   Adj. R-squared:                  0.725
Method:                 Least Squares   F-statistic:                 4.825e+13
Date:                Wed, 28 Apr 2021   Prob (F-statistic):          6.96e-228
Time:                        06:47:51   Log-Likelihood:                -1431.8
No. Observations:                 434   AIC:                             2972.
Df Residuals:                     380   BIC:                             3192.
Df Model:                          53                                         
Covariance Type:         hac-groupsum                                         
                                                             coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------



0,1,2,3
Dep. Variable:,MRR,R-squared:,0.759
Model:,OLS,Adj. R-squared:,0.725
Method:,Least Squares,F-statistic:,-27370000000000.0
Date:,"Wed, 28 Apr 2021",Prob (F-statistic):,1.0
Time:,06:47:51,Log-Likelihood:,-1431.8
No. Observations:,434,AIC:,2974.0
Df Residuals:,379,BIC:,3198.0
Df Model:,54,,
Covariance Type:,hac-groupsum,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Political Success,0.9698,0.185,5.247,0.000,0.608,1.332
Subscriptions,-0.7452,0.282,-2.645,0.008,-1.297,-0.193
JIF Percentile,0.5981,0.513,1.166,0.244,-0.407,1.603
Citable Items,0.2829,0.499,0.567,0.571,-0.696,1.261
Journal_American Sociological Review,18.7803,1.194,15.733,0.000,16.441,21.120
Journal_Behavioral Neuroscience,-18.1498,4.571,-3.970,0.000,-27.110,-9.190
Journal_Contemporary Sociology,-6.6853,2.902,-2.304,0.021,-12.373,-0.998
Journal_Developmental Psychology,0.0196,1.338,0.015,0.988,-2.603,2.643
Journal_Emotion,-2.4617,3.963,-0.621,0.535,-10.230,5.306

0,1,2,3
Omnibus:,41.98,Durbin-Watson:,1.991
Prob(Omnibus):,0.0,Jarque-Bera (JB):,202.276
Skew:,0.191,Prob(JB):,1.19e-44
Kurtosis:,6.323,Cond. No.,148.0


In [45]:
table0 = Table_Summary(r0)
table0 = table0.reindex(['Political Success','Subscriptions','JIF Percentile','Citable Items','Intercept','Adjusted R2'],level=0)
table0 = table0.reindex(['Coeff','CSE','p-val',''],level=1)
table0.columns = ['Base Model']

table1 = Table_Summary(r1)
table1 = table1.reindex(['Political Success','Subscriptions','Political Success*Subscriptions','Citable Items','JIF Percentile','Intercept','Adjusted R2'],level=0)
table1 = table1.reindex(['Coeff','CSE','p-val',''],level=1)
table1.columns = ['Full Model']

table = pandas.merge(table0,table1,on=['Variables','Statistics'],how='outer')
table = table.reindex(['Political Success','Subscriptions','Political Success*Subscriptions','Citable Items','JIF Percentile','Intercept','Adjusted R2'],level=0)
table.round(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Base Model,Full Model
Variables,Statistics,Unnamed: 2_level_1,Unnamed: 3_level_1
Political Success,Coeff,1.036,0.97
Political Success,CSE,0.179,0.185
Political Success,p-val,0.0,0.0
Subscriptions,Coeff,-0.709,-0.745
Subscriptions,CSE,0.272,0.282
Subscriptions,p-val,0.009,0.008
Political Success*Subscriptions,Coeff,,-0.1
Political Success*Subscriptions,CSE,,0.078
Political Success*Subscriptions,p-val,,0.2
Citable Items,Coeff,0.297,0.283


In [47]:
table.to_excel('Table-PS_robust4.xlsx',float_format="%.3f")