# Political success, May 2021

In [28]:
import pandas
import numpy
from numpy import std, correlate, sqrt
from scipy.stats import pearsonr
import statsmodels.api as sm
from linearmodels import IV2SLS
%run Table_Summary.ipynb

In [29]:
data = pandas.read_csv(r'D:/BP/Data/APA_ASA.txt',sep='\t')
data['MAR'] = pandas.to_numeric(data.MAR, errors='coerce')
data['MRR'] = 100 - 100*data['MAR']
data['Subscriptions'] = data['Printed_Subsc_Institutional']
data['Journal'] = data.groupby(['ID'])[['Journal']].fillna(method='ffill')
data['ASA'] = data.groupby(['ID'])[['ASA']].fillna(method='ffill')
data = data[~((data.Journal=='Contexts') | (data.Journal=='Rose'))]
data = data[['Journal','Year','MRR','JIF','Subscriptions','Citable_Items','JIF_Percentile','ASA']]
data = data.set_index(['Journal','Year']).sort_index()
data

Unnamed: 0_level_0,Unnamed: 1_level_0,MRR,JIF,Subscriptions,Citable_Items,JIF_Percentile,ASA
Journal,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
American Psychologist,2004,85.2,5.494,1938.0,36.0,95.500,0.0
American Psychologist,2005,76.4,6.460,1599.0,46.0,96.535,0.0
American Psychologist,2006,74.8,7.829,1532.0,46.0,95.455,0.0
American Psychologist,2007,84.5,6.967,1330.0,48.0,96.569,0.0
American Psychologist,2008,77.8,7.106,1219.0,34.0,96.535,0.0
...,...,...,...,...,...,...,...
Teaching Sociology,2012,80.9,,2176.0,,,1.0
Teaching Sociology,2013,84.0,0.980,2892.0,33.0,57.609,1.0
Teaching Sociology,2014,85.3,0.464,2995.0,26.0,26.408,1.0
Teaching Sociology,2015,,0.559,3049.0,24.0,29.930,1.0


## Variables

In [30]:
mydf = data.copy()
mydf = mydf.reset_index()
mydf['SA']=mydf.groupby(['Year']).JIF.transform('mean')
mydf['SA']=mydf['SA'].rolling(window=3,min_periods=1).mean()
mydf['JIF']=mydf.groupby(['Journal'])['JIF'].rolling(window=3,min_periods=1).mean().reset_index(0,drop=True)
mydf['distance']=mydf['JIF']-mydf['SA']
mydf.loc[mydf.distance<0,'Political Success']=1.05*mydf['SA']
mydf.loc[mydf.distance>0,'Political Success']=mydf['JIF']
mydf['Subscriptions']=numpy.log(mydf['Subscriptions'])
mydf['JIF Percentile'] = mydf.groupby('Year')['JIF_Percentile'].transform(pandas.qcut,2,labels=False,duplicates='drop')
mydf['Citable Items'] = mydf.groupby('Year')['Citable_Items'].transform(pandas.qcut,2,labels=False,duplicates='drop')
mydf

Unnamed: 0,Journal,Year,MRR,JIF,Subscriptions,Citable_Items,JIF_Percentile,ASA,SA,distance,Political Success,JIF Percentile,Citable Items
0,American Psychologist,2004,85.2,5.494000,7.569412,36.0,95.500,0.0,2.907630,2.586370,5.494000,1.0,0.0
1,American Psychologist,2005,76.4,5.977000,7.377134,46.0,96.535,0.0,2.829190,3.147810,5.977000,1.0,0.0
2,American Psychologist,2006,74.8,6.594333,7.334329,46.0,95.455,0.0,2.890892,3.703441,6.594333,1.0,0.0
3,American Psychologist,2007,84.5,7.085333,7.192934,48.0,96.569,0.0,2.896520,4.188813,7.085333,1.0,0.0
4,American Psychologist,2008,77.8,7.300667,7.105786,34.0,96.535,0.0,3.088225,4.212441,7.300667,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
511,Teaching Sociology,2012,80.9,,7.685244,,,1.0,3.339631,,,,
512,Teaching Sociology,2013,84.0,0.980000,7.969704,33.0,57.609,1.0,3.430756,-2.450756,3.602294,0.0,0.0
513,Teaching Sociology,2014,85.3,0.722000,8.004700,26.0,26.408,1.0,3.470613,-2.748613,3.644144,0.0,0.0
514,Teaching Sociology,2015,,0.667667,8.022569,24.0,29.930,1.0,3.374825,-2.707158,3.543566,0.0,0.0


## Preparing data for regression

In [31]:
XY = mydf.copy()
XY = XY[['MRR','Political Success','Subscriptions','JIF Percentile','Citable Items','Journal','Year']]
XY = XY.dropna()
X1 = XY[['Political Success','Subscriptions','JIF Percentile','Citable Items','Journal','Year']]
X1 = pandas.get_dummies(X1,columns=['Journal','Year'])
X1 = X1.drop(['Journal_American Psychologist','Year_2004'],axis=1)
X1['Political Success'] = X1['Political Success']-X1['Political Success'].mean()
X1['Subscriptions'] = X1['Subscriptions']-X1['Subscriptions'].mean()
X1['Political Success*Subscriptions'] = X1['Political Success'] * X1['Subscriptions']
X1['Intercept'] = 1
X0 = X1.drop(['Political Success*Subscriptions'],axis=1)

## Regression

In [34]:
r0 = []
r1 = []
window = 3
reg0 = sm.OLS(XY['MRR'],X0).fit(cov_type='hac-groupsum',cov_kwds={'time':XY.set_index('Year').index,'maxlags':window**2})
r0.append((reg0.params,reg0.bse,reg0.pvalues,reg0.rsquared_adj))
print(reg0.summary())
reg1 = sm.OLS(XY['MRR'],X1).fit(cov_type='hac-groupsum',cov_kwds={'time':XY.set_index('Year').index,'maxlags':window**2})
r1.append((reg1.params,reg1.bse,reg1.pvalues,reg1.rsquared_adj))
reg1.summary()

                            OLS Regression Results                            
Dep. Variable:                    MRR   R-squared:                       0.759
Model:                            OLS   Adj. R-squared:                  0.725
Method:                 Least Squares   F-statistic:                -1.491e+13
Date:                Wed, 28 Apr 2021   Prob (F-statistic):               1.00
Time:                        06:38:00   Log-Likelihood:                -1431.6
No. Observations:                 434   AIC:                             2971.
Df Residuals:                     380   BIC:                             3191.
Df Model:                          53                                         
Covariance Type:         hac-groupsum                                         
                                                             coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------



0,1,2,3
Dep. Variable:,MRR,R-squared:,0.759
Model:,OLS,Adj. R-squared:,0.725
Method:,Least Squares,F-statistic:,-104400000000000.0
Date:,"Wed, 28 Apr 2021",Prob (F-statistic):,1.0
Time:,06:38:01,Log-Likelihood:,-1431.4
No. Observations:,434,AIC:,2973.0
Df Residuals:,379,BIC:,3197.0
Df Model:,54,,
Covariance Type:,hac-groupsum,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Political Success,0.5500,0.247,2.222,0.026,0.065,1.035
Subscriptions,-0.6567,0.391,-1.678,0.093,-1.424,0.110
JIF Percentile,0.6763,0.847,0.799,0.424,-0.983,2.336
Citable Items,-1.9261,0.830,-2.321,0.020,-3.553,-0.300
Journal_American Sociological Review,16.6553,1.561,10.670,0.000,13.596,19.715
Journal_Behavioral Neuroscience,-18.3970,4.643,-3.962,0.000,-27.497,-9.297
Journal_Contemporary Sociology,-9.8077,3.362,-2.917,0.004,-16.396,-3.219
Journal_Developmental Psychology,-0.0091,1.692,-0.005,0.996,-3.325,3.307
Journal_Emotion,-2.4403,5.181,-0.471,0.638,-12.594,7.714

0,1,2,3
Omnibus:,42.55,Durbin-Watson:,1.995
Prob(Omnibus):,0.0,Jarque-Bera (JB):,216.138
Skew:,0.163,Prob(JB):,1.16e-47
Kurtosis:,6.442,Cond. No.,141.0


In [35]:
table0 = Table_Summary(r0)
table0 = table0.reindex(['Political Success','Subscriptions','JIF Percentile','Citable Items','Intercept','Adjusted R2'],level=0)
table0 = table0.reindex(['Coeff','CSE','p-val',''],level=1)
table0.columns = ['Base Model']

table1 = Table_Summary(r1)
table1 = table1.reindex(['Political Success','Subscriptions','Political Success*Subscriptions','Citable Items','JIF Percentile','Intercept','Adjusted R2'],level=0)
table1 = table1.reindex(['Coeff','CSE','p-val',''],level=1)
table1.columns = ['Full Model']

table = pandas.merge(table0,table1,on=['Variables','Statistics'],how='outer')
table = table.reindex(['Political Success','Subscriptions','Political Success*Subscriptions','Citable Items','JIF Percentile','Intercept','Adjusted R2'],level=0)
table.round(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Base Model,Full Model
Variables,Statistics,Unnamed: 2_level_1,Unnamed: 3_level_1
Political Success,Coeff,0.64,0.55
Political Success,CSE,0.3,0.247
Political Success,p-val,0.033,0.026
Subscriptions,Coeff,-0.557,-0.657
Subscriptions,CSE,0.378,0.391
Subscriptions,p-val,0.141,0.093
Political Success*Subscriptions,Coeff,,-0.221
Political Success*Subscriptions,CSE,,0.109
Political Success*Subscriptions,p-val,,0.043
Citable Items,Coeff,-1.778,-1.926


In [36]:
table.to_excel('Table-PS_robust3.xlsx',float_format="%.3f")