# PVE-analysis on research funding
*Collective Compass Project, DJA, July 2022*


---
## Loading the necessary Python modules.

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import itertools
import requests
# Global matplotlib defaults (font size and figure size); per the original
# note these must be set at the top of the notebook.
plt.rcParams.update({
    'font.size': 14,
    'figure.figsize': (14, 4),
})

In [None]:
# Download a fresh copy of the dataset from the OSF and store it locally.
# NOTE(review): the URL has a double slash before the query string -- confirm
# this is intended.
data_url = 'https://osf.io/f76rb//?action=download'
# A timeout keeps the notebook from hanging forever on an unresponsive server.
response = requests.get(data_url, timeout=60)

if response.status_code == 200:
    with open('../data/dat.csv', 'wb') as f:
        f.write(response.content)
else:
    # Do not fail silently: the next cell would otherwise read a stale (or
    # missing) local file without any hint that the download did not happen.
    print(f'Download failed (HTTP {response.status_code}); '
          'any existing ../data/dat.csv is left unchanged.')

In [None]:
# Load the local copy of the dataset into a DataFrame.
df = pd.read_csv(filepath_or_buffer='../data/dat.csv')

---
## Plot basic participation stats per stratification of choice

In [None]:
# Side-by-side bar charts: number of rows per institution (left, with grid)
# and per 'versie' (right).
fig, (ax_institution, ax_versie) = plt.subplots(1, 2)
df['institution'].value_counts().plot.bar(ax=ax_institution)
ax_institution.grid()
df['versie'].value_counts().plot.bar(ax=ax_versie);

In [None]:
# Count rows per (institution, versie) pair and draw them as stacked bars,
# one bar per institution with one segment per versie.
df_vals = df.value_counts(subset=['institution', 'versie'])
counts_by_versie = df_vals.unstack(level='versie')
counts_by_versie.plot.bar(stacked=True);

In [None]:
# Count rows per (pos5, sex) pair and draw them as stacked bars,
# one bar per pos5 value with one segment per sex.
df_vals = df.value_counts(subset=['pos5', 'sex'])
counts_by_sex = df_vals.unstack(level='sex')
counts_by_sex.plot.bar(stacked=True);

---
## Post-process the kt values into fractions of the total budget.

In [None]:
# Express every kt value as a fraction of the funds available in that row:
# each 'kt_funds' column is divided by the row's 'funds_available'.
# Rows with a missing value in any of the selected columns are dropped first.
#
# The patterns are regular expressions, not globs: the original trailing
# '_*' / 's*' only meant "zero or more of the preceding character", so the
# plain substring 'kt_funds' is what was effectively being matched.
mask = df.columns.str.contains('funds_available|versie|kt_funds')
# The trailing copy() guarantees df_kt is an independent frame, so the
# assignment below never writes through to (or warns about) `df`.
df_kt = df.loc[:, mask].dropna().copy()

# Divide all kt columns in one vectorized step. Assigning whole columns back
# (rather than writing in place through .loc) lets pandas replace the column
# dtype, which matters if a kt column happens to be integer-typed.
kt_cols = df_kt.columns[df_kt.columns.str.contains('kt_funds')]
df_kt[kt_cols] = df_kt[kt_cols].div(df_kt['funds_available'], axis=0)

In [None]:
# Table of the mean fraction spent on each kt item, one row per 'versie'.
kts = df_kt.columns[df_kt.columns.str.contains('kt_funds*')].tolist()
df_kt_per_version = df_kt.groupby('versie')[kts]
df_kt_per_version.mean()

In [None]:
# Show, per experiment version, the sum over all kt items of the mean
# fraction spent (i.e. the mean total fraction of the money that was spent).
df_kt_per_version.mean().sum(axis='columns')

In [None]:
# Bar chart grouped by kt item, with one bar per version inside each group.
mean_fraction_by_item = df_kt_per_version.mean().T
mean_fraction_by_item.plot.bar()
plt.grid()

In [None]:
# Bar chart grouped by version, with one bar per kt item inside each group.
mean_fraction_by_version = df_kt_per_version.mean()
mean_fraction_by_version.plot.bar()
plt.grid()