# Transform data
Pre-process data records used in original research articles
- assign column types, clean
- rename, reorder columns
- merge tables on keys

In [1]:
import pandas as pd

**PAPERS** from ICPSR Bibliography (DBInfo)

In [2]:
df_paper = pd.read_excel('./data_original/ICPSR_bib_studies_20211111.xlsx', sheet_name=0)
df_paper = df_paper[df_paper['STUD_NUMS'].notna()] # remove papers that do not have study numbers

df_paper['REF_ID'] = df_paper['REF_ID'].astype('int')
df_paper['TITLE'] = df_paper['TITLE'].str.strip() # remove whitespace from original record entry
df_paper['AUTHORS'] = df_paper['AUTHORS'].str.strip()
df_paper['SEC_TITLE'] = df_paper['SEC_TITLE'].str.strip()
df_paper['RIS_TYPE'] = df_paper['RIS_TYPE'].str.strip()
df_paper['RIS_TYPE'] = df_paper['RIS_TYPE'].astype('category')
df_paper['FUNDER'] = df_paper['FUNDER'].str.strip()
df_paper['YEAR_PUB'] = df_paper['YEAR_PUB'].astype('int')
df_paper['DATE_INPUT'] = df_paper['DATE_INPUT'].astype('datetime64[ns]')
df_paper['STUD_NUMS'] = df_paper['STUD_NUMS'].astype('str')

df_paper = df_paper.rename(columns={'STUD_NUMS':'STUDY_NUMS'})

df_paper = df_paper[['REF_ID',
                     'DOI',
                     'TITLE',
                     'AUTHORS',
                     'SEC_TITLE',
                     'RIS_TYPE',
                     'FUNDER',
                     'YEAR_PUB',
                     'DATE_INPUT',
                     'SERIES_NUMS',
                     'STUDY_NUMS']]

df_paper.to_csv('./data_transform/ICPSR_PAPERS.csv')
df_paper.info()

<class 'pandas.core.frame.DataFrame'>
Index: 94755 entries, 0 to 99648
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   REF_ID       94755 non-null  int64         
 1   DOI          43063 non-null  object        
 2   TITLE        94750 non-null  object        
 3   AUTHORS      94754 non-null  object        
 4   SEC_TITLE    78193 non-null  object        
 5   RIS_TYPE     94755 non-null  category      
 6   FUNDER       490 non-null    object        
 7   YEAR_PUB     94755 non-null  int64         
 8   DATE_INPUT   94079 non-null  datetime64[ns]
 9   SERIES_NUMS  2338 non-null   object        
 10  STUDY_NUMS   94755 non-null  object        
dtypes: category(1), datetime64[ns](1), int64(2), object(7)
memory usage: 8.0+ MB


In [3]:
print(df_paper['DATE_INPUT'].min()) # summarize temporal coverage
print(df_paper['DATE_INPUT'].max())

2000-08-11 00:00:00
2021-11-16 17:21:25


In [4]:
df_paper.sample(5)

Unnamed: 0,REF_ID,DOI,TITLE,AUTHORS,SEC_TITLE,RIS_TYPE,FUNDER,YEAR_PUB,DATE_INPUT,SERIES_NUMS,STUDY_NUMS
59868,112300,,National Evaluation of the Community Anti-Crim...,American Institutes for Research,,RPRT,,1979,2013-09-26 00:00:00,,8704
84440,142684,10.1093/geroni/igx004.4311,Does Social Engagement Mitigate Declines in Co...,"Brustrom, J.; Liu, T.; Greek, A.A.; Hougham, G.W.",21st International Association of Gerontology ...,CONF,,2017,2019-10-29 17:00:04,,37107
30981,36767,10.1097/00007435-200201000-00003,"American Adolescents: Sexual Mixing Patterns, ...","Ford, K.; Sohn, W.; Lepkowski, J.",Sexually Transmitted Diseases,JOUR,,2002,2005-06-30 00:00:00,,21600
74059,130306,,Are Youth and Young Adults Who First Try a Fla...,"Villanti, Andrea",23rd Annual Meeting of the Society for Researc...,CONF,,2017,2018-06-12 15:44:53,,36498
42779,77608,10.1016/j.ssresearch.2005.04.002,Can differential exposure to risk factors expl...,"Phillips, Julie A.; Sweeney, Megan M.",Social Science Research,JOUR,,2006,2009-10-05 00:00:00,,6960


**STUDIES** from ICPSR catalog (DBInfo)

In [5]:
df_study = pd.read_excel('./data_original/ICPSR_bib_studies_20211111.xlsx', sheet_name=1)
df_study = df_study[df_study['PERMIT']=='AVAILABLE'] # remove studies that are not publicly available
df_study = df_study[df_study['OBJECTTYPE']=='study'] # remove union catalog entries

df_study['DESCRIPTION'] = df_study['DESCRIPTION_1'].astype('str') \
    + " " + df_study['DESCRIPTION_2'].astype('str') \
    + " " + df_study['DESCRIPTION_3'].astype('str') + " " \
    + df_study['DESCRIPTION_4'].astype('str') \
    + " " + df_study['DESCRIPTION_5'].astype('str') # combine study description to a single field

df_study = df_study.drop(columns=['PERMIT',
                                  'OBJECTTYPE',
                                  'ALTTITLE1',
                                  'ALTTITLE2',
                                  'ALTTITLE3',
                                  'ALTTITLE4',
                                  'ALTTITLE5',
                                  'ALTTITLE6',
                                  'ALTTITLE7',
                                  'DESCRIPTION_1',
                                  'DESCRIPTION_2',
                                  'DESCRIPTION_3',
                                  'DESCRIPTION_4',
                                  'DESCRIPTION_5'])

df_study['STUDY'] = df_study['STUDY'].astype('int')
df_study['NAME'] = df_study['NAME'].str.strip()
df_study['SERIES'] = df_study['SERIES'].astype('str').astype('float')
df_study['SERIES_TITLE'] = df_study['SERIES_TITLE'].str.strip()
df_study['OWNER'] = df_study['OWNER'].astype('category')
df_study['FUNDINGAGENCY'] = df_study['FUNDINGAGENCY'].str.strip()
df_study['DOI'] = df_study['DOI'].str.strip()
df_study['GEO'] = df_study['GEO'].str.strip()
df_study['TERMS'] = df_study['TERMS'].str.strip()
df_study['ORIGRELDATE'] = df_study['ORIGRELDATE'].astype('datetime64[ns]')
df_study['MEMSERV_PI'] = df_study['MEMSERV_PI'].str.strip()
df_study['DESCRIPTION'] = df_study['DESCRIPTION'].str.strip()

df_study = df_study.rename(columns={'FUNDINGAGENCY':'FUNDING_AGENCY','ORIGRELDATE':'RELEASE_DATE','MEMSERV_PI':'PRINCIPAL_INV'})
df_study.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10610 entries, 0 to 11660
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   STUDY           10610 non-null  int64         
 1   NAME            10610 non-null  object        
 2   SERIES          6683 non-null   float64       
 3   SERIES_TITLE    6683 non-null   object        
 4   OWNER           10610 non-null  category      
 5   FUNDING_AGENCY  5256 non-null   object        
 6   DOI             10610 non-null  object        
 7   GEO             9956 non-null   object        
 8   TERMS           10155 non-null  object        
 9   RELEASE_DATE    10610 non-null  datetime64[ns]
 10  PRINCIPAL_INV   10610 non-null  object        
 11  DESCRIPTION     10610 non-null  object        
dtypes: category(1), datetime64[ns](1), float64(1), int64(1), object(8)
memory usage: 1005.8+ KB


Study table: usage

In [6]:
df_usage = pd.read_excel('./data_original/ICPSR_combined_study_usage_20210430_onlyData_noAllREST.xlsx',sheet_name=0)
df_usage = df_usage.drop(columns=['DATA_PULL_DATE','ORIGRELDATE','OWNER','OWNER_ICPSR','MEMBER','DAYSINSAMPLE_TO20210430','DAYSINSAMPLE_TO20151231','RECENCY','SERIESYN','VARS','SAMPLING','PROPORTIONREST','TOT_DATA', 'NUMTERMS'])

df_usage['STUDY'] = df_usage['STUDY'].astype('int')
df_usage['SINGLEPI'] = df_usage['SINGLEPI'].astype('int')
df_usage['INST_PI'] = df_usage['INST_PI'].astype('int')
df_usage['TOT_PI'] = df_usage['TOT_PI'].astype('int')
df_usage['SDA'] = df_usage['SDA'].astype('int')
df_usage['QTEXT'] = df_usage['QTEXT'].astype('float')
df_usage['SSVD'] = df_usage['SSVD'].astype('float')
df_usage['USERS_2017_TO_PULLDATE'] = df_usage['USERS_2017_TO_PULLDATE'].astype('float')
df_usage['DATAUSERS_2017_TO_PULLDATE'] = df_usage['DATAUSERS_2017_TO_PULLDATE'].astype('float')
df_usage['HAS_RESTRICTED'] = df_usage['HAS_RESTRICTED'].astype('int')
df_usage['ALL_RESTRICTED'] = df_usage['ALL_RESTRICTED'].astype('int')

df_usage = df_usage.rename(columns={'USERS_2017_TO_PULLDATE':'USERS_TO_20210511','DATAUSERS_2017_TO_PULLDATE':'DATAUSERS_TO_20210511','SINGLEPI':'SINGLE_PI'})
df_usage.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 461 entries, 0 to 460
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   STUDY                  461 non-null    int64  
 1   SINGLE_PI              461 non-null    int64  
 2   INST_PI                461 non-null    int64  
 3   TOT_PI                 461 non-null    int64  
 4   SDA                    461 non-null    int64  
 5   QTEXT                  453 non-null    float64
 6   SSVD                   453 non-null    float64
 7   USERS_TO_20210511      457 non-null    float64
 8   DATAUSERS_TO_20210511  455 non-null    float64
 9   HAS_RESTRICTED         461 non-null    int64  
 10  ALL_RESTRICTED         461 non-null    int64  
dtypes: float64(4), int64(7)
memory usage: 39.7 KB


Study table: curation level

In [7]:
df_curation = pd.read_csv('./data_original/jira_curation_level_20210426_by_study.csv')
df_curation = df_curation.drop(columns=['cur_lev_rank'])
df_curation['study'] = df_curation['study'].astype('int')
df_curation['curation_level'] = df_curation['curation_level'].astype('category')
df_curation = df_curation.rename(columns={'study':'STUDY','curation_level':'CURATION_LEVEL'})
df_curation.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1367 entries, 0 to 1366
Data columns (total 2 columns):
 #   Column          Non-Null Count  Dtype   
---  ------          --------------  -----   
 0   STUDY           1367 non-null   int64   
 1   CURATION_LEVEL  1367 non-null   category
dtypes: category(1), int64(1)
memory usage: 12.4 KB


Study table: citations (count number of study citations from Bibliography)

In [8]:
df_paper['STUDY'] = df_paper['STUDY_NUMS'].str.split(";") # split STUD_NUMS so that each row is one study
df_paper = df_paper.explode('STUDY')
df_paper['STUDY'] = df_paper['STUDY'].astype('int')

df_citations = pd.DataFrame(df_paper['STUDY'].value_counts()).reset_index()
df_citations = df_citations.rename(columns={'count':'CITATIONS_TO_20211116'})
df_citations.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8377 entries, 0 to 8376
Data columns (total 2 columns):
 #   Column                 Non-Null Count  Dtype
---  ------                 --------------  -----
 0   STUDY                  8377 non-null   int64
 1   CITATIONS_TO_20211116  8377 non-null   int64
dtypes: int64(2)
memory usage: 131.0 KB


Study table: variable counts

In [9]:
df_variables = pd.read_csv('./data_original/variable_counts_by_study.csv')
df_variables['STUDY'] = df_variables['STUDY'].astype('int')
df_variables['TOTALVARS'] = df_variables['TOTALVARS'].astype('int')
df_variables = df_variables.rename(columns={'TOTALVARS':'TOTAL_VARS'})
df_variables.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9485 entries, 0 to 9484
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype
---  ------      --------------  -----
 0   STUDY       9485 non-null   int64
 1   TOTAL_VARS  9485 non-null   int64
dtypes: int64(2)
memory usage: 148.3 KB


Merge STUDY tables

In [10]:
df_study_merge = df_study.merge(df_usage, on='STUDY', how='left') \
    .merge(df_curation, on='STUDY', how='left') \
    .merge(df_citations, on='STUDY', how='left') \
    .merge(df_variables, on='STUDY', how='left') # merge all tables on main list of studies

df_study_merge = df_study_merge[['STUDY', 
                               'DOI',
                               'NAME',
                               'SERIES',
                               'SERIES_TITLE',
                               'PRINCIPAL_INV',
                               'DESCRIPTION',
                               'RELEASE_DATE',
                               'FUNDING_AGENCY',
                               'GEO',
                               'TERMS',
                               'OWNER',
                               'CURATION_LEVEL',
                               'SINGLE_PI',
                               'INST_PI',
                               'TOT_PI',
                               'TOTAL_VARS',
                               'SDA',
                               'QTEXT',
                               'SSVD',
                               'HAS_RESTRICTED',
                               'ALL_RESTRICTED',
                               'USERS_TO_20210511',
                               'DATAUSERS_TO_20210511',
                               'CITATIONS_TO_20211116']]

df_study_merge.to_csv('./data_transform/ICPSR_STUDIES.csv')
df_study_merge.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10610 entries, 0 to 10609
Data columns (total 25 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   STUDY                  10610 non-null  int64         
 1   DOI                    10610 non-null  object        
 2   NAME                   10610 non-null  object        
 3   SERIES                 6683 non-null   float64       
 4   SERIES_TITLE           6683 non-null   object        
 5   PRINCIPAL_INV          10610 non-null  object        
 6   DESCRIPTION            10610 non-null  object        
 7   RELEASE_DATE           10610 non-null  datetime64[ns]
 8   FUNDING_AGENCY         5256 non-null   object        
 9   GEO                    9956 non-null   object        
 10  TERMS                  10155 non-null  object        
 11  OWNER                  10610 non-null  category      
 12  CURATION_LEVEL         1125 non-null   category      
 13  S

In [11]:
df_study_merge['OWNER'].unique()

['ICPSR', 'NACJD', 'DSDR', 'RCMD', 'NAHDAP', ..., 'gmsdata', 'pcoridata', 'pcodr', 'DAIRL', 'appfed']
Length: 21
Categories (21, object): ['ADDEP', 'AERA', 'APA', 'CFDA', ..., 'gmsdata', 'odf', 'pcodr', 'pcoridata']

In [12]:
df_study_merge['CURATION_LEVEL'].unique()

[NaN, 'Level 1', 'Other', 'Level 2', 'Level 3', 'Fast Release', 'Level 0']
Categories (6, object): ['Fast Release', 'Level 0', 'Level 1', 'Level 2', 'Level 3', 'Other']

In [13]:
print(df_study_merge['RELEASE_DATE'].min()) # summarize temporal coverage
print(df_study_merge['RELEASE_DATE'].max())

1984-03-18 00:00:00
2021-10-28 11:29:31


In [14]:
df_study_merge.sample(5)

Unnamed: 0,STUDY,DOI,NAME,SERIES,SERIES_TITLE,PRINCIPAL_INV,DESCRIPTION,RELEASE_DATE,FUNDING_AGENCY,GEO,...,TOT_PI,TOTAL_VARS,SDA,QTEXT,SSVD,HAS_RESTRICTED,ALL_RESTRICTED,USERS_TO_20210511,DATAUSERS_TO_20210511,CITATIONS_TO_20211116
6593,20040,10.3886/ICPSR20040.v1,Re-examination of the Criminal Deterrent Effec...,,,"Cohen-Cole, Ethan, Steven Durlauf, Jeffrey Fag...",The purpose of this study was to estimate the ...,2008-01-31 00:00:00,United States Department of Justice. Office of...,United States,...,,,,,,,,,,2.0
143,1075,10.3886/ICPSR01075.v1,Assessing Applied Econometric Results,,,"Christ, Carl F., and David A. Dickey",These data and/or computer programs are part o...,1996-01-03 00:00:00,,United States,...,,,,,,,,,,1.0
5653,9386,10.3886/ICPSR09386.v1,"Canadian National Election Study, 1988",201.0,Canadian National Elections Study (CNES) Series,"Johnston, Richard","This collection, containing information on the...",1990-10-16 00:00:00,Social Sciences and Humanities Research Counci...,Canada; Global,...,,439.0,,,,,,,,18.0
10178,37470,10.3886/ICPSR37470.v1,Understanding Online Hate Speech as a Motivato...,,,"Cahill, Meagan, Katya Migacheva, Jirka Taylor,...","<p> In the United States, a number of challeng...",2021-07-28 13:47:31,United States Department of Justice. Office of...,California; Los Angeles; United States,...,,,,,,,,,,1.0
10111,37374,10.3886/ICPSR37374.v1,Federal Justice Statistics Program: Suspects i...,73.0,Federal Justice Statistics Program Data Series,United States Department of Justice. Office of...,The data contain records of suspects in federa...,2019-09-25 11:13:56,United States Department of Justice. Office of...,United States,...,,39.0,,,,,,,,4.0


**PREDICTED CURATION ACTIVITIES** from Jira work logs

In [15]:
df_action = pd.read_csv('./data_original/predicted_curation_by_study.csv')
df_action = df_action.drop(columns=['Unnamed: 0']) # remove sentence index
df_action = df_action.rename(columns={'Studies':'STUDY','Desc':'WORK_DESC','Action':'ACTION_LABEL','Log_hrs':'LOG_HRS','Study_hrs':'STUDY_HRS'})
df_action['STUDY'] = df_action['STUDY'].str.replace("s","") # remove "s" from study number
df_action['STUDY'] = df_action['STUDY'].str.strip()
df_action['STUDY'] = df_action['STUDY'].astype('int')
df_action['WORK_DESC'] = df_action['WORK_DESC'].str.strip()
df_action['ACTION_LABEL'] = df_action['ACTION_LABEL'].str.strip()
df_action['ACTION_LABEL'] = df_action['ACTION_LABEL'].astype('category')
df_action.to_csv('./data_transform/ICPSR_CURATION_LOGS.csv')
df_action.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13425 entries, 0 to 13424
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   STUDY         13425 non-null  int64   
 1   WORK_DESC     13407 non-null  object  
 2   ACTION_LABEL  13425 non-null  category
 3   LOG_HRS       12216 non-null  float64 
 4   STUDY_HRS     13425 non-null  float64 
dtypes: category(1), float64(2), int64(1), object(1)
memory usage: 433.1+ KB


In [16]:
df_action.ACTION_LABEL.unique()

['Metadata-study-level', 'Quality-checks', 'Non-curation', 'Initial-review-and-planning', 'Communication-for-study', 'Transformation-of-data', 'Documentation', 'Other']
Categories (8, object): ['Communication-for-study', 'Documentation', 'Initial-review-and-planning', 'Metadata-study-level', 'Non-curation', 'Other', 'Quality-checks', 'Transformation-of-data']

In [17]:
curation_studies = df_action['STUDY'].unique().tolist()
mask = df_study_merge['STUDY'].isin(curation_studies)
subset = df_study_merge.loc[~mask]

print(subset.RELEASE_DATE.min()) # note: document system-level issue with earliest release date
print(subset.RELEASE_DATE.max())

1984-03-18 00:00:00
2021-10-28 11:29:31


In [18]:
df_action.head(5)

Unnamed: 0,STUDY,WORK_DESC,ACTION_LABEL,LOG_HRS,STUDY_HRS
0,37216,submit metadata to vanessa,Metadata-study-level,6.5,712.5
1,37216,submit for 2qc,Quality-checks,1.0,712.5
2,37216,2qc,Quality-checks,8.0,712.5
3,37216,metadata review,Metadata-study-level,4.0,712.5
4,37216,1 hour vacation time,Non-curation,7.0,712.5


**PROCESSING HISTORY COMMANDS** from SPSS processing history files

In [19]:
df_processing = pd.read_csv('./data_original/processing_history_commands_2019_20220502.csv')
df_processing = df_processing.drop(columns=['filename','savedate'])
df_processing.columns = [x.upper() for x in df_processing.columns]
df_processing['STUDY'] = df_processing['STUDY'].astype('int')
df_processing['TOTAL_LINES'] = df_processing['TOTAL_LINES'].astype('int')
df_processing['COMMENTS'] = df_processing['COMMENTS'].astype('int')
df_processing.to_csv('./data_transform/ICPSR_PROCESSING_HISTORY.csv')

for column in df_processing.columns:
    print(column)

STUDY
TOTAL_LINES
COMMENTS
GET
WEIGHT
FORMATS
VALUE_LABELS
RENAME_VARIABLES
VARIABLE_LABELS
ADD_VALUE_LABELS
MISSING_VALUES
SAVE
DISPLAY
FREQUENCIES
SLOC
RECODE
SET
ALTER_TYPE
STRING
COMPUTE
GET_DATA
CACHE
EXECUTE
INPUT_PROGRAM
AGGREGATE
DATASET_NAME
SORT_CASES
MATCH_FILES
INSERT
NUMERIC
IF
ERASE
DO_IF
APPLY_DICTIONARY
DELETE_VARIABLES
CD
DEFINE
CROSSTABS
FILE_HANDLE
DATA_LIST
INCLUDE
GET_SAS
DATASET_ACTIVATE
ADD_FILES
GET_STATA
NEW_FILE
RELIABILITY
RESPONSE_RATE
DO_REPEAT
WRITE
SAMPLE
SPECTRA
REGRESSION
DATE
RECORD_TYPE
RATIO_STATISTICS
HOMALS
PRINT_FORMATS
DESCRIPTIVES
LEAVE
VECTOR
SELECT_IF
FILTER
USE
IMPORT
BEGIN_DATA
CURVEFIT
STAR_JOIN
FINISH
LIST
READ_MODEL
XSAVE
SHOW
REFORMAT
CONJOINT
FLIP
AUTORECODE
SUBTITLE
PROBIT
VERIFY
DATASET_CLOSE
VARIABLE_LEVEL
VARIABLE_WIDTH
TEMPORARY
TITLE
COMMENT
PLANCARDS
REREAD
PARTIAL_CORR
RESTORE
VARSTOCASES
CODEBOOK
EXPORT
SURVIVAL
LOOP
GRAPH
NONPAR_CORR
PERMISSIONS
ORTHOPLAN
FILE_TYPE
COUNT
BREAK
DROP_DOCUMENTS
OUTPUT_SAVE
TSET
MEANS
RANK
SPLIT_F

In [20]:
processing_studies = df_processing['STUDY'].unique().tolist()
mask = df_study_merge['STUDY'].isin(processing_studies)
subset = df_study_merge.loc[~mask]

print(subset.RELEASE_DATE.min()) # summarize temporal coverage
print(subset.RELEASE_DATE.max())

1984-03-18 00:00:00
2021-10-28 11:29:31


In [21]:
df_processing.sample(5)

Unnamed: 0,STUDY,TOTAL_LINES,COMMENTS,GET,WEIGHT,FORMATS,VALUE_LABELS,RENAME_VARIABLES,VARIABLE_LABELS,ADD_VALUE_LABELS,...,SUMMARIZE,OMS,OMSEND,TABLES,AIM,ANACOR,PREDICT,NPAR_TESTS,TSPLOT,XGRAPH
7511,37128,531,9,2.0,,11.0,66.0,7.0,77.0,,...,,,,,,,,,,
720,22920,184,27,2.0,,,4.0,1.0,7.0,,...,,,,,,,,,,
8008,25426,93,7,1.0,,,,,,,...,,,,,,,,,,
2448,36276,648,22,2.0,,,261.0,,20.0,,...,,,,,,,,,,
6368,25863,241,18,1.0,,,,,,,...,,,,,,,,,,
