-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #17 from YeChen-IDM/plotting_update_2
Translate R plotting scripts into Python and add get_version.py
- Loading branch information
Showing
82 changed files
with
2,903 additions
and
47 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
64 changes: 64 additions & 0 deletions
64
create_plots/archive_organization/_20220603/helper_functions_age_inc_prev.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
from plotnine import ggplot, aes, geom_line, geom_point, theme_bw, xlab, ylab, scale_color_manual, facet_wrap | ||
import numpy as np | ||
import pandas as pd | ||
from pandas.api.types import CategoricalDtype | ||
|
||
|
||
def get_substr(site_name_str, index):
    """
    Return part of the second "_"-separated field of a site name.

    Args:
        site_name_str (str): string containing at least one underscore
        index (int or slice): position(s) to take from the second field

    Returns:
        The character (or substring, for a slice) at ``index`` of the text
        found between the first and second underscore.
    """
    fields = site_name_str.split("_")
    second_field = fields[1]
    return second_field[index]
|
||
|
||
def get_mean_from_upper_age(cur_age, upper_ages):
    """
    Return the midpoint of the age bin whose upper edge is ``cur_age``.

    The first bin's midpoint is half of its upper edge (i.e. its lower edge is
    taken to be 0); every later bin's midpoint is the average of its own upper
    edge and the previous bin's upper edge.

    Args:
        cur_age: upper edge of the bin of interest
        upper_ages (list): upper edges of all age bins, in bin order

    Returns:
        The bin midpoint, or None when ``upper_ages`` is empty or does not
        contain ``cur_age``.
    """
    # guard clause: nothing to look up
    if not upper_ages or cur_age not in upper_ages:
        return None

    midpoints = [upper_ages[0] / 2]
    midpoints += [(lower + upper) / 2 for lower, upper in zip(upper_ages, upper_ages[1:])]
    position = upper_ages.index(cur_age)
    return midpoints[position]
|
||
|
||
def plot_inc_ref_sim_comparison(sim_df, ref_df):
    """
    Plot age-incidence comparisons with reference

    Neither input dataframe is modified; the plotting columns are assembled
    in new dataframes.

    Args:
        sim_df (pd.DataFrame): simulation results; the columns 'Incidence',
            'p_detect_case', 'mean_age' and 'Site' are read
        ref_df (pd.DataFrame): reference data; the columns 'INC', 'INC_LAR',
            'INC_UAR', 'Site', 'POP' and 'START_YEAR' are read

    Returns:
        plotnine ggplot object with incidence against age-bin midpoint,
        colored by source (reference / simulation) and faceted by site
    """
    # set up reference and simulation dataset columns
    # simulated incidence is scaled by the case-detection probability
    sim_plot_df = pd.DataFrame(data={'Incidence': sim_df['Incidence'] * sim_df['p_detect_case'],
                                     'mean_age': sim_df['mean_age'],
                                     'Site': sim_df['Site'],
                                     'Pop_size': np.nan,
                                     'year': np.nan,
                                     'source': 'simulation'})
    # reference 'INC' is divided by 1000 (presumably reported per 1000 people - TODO confirm);
    # the reference age is the midpoint of the bin edges INC_LAR and INC_UAR
    ref_plot_df = pd.DataFrame(data={'Incidence': ref_df['INC'] / 1000,
                                     'mean_age': (ref_df['INC_LAR'] + ref_df['INC_UAR']) / 2,
                                     'Site': ref_df['Site'],
                                     'Pop_size': ref_df['POP'],
                                     'year': ref_df['START_YEAR'],
                                     'source': 'reference'})

    # pd.concat expects a sequence of dataframes; ignore_index avoids duplicated row labels
    df_combined = pd.concat([sim_plot_df, ref_plot_df], ignore_index=True)

    source_cat = CategoricalDtype(categories=['reference', 'simulation'], ordered=True)
    df_combined['source'] = df_combined['source'].astype(source_cat)

    gg = (ggplot(df_combined, aes(x='mean_age', y='Incidence', color='source', group='year'))
          + geom_line()
          + geom_point()
          + scale_color_manual(values={"reference": 'red',
                                       "simulation": 'blue'})
          + xlab('age (midpoint of age bin)')
          + ylab('incidence')
          + facet_wrap('Site', ncol=4)
          + theme_bw())
    return gg
144 changes: 144 additions & 0 deletions
144
create_plots/archive_organization/_20220603/helper_functions_infection_duration.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
##################################################################################### | ||
# compare_ref_sim_infection_duration_Navrongo | ||
# contact: mambrose | ||
# Jan 2022 | ||
# | ||
# Goal: simulate transmission in EMOD and sample to match reference dataset | ||
# Compare: | ||
# - probability a person goes from negative to positive between sample dates | ||
# - probability a person goes from positive to negative between sample dates | ||
# - fraction of samples positive | ||
# - among all individuals starting positive, how long until negative? | ||
# - among all individuals turning positive during study, how long until negative? | ||
##################################################################################### | ||
|
||
import numpy as np | ||
import pandas as pd | ||
import os.path as path | ||
import datetime | ||
import math | ||
import random | ||
|
||
|
||
def get_sim_survey(sim_dir, ref_df, seeds=np.nan):
    """
    Get subset of simulation dataset to match reference dataset for dates and ages of sampled individuals.

    For every simulation seed and every reference individual, one not-yet-used
    simulated individual is drawn at random among those present on the
    reference individual's first sample date with the same rounded age. If
    none is available the age window is widened in steps of 5 years (up to
    100). The simulated individual's rows on the reference sample dates are
    kept. The result is cached as 'sim_duration_survey_sampling.csv' inside
    ``sim_dir`` and re-read from there on later calls.

    Args:
        sim_dir (str): directory containing 'patient_reports.csv' (columns read:
            'simday', 'age' in days, 'Run_Number', 'id', 'true_asexual_parasites')
        ref_df (pd.DataFrame): reference data with columns 'SID', 'date' and 'age';
            it is not modified
        seeds (): iterable of Run_Number values to process (a single value is also
            accepted); NaN (default) or None means all seeds found in the report

    Returns:
        pd.DataFrame with the sampled simulation rows, a 'seed' column, 'date'
        formatted as 'YYYY-MM-DD' strings, and 'id' / 'true_asexual_parasites'
        renamed to 'SID' / 'DENSITY'

    Raises:
        ValueError: if ``ref_df`` contains no valid sample date
    """
    sampled_sim_filename = 'sim_duration_survey_sampling.csv'
    file_path = path.join(sim_dir, sampled_sim_filename)
    if path.isfile(file_path):
        # a previous call already did the sampling for this directory
        return pd.read_csv(file_path)

    # work on a copy with parsed dates so that the caller's dataframe is left untouched
    ref = ref_df.assign(date=pd.to_datetime(ref_df['date']))
    first_sample_date = ref['date'].min()
    if pd.isna(first_sample_date):
        raise ValueError('ref_df does not contain any valid sample date')
    # Get first year of sampling in reference dataset. the simulation will be referenced from the first day of
    # that year
    first_ref_date = pd.Timestamp(year=first_sample_date.year, month=1, day=1)
    indIDs = ref['SID'].dropna().unique()

    patient_report_path = path.join(sim_dir, 'patient_reports.csv')
    sim_full = pd.read_csv(patient_report_path)
    # simday is an offset in days from first_ref_date
    sim_full['date'] = first_ref_date + pd.to_timedelta(sim_full['simday'], unit='D')
    sim_full['age'] = sim_full['age'] / 365

    if seeds is None or (np.isscalar(seeds) and pd.isna(seeds)):
        seeds = sim_full['Run_Number'].unique()
    elif np.isscalar(seeds):
        seeds = [seeds]

    sim_subsets = []
    for seed in sorted(seeds):
        print(f'Currently on seed {seed}')
        sim = sim_full[sim_full['Run_Number'] == seed]  # subset to desired run
        sim_age_rounded = sim['age'].round()
        # track which individuals have already been included from the simulation
        # (to avoid double-sampling simulation individuals)
        included_ids = set()
        for ii, ref_id in enumerate(indIDs):
            if ii % 50 == 0:
                print(f'Currently on individual {ii} out of {len(indIDs)}')
            ref_df_cur = ref[ref['SID'] == ref_id].sort_values(by='date')
            # find a matching individual, based on the first reference sample
            age_cur = int(round(ref_df_cur['age'].iloc[0]))
            day_cur = ref_df_cur['date'].iloc[0]

            # use age-specific matches
            year_range = 0
            id_candidates = _unused_sim_candidates(sim, sim_age_rounded, day_cur, age_cur, year_range,
                                                   included_ids)
            # if no perfect age-match remain, expand year-range until finding a match
            while not id_candidates and year_range < 100:
                year_range = year_range + 5
                id_candidates = _unused_sim_candidates(sim, sim_age_rounded, day_cur, age_cur, year_range,
                                                       included_ids)

            if not id_candidates:
                print(f'Problem: no age-matched simulation individual found for reference id: {ref_id}')
                continue  # nothing to sample for this reference individual
            if year_range > 0:
                print(f'No exact age match remaining for reference id: {ref_id}'
                      f'. Used simulation individual within {year_range} years.')

            id_sim_cur = random.choice(id_candidates)
            included_ids.add(id_sim_cur)

            # keep the same simulation dates as the reference samples for this individual
            sim_subset_cur = sim[(sim['id'] == id_sim_cur) & (sim['date'].isin(ref_df_cur['date']))]
            sim_subsets.append(sim_subset_cur.assign(seed=seed))

    if sim_subsets:
        sim_subset_full = pd.concat(sim_subsets, ignore_index=True)
    else:
        # no match at all: keep the column layout but no rows
        sim_subset_full = sim_full.iloc[0:0].assign(seed=[])

    # store dates as 'YYYY-MM-DD' strings so that a fresh result and a result re-read from the cache agree
    sim_subset_full['date'] = sim_subset_full['date'].dt.strftime('%Y-%m-%d')
    # rename simulation columns to match reference data
    sim_subset_full = sim_subset_full.rename(columns={'id': 'SID', 'true_asexual_parasites': 'DENSITY'})
    sim_subset_full.to_csv(file_path, index=False)

    return sim_subset_full


def _unused_sim_candidates(sim, sim_age_rounded, day, age, year_range, included_ids):
    """Return ids of simulated individuals present on ``day``, within ``year_range`` years of ``age``, not yet used."""
    in_age_window = (sim_age_rounded >= age - year_range) & (sim_age_rounded <= age + year_range)
    candidate_ids = sim.loc[(sim['date'] == day) & in_age_window, 'id'].unique()
    return [cand for cand in candidate_ids if cand not in included_ids]
|
||
|
||
# def get_frac_state_swaps(data): | ||
# """ | ||
# Calculate probability of going from negative to positive or from positive to negative between sample dates | ||
# Args: | ||
# data (): | ||
# | ||
# Returns: | ||
# | ||
# """ | ||
# # brute force approach iterating through people and dates | ||
# indIDs = data['SID'].unique() | ||
# sum_denom_pos = 0 | ||
# sum_turn_neg = 0 | ||
# sum_denom_neg = 0 | ||
# sum_turn_pos = 0 | ||
# | ||
# for ii in range(len(indIDs)): | ||
# data_cur = data[data['SID'] == indIDs[ii]] | ||
# data_cur = data_cur.sort_values(by=['date']) | ||
# # get indices of positive tests and of negative tests | ||
# ind_pos = which(data_cur$DENSITY > pos_thresh_dens) | ||
# ind_neg = which(data_cur$DENSITY <= pos_thresh_dens) | ||
# | ||
# # denominators for each (number of each type that had an observation after then (i.e., they could have been observed to change)) | ||
# last_obs_pos = (data_cur$DENSITY[nrow(data_cur)] > pos_thresh_dens) | ||
# sum_denom_pos = sum_denom_pos + ifelse(last_obs_pos, length(ind_pos)-1, length(ind_pos)) | ||
# sum_denom_neg = sum_denom_neg + ifelse(last_obs_pos, length(ind_neg), length(ind_neg)-1) | ||
# | ||
# # find how many tests change from neg to pos or pos to neg across timesteps | ||
# sum_turn_neg = sum_turn_neg + sum((ind_pos + 1) % in % ind_neg) | ||
# sum_turn_pos = sum_turn_pos + sum((ind_neg + 1) % in % ind_pos) | ||
# | ||
# | ||
# frac_pos_turn_neg_next_time = sum_turn_neg / sum_denom_pos | ||
# frac_neg_turn_pos_next_time = sum_turn_pos / sum_denom_neg | ||
# | ||
# return (c(frac_pos_turn_neg_next_time, frac_neg_turn_pos_next_time)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.