The code below is to be run SECOND.

Starting from the filtered list of payments (from all sources) made to the US-registered physician authors of our included trials, the code below further filters the payment data to keep only payments that were made in the desired years (e.g., +/- 3 years from the trial results publication date) and that came from an SCS-involved company. It then sums the yearly SCS-related payments made to all US-registered physician authors of each trial.

In [None]:
from cs103 import *
import csv
from typing import List, NamedTuple
import time
import pandas as pd
from pandas import DataFrame


#Data Definitions

# One payment record, built from one row of the payment CSV by read_payment_data.
PaymentData = NamedTuple('PaymentData', [('physician_id', str),
                                         ('physician_first_name', str),
                                         ('physician_middle_name', str),
                                         ('physician_last_name', str),
                                         ('company_name', str),
                                         ('company_code', str),
                                         ('payment_amount', float),
                                         ('payment_year', int)])
# NOTE(review): payment_year was originally annotated "in range 2013-2019", but main() also
# tallies 2020 and the examples below use earlier years (2007, 2009, 2012) -- confirm the
# intended range.

# interp. the data surrounding one payment from OpenPayments, including: the OpenPayments physician id
# of the physician who received the payment, the physician's first name, middle initial, last name,
# the name and the code of the company who made the payment, the amount of the payment, and the
# year of the payment.


# Example payments used by the tests at the bottom of the file.
# PD01-PD04 come from companies whose names match none of the stim-company roots;
# PD05 and PD06 come from companies that do ('nevro', 'saluda').
PD01 = PaymentData('99999', 'Greg', 'G', 'Gregory', 'Medical Company', '853625', 11.25, 2012)
PD02 = PaymentData('888', 'Sally', 'S', 'Sampson', 'Health Company', '183671', 101921.25, 2017)
PD03 = PaymentData('777777', 'Hank', 'H', 'Hubert', 'Medco Sales Company', '18236491', 400, 2009)
PD04 = PaymentData('91873', 'Jenny', 'J', 'June', 'Implant Company',  '236193', 900.99, 2007)
# PD05 and PD06 share the same physician and the same company_code but differ in company_name.
PD05 = PaymentData('91873', 'Jenny', 'J', 'June', 'Nevro Company', '592548', 900.00, 2007)
PD06 = PaymentData('91873', 'Jenny', 'J', 'June', 'Saluda Medical', '592548', 900.00, 2007)

@typecheck
def fn_for_payment_data(pd: PaymentData)-> ...:
    # Template for a function that consumes one PaymentData: copy it, rename it, and replace
    # each `...` with real code. It is not meant to be called as written.
    # The fields are listed in the same order as in the PaymentData definition.
    return ...(pd.physician_id,
               pd.physician_first_name,
               pd.physician_middle_name,
               pd.physician_last_name,
               pd.company_name,
               pd.company_code,
               pd.payment_amount,
               pd.payment_year)
    



########################################################################


# List[PaymentData]
# interp. a list of PaymentData

# Example lists built from the example payments PD01-PD06.
LOPD01 = [PD01, PD02, PD03, PD04, PD05, PD06]  # every example payment
LOPD02 = [PD01]                                # a single payment
LOPD03 = [PD04]                                # a single payment
LOPD04 = [PD03, PD04]                          # two payments to two different physicians

@typecheck
def fn_for_lopd(lopd: List[PaymentData])-> ...:
    # Template for a function that consumes a list of PaymentData: copy it, rename it, and
    # replace each `...` with real code. It is not meant to be called as written.

    # description of the accumulator
    acc = ... # type: ...

    for pd in lopd:
        # apply the single-payment helper to the current element, then fold it into acc
        acc = ...(acc, fn_for_payment_data(pd))

    return ...(acc)
        
          
 
    
######################################################################


# One study (trial publication) together with its physician authors.
StudyData = NamedTuple('StudyData', [('PMID', str),
                                      ('year', int),
                                     ('sponsor', str),
                                      ('author_list', List[str])])
                                    #('yearly_payments', List[YearlyTotal])])
                                     #('payment_data', List[PaymentData])])
# (the two commented-out fields above are not part of the type)

# Example studies; the author ids match the physician ids of the example payments PD01-PD06.
SD01 = StudyData('816713', 2008, 'health co', ['99999', '888'])
SD02 = StudyData('918201', 2018, 'Medical Stim', ['99999', '888', '777777'])
SD03 = StudyData('816713', 2016, 'Nutrition Co.', ['91873', '888', '777777'])
SD04 = StudyData('816713', 2008, 'Med-Plus', ['91873', '777777', '99999', '888'])

# interp. the information for a single study, including: The PubMed ID of the study, the year the 
# study was published, the sponsor of the study, and a list of the OpenPayments physician ID's for
# authors of the paper who are a US-based physician and have data on OpenPayments.
                                     
@typecheck
def fn_for_study_data(sd: StudyData)-> ...:
    # Template for a function that consumes one StudyData: copy it, rename it, and replace
    # each `...` with real code. It is not meant to be called as written.
    return ...(sd.PMID,
               sd.year,
               sd.sponsor,
               sd.author_list)



########################################################################



# One output row: per-year payment totals and the summed "costs" for a single study.
# The field order is the column order written by write_to_csv.
StudyCost = NamedTuple('StudyCost', [('PMID', str),
                                     ('Year', int),             # publication year of the study
                                     ('All_2013', float),       # All_YYYY: total from all stim companies in YYYY
                                     ('All_2014', float),
                                     ('All_2015', float),
                                     ('All_2016', float),
                                     ('All_2017', float),
                                     ('All_2018', float),
                                     ('All_2019', float),
                                     ('All_2020', float),
                                     ('Sponsor_2013', float),   # Sponsor_YYYY: total from the trial sponsor in YYYY
                                     ('Sponsor_2014', float),
                                     ('Sponsor_2015', float),
                                     ('Sponsor_2016', float),
                                     ('Sponsor_2017', float),
                                     ('Sponsor_2018', float),
                                     ('Sponsor_2019', float),
                                     ('Sponsor_2020', float),
                                     ('Stim_Cost', float),      # result of sum_stim_cost
                                     ('Sponsor_Cost', float)])  # result of sum_sponsor_cost
                                    

# Example value. The numbers are arbitrary: Stim_Cost and Sponsor_Cost here are not derived
# from the yearly columns.
SC01 = StudyCost('5621846410', 2015, 15.0, 52.6, 26.5, 535.35, 356.64,15.0, 52.6, 26.5, 356.64,15.0, 52.6,
                 545.05, 35.25, 26.5, 45.0, 5264823.25, 32651084.25, 4184736.25)


# interp. the costs for a single study, including: The yearly cost of all stim companies, and the
# yearly cost for the trial sponsor, and the summed "trial cost" for all stim companies and the trial sponsor
                                     
@typecheck
def fn_for_study_cost(sc: StudyCost)-> ...:
    # Template for a function that consumes one StudyCost: copy it, rename it, and replace
    # each `...` with real code. It is not meant to be called as written.
    return ...(sc.PMID,
               sc.Year,
               sc.All_2013,
               sc.All_2014,
               sc.All_2015,
               sc.All_2016,
               sc.All_2017,
               sc.All_2018,
               sc.All_2019,
               sc.All_2020,
               sc.Sponsor_2013,
               sc.Sponsor_2014,
               sc.Sponsor_2015,
               sc.Sponsor_2016,
               sc.Sponsor_2017,
               sc.Sponsor_2018,
               sc.Sponsor_2019,
               sc.Sponsor_2020,
               sc.Stim_Cost,
               sc.Sponsor_Cost)
               



In [None]:
@typecheck
def main(data_filename: str, author_data_filename: str, new_filename: str) -> None:
    """
    Takes in a payment data file, and a file of author data, and writes a CSV of the trial costs
    for all trials in the author data file.

    data_filename:        CSV of payments, read with read_payment_data
    author_data_filename: CSV of study authors, read with pull_our_study_data
    new_filename:         path of the CSV to create; one row per study (see write_to_csv)
    """

    # The payment file is parsed once and the resulting list is filtered per study,
    # rather than re-reading the whole file for every study.
    all_payment_data = read_payment_data(data_filename)
    all_study_data_list = pull_our_study_data(author_data_filename)

    # 2013..2020 inclusive, in the same order as the All_YYYY / Sponsor_YYYY fields of StudyCost.
    payment_years = range(2013, 2021)

    list_of_study_costs = []  # type: List[StudyCost]

    for study in all_study_data_list:
        # payments to this study's authors from stim companies
        study_payments = pull_relevant_payments(all_payment_data, study)

        # the sponsor does not change between years, so reduce it to its root once
        sponsor_root = parse_sponsor(study.sponsor)

        stim_cost_yearly_totals = [sum_year_all_stim(study_payments, yr)
                                   for yr in payment_years]
        sponsor_cost_yearly_totals = [sum_one_company_one_year(study_payments, sponsor_root, yr)
                                      for yr in payment_years]

        stim_cost = sum_stim_cost(study.year, stim_cost_yearly_totals)
        sponsor_cost = sum_sponsor_cost(study.year, sponsor_cost_yearly_totals)

        # positional order: PMID, Year, 8 x All_YYYY, 8 x Sponsor_YYYY, Stim_Cost, Sponsor_Cost
        sc = StudyCost(study.PMID, study.year,
                       *stim_cost_yearly_totals,
                       *sponsor_cost_yearly_totals,
                       stim_cost, sponsor_cost)

        list_of_study_costs.append(sc)

    write_to_csv(list_of_study_costs, new_filename)
    

    
     
        
@typecheck
def pull_payment_data(data_filename: str, study: StudyData)-> List[PaymentData]:
    """
    Reads every payment in data_filename and returns only those that are relevant to the
    given study (see is_relevant_payment for what counts as relevant).

    The file is read in full on every call.
    """
    return pull_relevant_payments(read_payment_data(data_filename), study)
    

    
    ## Uncomment to check for list length and content
    #return len(payment_list)
    #return payment_list

    


    
@typecheck
def pull_relevant_payments(lopd: List[PaymentData], sd: StudyData)-> List[PaymentData]:
    """
    Returns, in their original order, the payments from lopd that are relevant to the study sd.

    Relevance is decided payment by payment by is_relevant_payment.
    """
    return [payment for payment in lopd if is_relevant_payment(payment, sd)]
            
               
            
@typecheck
def is_relevant_payment(pd: PaymentData, sd: StudyData)-> bool:
    """
    Decides whether a single payment counts toward a single study.

    A payment is relevant when both of the following hold:
      1) it was issued to a physician whose id is in the study's author list, and
      2) the paying company's name is recognised as a spinal cord stimulation company.
    """
    recipient_is_author = is_paper_author(sd.author_list, pd.physician_id)
    if not recipient_is_author:
        return False

    # the company is only examined for payments made to an author of the study
    return is_stim_company(pd.company_name)




@typecheck
def is_stim_company(s: str)-> bool:
    """
    Takes in a string representing a company, and returns True if it is a stimulator company.

    A company counts as a stimulator company when its name contains, ignoring case, any of the
    root fragments listed below. The match is a plain substring test, so 'Nevrocorp' matches
    'nevro' and the empty string matches nothing.
    """

    # NOTE(review): short roots such as 'jude', 'boston' and 'abbott' will also match any other
    # company whose name happens to contain them -- confirm against the real payer names.
    company_roots = ('nevro', 'jude', 'saluda', 'stimwave', 'medtronic', 'abbott', 'spinal mod',
                     'nuvectra', 'boston')

    # casefold once instead of once per root
    folded_name = s.casefold()

    return any(root in folded_name for root in company_roots)
           



@typecheck
def is_paper_author(author_list: List[str], author: str) -> bool:
    """
    Returns True when author is one of the entries of author_list, the list of authors of a
    paper, and False otherwise (including when the list is empty).

    The comparison is exact string equality.
    """
    return any(listed == author for listed in author_list)



    
@typecheck
def read_payment_data(filename: str)-> List[PaymentData]:
    """
    Reads the payment CSV at filename and returns one PaymentData per data row, in file order.

    The first line is treated as a header and skipped. Columns 0-5 are taken as-is for the
    physician id, first name, middle name, last name, company name and company code; column 6
    is parsed as the payment amount and column 9 as the payment year. Columns 7 and 8 are
    not used.
    """
    with open(filename, newline='') as payments_file:
        records = csv.reader(payments_file)
        next(records) # skip header line

        return [PaymentData(*record[:6], parse_float(record[6]), parse_int(record[9]))
                for record in records]

   
    
    
    
@typecheck
def pull_our_study_data(filename: str)-> List[StudyData]:
    """
    Returns the StudyData for every study listed in the author file at filename.

    The distinct PMIDs are collected first (pull_study_PMIDS), then each study is assembled by
    pull_study_data, which opens the file again for each PMID. The result keeps the order in
    which the PMIDs were returned.
    """
    return [pull_study_data(filename, pmid) for pmid in pull_study_PMIDS(filename)]



@typecheck
def pull_study_data(filename: str, study_PMID: str)-> StudyData:
    """
    Reads the author file at filename and returns the study data for the specified study.

    The first line is treated as a header and skipped. Every row whose column 4 equals
    study_PMID contributes its column 1 to the author list; the year (column 5, parsed as an
    int) and sponsor (column 6) are taken from the last such row.

    Raises ValueError if no row of the file belongs to study_PMID.
    """

    # loa contains the list of study authors seen so far
    loa = []  # type: List[str]
    year = None
    sponsor = None

    with open(filename, newline='') as csvfile:

        reader = csv.reader(csvfile)
        next(reader) # skip header line

        for row in reader:
            if row[4] == study_PMID:
                loa.append(row[1])
                year = parse_int(row[5])
                sponsor = row[6]

    # Without at least one matching row there is no year or sponsor to report; fail with a clear
    # message instead of tripping over unassigned variables.
    if not loa:
        raise ValueError('no rows found for PMID {!r} in {!r}'.format(study_PMID, filename))

    return StudyData(study_PMID, year, sponsor, loa)


@typecheck
def pull_study_PMIDS(filename: str)-> List[str]:
    """
    Reads the author file at filename and returns the distinct study PMIDs it contains, in
    order of first appearance.

    The first line is treated as a header and skipped; the PMID is taken from column 4 of
    each remaining row.
    """

    # los contains the list of study PMID's seen so far, in first-seen order
    los = []  # type: List[str]
    # same PMIDs as los, kept as a set so the duplicate check does not rescan the list
    seen = set()

    with open(filename, newline='') as csvfile:

        reader = csv.reader(csvfile)
        next(reader) # skip header line

        for row in reader:
            pmid = row[4]
            if pmid not in seen:
                seen.add(pmid)
                los.append(pmid)

    return los
    
@typecheck
def parse_sponsor(sponsor: str) -> str:
    """
    Reduces a study sponsor name to the stim-company root it contains.

    The match ignores case and is a plain substring test. When several roots occur in the
    sponsor name, the one that comes latest in the list below is returned. When none occurs,
    the string 'Not Applicable' is returned.
    """
    stim_company_roots = ['nevro', 'jude', 'saluda', 'stimwave', 'medtronic', 'abbott', 'spinal mod', 'nuvectra',
                    'abbott', 'boston']

    folded_sponsor = sponsor.casefold()

    # Walk the list back to front: the first hit is then the latest matching root in list order.
    for root in reversed(stim_company_roots):
        if root in folded_sponsor:
            return root

    return 'Not Applicable'

        
    
@typecheck
def sum_one_company_one_year(lopd: List[PaymentData], c: str, yr: int)-> float:
    """
    Totals the payment amounts in lopd that were made in year yr by a company whose name
    contains c.

    c is looked for inside the casefolded company name, so it only matches when it is itself
    lower case. Returns 0.0 when nothing matches.
    """
    total = 0.0

    for payment in lopd:
        from_company = c in payment.company_name.casefold()
        if from_company and payment.payment_year == yr:
            total += payment.payment_amount

    return total

        
    
@typecheck
def sum_year_all_stim(lopd: List[PaymentData], yr: int)-> float:
    """
    Totals the payment amounts of every payment in lopd that was made in year yr.

    No company filter is applied here: the list is expected to contain only stim-company
    payments already (main passes in the output of the relevance filter). Returns 0.0 when no
    payment falls in yr.
    """
    total = 0.0

    for payment in (p for p in lopd if p.payment_year == yr):
        total += payment.payment_amount

    return total


@typecheck
def sum_stim_cost(year: int, yearly_stim_totals: List[float]) -> float:
    """
    Takes in the year of one study, and calculates the 'cost' of that study. The cost of the study is
    defined as all payments from stim-related companies in the 2 years prior to and the year of publication.

    yearly_stim_totals holds one total per calendar year starting at 2013 (index 0 is 2013,
    index 1 is 2014, and so on). Years before 2013 have no entry, so a 2013 study counts one
    year and a 2014 study counts two.

    Raises ValueError when year has no entry in yearly_stim_totals, i.e. it is before 2013 or
    beyond the last year the list covers.
    """
    first_year = 2013   # calendar year stored at index 0
    years_counted = 3   # publication year plus the 2 years before it

    publication_index = year - first_year

    # Fail loudly for a year the totals do not cover rather than falling off the end of the
    # function and handing back None where a float is promised.
    if not 0 <= publication_index < len(yearly_stim_totals):
        raise ValueError('year {} is outside the years covered by the totals ({}-{})'.format(
            year, first_year, first_year + len(yearly_stim_totals) - 1))

    # clamp at 0 so the window shrinks for studies published in the first two covered years
    window_start = max(0, publication_index - (years_counted - 1))

    return sum(yearly_stim_totals[window_start:publication_index + 1], 0.0)
    
    
    
@typecheck
def sum_sponsor_cost(year: int, yearly_sponsor_totals: List[float]) -> float:
    """
    Takes in the year of one study, and calculates the 'cost' of that study. The cost of the study is
    defined as all payments from the sponsor company in the 2 years prior to and the year of publication.

    yearly_sponsor_totals holds one total per calendar year starting at 2013 (index 0 is 2013,
    index 1 is 2014, and so on). Years before 2013 have no entry, so a 2013 study counts one
    year and a 2014 study counts two.

    Raises ValueError when year has no entry in yearly_sponsor_totals, i.e. it is before 2013
    or beyond the last year the list covers.
    """
    first_year = 2013   # calendar year stored at index 0
    years_counted = 3   # publication year plus the 2 years before it

    publication_index = year - first_year

    # Fail loudly for a year the totals do not cover rather than falling off the end of the
    # function and handing back None where a float is promised.
    if not 0 <= publication_index < len(yearly_sponsor_totals):
        raise ValueError('year {} is outside the years covered by the totals ({}-{})'.format(
            year, first_year, first_year + len(yearly_sponsor_totals) - 1))

    # clamp at 0 so the window shrinks for studies published in the first two covered years
    window_start = max(0, publication_index - (years_counted - 1))

    return sum(yearly_sponsor_totals[window_start:publication_index + 1], 0.0)
    


@typecheck    
def write_to_csv(study_cost_list: List[StudyCost], new_filename: str) -> None:
    """
    Writes the given study costs to a new CSV file named new_filename.

    The file starts with a fixed header row and then has one row per StudyCost, with the
    values in field order. An existing file of the same name is overwritten.
    """
    header = ['PMID', 'Year', '2013 All', '2014 All', '2015 All', '2016 All', '2017 All', '2018 All',
              '2019 All', '2020 All',
              '2013 Sponsor', '2014 Sponsor', '2015 Sponsor', '2016 Sponsor', '2017 Sponsor', '2018 Sponsor',
              '2019 Sponsor', '2020 Sponsor', 'Stim Related Cost', 'Sponsor Cost']

    with open(new_filename, 'w', newline='') as out_file:
        # header first, then every study in list order
        csv.writer(out_file).writerows([header, *study_cost_list])
    



# Test script: start_testing() opens the run, each expect(actual, expected) is one check, and
# summary() closes the run (presumably reporting the results -- see the cs103 library).
start_testing()


# NOTE(review): the disabled checks below call is_stim_company with two arguments (a list and a
# name), but is_stim_company as defined in this file takes a single company-name string.
# expect(is_stim_company(['Company 1'], 'Company 1'), True)
# expect(is_stim_company(['Company 1'], 'Company 2'), False)
# expect(is_stim_company(['Company 1', 'Company 2'], 'Company 1'), True)
# expect(is_stim_company(['Company 1', 'Company 2'], 'Company 3'), False)
# expect(is_stim_company([], 'Company 1'), False)


# NOTE(review): the disabled check below calls `read`, which is not defined in the visible part
# of this file (the reader defined here is read_payment_data).
#expect(read('Tests/All_years_filtered_GP_data_April_19_Data_Pull-2_Lines.csv'), [PaymentData('184740', 
#                                                                                        'MICHAEL', 
#                                                                                        'I', 
#                                                                                        'YANG',                                                                                       
#                                                                                        'Forest Laboratories, Inc.',
#                                                                                        '100000005529',
#                                                                                        14.57,
#                                                                                        2013),
#                                                                           PaymentData('184740', 
#                                                                                        'MICHAEL', 
#                                                                                        'I', 
#                                                                                        'YANG',                                                                                       
#                                                                                        'Forest Laboratories, Inc.',
#                                                                                     '100000005529',
#                                                                                       17.06,
#                                                                                        2013)])


# is_paper_author: present / absent, one- and two-element lists, empty list
expect(is_paper_author(['Author 1'], 'Author 1'), True)
expect(is_paper_author(['Author 2'], 'Author 1'), False)
expect(is_paper_author(['Author 1', 'Author 2'], 'Author 1'), True)
expect(is_paper_author(['Author 1', 'Author 2'], 'Author 3'), False)
expect(is_paper_author([], 'Author 1'), False)


# is_stim_company: matching ignores case and works on substrings ('Nevrocorp' contains 'nevro')
expect(is_stim_company('St. Jude Medical'), True)
expect(is_stim_company('Nevro Corp.'), True)
expect(is_stim_company('NEVRO CORP'), True)
expect(is_stim_company('St. Bernard Medical'), False)
expect(is_stim_company('Pfizer'), False)
expect(is_stim_company('Nevrocorp'), True)
expect(is_stim_company('Health Medical'), False)
expect(is_stim_company('Saluda'), True)
expect(is_stim_company('SaLuDA'), True)
expect(is_stim_company('Spinal Modulation'), True)


# is_relevant_payment: in all four cases the recipient is an author of the study, so the
# result is decided by whether the paying company is a stim company.
expect(is_relevant_payment(PD01, SD01), False)   # 'Medical Company' is not a stim company
expect(is_relevant_payment(PD04, SD04), False)   # 'Implant Company' is not a stim company
expect(is_relevant_payment(PD05, SD04), True)    # 'Nevro Company'
expect(is_relevant_payment(PD06, SD04), True)    # 'Saluda Medical'


# NOTE(review): SD03's author list contains '91873', the recipient of PD05 and PD06, so the
# second disabled check below looks like it should expect [PD05, PD06] rather than [] -- confirm
# before re-enabling.
# expect(pull_relevant_payments(LOPD01, SD04), [PD05, PD06])
# expect(pull_relevant_payments(LOPD01, SD03), [])
       
summary()