# Sorting out BNF

This notebook uses knowledge from datalab and the NHS spreadsheet on converting BNF codes to create a simplified dataset that allows for basic analysis of BNF data.

In [209]:
import os
# Set the working directory to the data folder so the files can be opened by bare name.
# NOTE: the path is relative, so running this cell a second time in the same kernel
# resolves '../Data' against the directory that was already changed into.
os.chdir('../Data')

In [210]:
import pandas as pd

In [211]:
# Import the two necessary datasets.
# pd.read_csv already returns a DataFrame, so no further conversion is required.
toycomp = pd.read_csv('Combined_TOYCOMP_NHS_data.csv')
bnf = pd.read_csv('20200201_1580570906919_BNF_Code_Information.csv')
def rename_unname(df):
    """Drop, in place, every column whose label starts with 'Unnamed'.

    Despite its name, this helper does not rename anything: it removes the
    columns whose label begins with the text 'Unnamed'.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to clean. It is modified in place.

    Returns
    -------
    None
    """
    # Collect the labels first so the frame is not modified while it is being
    # iterated; str() keeps non-string labels (e.g. integers) from raising.
    unnamed_cols = [col for col in df.columns if str(col).startswith('Unnamed')]
    if unnamed_cols:
        df.drop(columns=unnamed_cols, inplace=True)
rename_unname(toycomp) # Drop the 'Unnamed...' column(s) from toycomp; the helper modifies the frame in place

Aim: separate the data out so that there is one dictionary for each of the two columns.
Need to check that the spacing works, because in this one the first column only has a single reference number. Thinking we only want to go up to the 6th column, as beyond that point it gets too specific on the disease.

In [212]:
# Sorting this out on a toy practice dataset first - then apply to the full one afterwards.
# Every new column is a fixed character range (start, stop) of the 'bnf.code' string.
code_parts = {
    'bnf.chapter': (0, 2),    # characters 1-2 of the code = chapter
    'bnf.section': (2, 4),    # characters 3-4 of the code = BNF section
    'bnf.paragraph': (4, 6),  # characters 5-6 of the code = BNF paragraph
    'bnf.chemical': (7, 9),
    'bnf.letters': (9, 15),
}
for part_name, (first, end) in code_parts.items():
    toycomp[part_name] = toycomp['bnf.code'].str.slice(first, end)

In [213]:
# Show every column label of toycomp as a plain list.
print(toycomp.columns.tolist())

['timepoint', 'e8...', 'name', 'address_1', 'address_2', 'address_3', 'area', 'postcode', 'national_grouping', 'high_level_health_geography', 'date_open', 'date_close', 'status_code', 'subtype', 'commissioner', 'setting_all_gp_reference', 'type', 'ccg_code', 'ons_ccg_code', 'sex', 'age', 'number_of_patients', 'organisation_code', 'ccg/pct', 'primary_care_organisation_type', 'join_parent_date', 'left_parent_date', 'amended_record_indicator', 'sha', 'practice', 'bnf.code', 'bnf.name', 'items', 'nic', 'act.cost', 'quantity', 'period', 'bnf.chapter', 'bnf.section', 'bnf.paragraph', 'bnf.chemical', 'bnf.letters']


# BNF
### Formatting dataframe to compare bnf code to final dataset

For strings in bnf where chapter code length = 1, add a 0 on to the beginning.
Add it into a new column to try it out, then apply it to the original column in the data set.   
Reference on this function: https://docs.python.org/3/library/stdtypes.html#str.zfill

In [214]:
# Give the three code columns short names and discard the subparagraph code.
# Both operations modify bnf in place.
bnf_code_renames = {
    'BNF Chapter Code': 'bnf-chaptercode',
    'BNF Section Code': 'bnf-sectioncode',
    'BNF Paragraph Code': 'bnf-paragraph',
}
to_drop = ['BNF Subparagraph Code']
bnf.rename(columns=bnf_code_renames, inplace=True)
bnf.drop(columns=to_drop, inplace=True)

In [215]:
# Reduce each code column to the two characters that identify its own level, so the
# values can be compared with the two-character pieces cut from 'bnf.code'.
#
# The codes lose their leading zero when they are not stored as text (chapter 1,
# section 101, paragraph 10101), but chapters 10 and above keep their full width
# (section 1001, paragraph 100101). Slicing at fixed offsets from the left is
# therefore only right for chapters 1-9. Padding back to the full width first
# (2, 4 and 6 characters) and then taking the last two characters is correct for
# every chapter, and also leaves the values unchanged if this cell is run again.
# NOTE(review): missing values would become the text 'nan' via astype(str) - confirm
# the code columns contain no gaps.
bnf['bnf-chaptercode'] = bnf['bnf-chaptercode'].astype(str).str.zfill(2)           # e.g. 1 -> '01'
bnf['bnf-sectioncode'] = bnf['bnf-sectioncode'].astype(str).str.zfill(4).str[2:4]  # e.g. 1001 -> '01'
bnf['bnf-paragraph'] = bnf['bnf-paragraph'].astype(str).str.zfill(6).str[4:6]      # e.g. 100101 -> '01'

# Export final bnf dataframe

In [216]:
import csv

# Build one lookup table per BNF level (name column, code column, product code)
# and write each table to its own file in the working directory.
# NOTE(review): the output names have no '.csv' extension and the section file is
# spelled "bnf_bnf_section_dictionary"; both are kept exactly as they were.
product_code_col = 'BNF Product Code'
bnf_chapter_dictionary = bnf[['BNF Chapter', 'bnf-chaptercode', product_code_col]]
bnf_section_dictionary = bnf[['BNF Section', 'bnf-sectioncode', product_code_col]]
bnf_paragraph_dictionary = bnf[['BNF Paragraph', 'bnf-paragraph', product_code_col]]

for lookup_table, out_name in (
    (bnf_chapter_dictionary, "bnf_chapter_dictionary"),
    (bnf_section_dictionary, "bnf_bnf_section_dictionary"),
    (bnf_paragraph_dictionary, "bnf_paragraph_dictionary"),
):
    lookup_table.to_csv(out_name)

In [218]:
# Write the enriched toy dataset. index=False keeps the automatic row index out of
# the file; otherwise it comes back as an 'Unnamed: 0' column the next time the
# CSV is read.
toycomp.to_csv("Combined_TOYCOMP_BNF_NHS_data.csv", index=False)