In [19]:
import pandas as pd
import random

def select_random_item_number():
    # Load the data from the CSV file
    data = pd.read_csv('Daily Snapshot.csv')

    # Check if the 'Item Number – 8 digit' column exists
    if 'Item Number – 8 digit' in data.columns:
        # Select a random 'Item Number – 8 digit'
        random_item_number = random.choice(data['Item Number – 8 digit'].tolist())
        return random_item_number

def ExactDrugAlgoFunction(drug_code):
    # Item number of the drug to run the similarity test on
    reference_item_number = drug_code

    # Let's open and read the content of the uploaded file to understand its structure and data.
    file_path = 'Daily Snapshot.csv'

    # Reading the file

    # Load the data from the CSV file
    data = pd.read_csv(file_path)

    # Removing the specified columns and reordering the 'Item Number – 8 digit' column
    columns_to_remove = ['Item Number – 6 digit', 'UPC Number', 'Constant', 
                        'Customer-Specific Item Number', 'Pack Size Divisor', 
                        'RX/OTC Indicator']

    # Removing the columns
    data_cleaned = data.drop(columns=columns_to_remove)

    # Reordering 'Item Number – 8 digit' to the left
    column_to_move = data_cleaned.pop('Item Number – 8 digit')
    data_cleaned.insert(0, 'Item Number – 8 digit', column_to_move)

    # Moving all price columns and the contract flag to the right
    columns_to_move = ['AWP Price', 'Acquisition Price', 'Retail Price', 'WAC Price', 'Contract Flag']
    for col in columns_to_move:
        data_cleaned[col] = data_cleaned.pop(col)

    import re

    # Function to split the generic description into generic name and form
    def split_description(desc):
        match = re.search(r'[A-Z]', desc)
        if match:
            index = match.start()
            return desc[:index].strip(), desc[index:].strip()
        else:
            return desc, ''

    # Applying the function to split 'Generic Description'
    data_cleaned['Generic Name'], data_cleaned['Form'] = zip(*data_cleaned['Generic Description'].apply(split_description))
    data_cleaned.drop(columns=['Generic Description'], inplace=True)

    # Removing rows where 'Generic Name' is empty or whitespace
    data_cleaned = data_cleaned[data_cleaned['Generic Name'].str.strip() != '']

    # Function to split the description into name and size
    def split_description_on_number(desc):
        match = re.search(r'\d', desc)
        if match:
            index = match.start()
            return desc[:index].strip(), desc[index:].strip()
        else:
            return desc, ''

    # Applying the function to split 'Description'
    data_cleaned['Name'], data_cleaned['Size'] = zip(*data_cleaned['Description'].apply(split_description_on_number))
    data_cleaned.drop(columns=['Description'], inplace=True)

    # Find and print the row for the given reference_item_number
    matching_row = data_cleaned[data_cleaned['Item Number – 8 digit'] == reference_item_number]
    

    # Find the Generic Name for the given reference_item_number
    reference_generic_name = data_cleaned.loc[data_cleaned['Item Number – 8 digit'] == reference_item_number, 'Generic Name'].iloc[0]


    # Create a copy of the dataframe filtered by Generic Name
    data_lidocaine = data_cleaned[data_cleaned['Generic Name'] == reference_generic_name].copy()

    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity

    # Selecting the reference item
    reference_item = data_lidocaine[data_lidocaine['Item Number – 8 digit'] == reference_item_number]
    if reference_item.empty:
        return "Reference item not found in the dataset."

    # Extracting the form of the reference item
    reference_form = reference_item.iloc[0]['Form']
    forms = data_lidocaine['Form'].tolist()
    forms.insert(0, reference_form)

    # Vectorizing the forms using TF-IDF
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(forms)

    # Calculating cosine similarity
    cosine_similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()
    data_lidocaine['Similarity'] = cosine_similarities

    # Filtering the dataset to show only items with a similarity score above 0.9
    similarity_items = data_lidocaine[data_lidocaine['Similarity'] > 0].copy()

    # Extracting the size of the reference item
    reference_size = reference_item.iloc[0]['Size']
    sizes = similarity_items['Size'].tolist()
    sizes.insert(0, reference_size)

    # Vectorizing the sizes using TF-IDF
    tfidf_matrix_sizes = vectorizer.fit_transform(sizes)

    # Calculating cosine similarity for sizes
    cosine_similarities_sizes = cosine_similarity(tfidf_matrix_sizes[0:1], tfidf_matrix_sizes[1:]).flatten()
    similarity_items['Size Similarity'] = cosine_similarities_sizes

    # Filtering by size similarity
    high_similarity_items_filtered_by_size = similarity_items[similarity_items['Size Similarity'] > 0]

    # Remove the first row
    high_similarity_items_filtered_by_size = high_similarity_items_filtered_by_size.iloc[1:]
    
    high_similarity_items_filtered_by_size = high_similarity_items_filtered_by_size.sort_values(by=['Similarity'], ascending=False)
    high_similarity_items_filtered_by_size_2 = high_similarity_items_filtered_by_size.sort_values(by=['Size Similarity'], ascending=False)
    return high_similarity_items_filtered_by_size.head(10), high_similarity_items_filtered_by_size_2.head(10), matching_row

# Example usage
# results = ExactDrugAlgoFunction(your_drug_code)
# print(results.head())


In [27]:
random_item_number = select_random_item_number()
similarity, size_similarity, matching_row = ExactDrugAlgoFunction(random_item_number)
matching_row

Unnamed: 0,Item Number – 8 digit,NDC Number,Size Qty,Retail Pack Quantity,AWP Price,Acquisition Price,Retail Price,WAC Price,Contract Flag,Generic Name,Form,Name,Size
14109,10167172,57237005160,60,1,19094,3228,0,3925,C,galantamine,HBr ORAL TABLET 12,GALANTAMINE,12 MG TAB 60


In [28]:
similarity

Unnamed: 0,Item Number – 8 digit,NDC Number,Size Qty,Retail Pack Quantity,AWP Price,Acquisition Price,Retail Price,WAC Price,Contract Flag,Generic Name,Form,Name,Size,Similarity,Size Similarity
14109,10167172,57237005160,60,1,19094,3228,0,3925,C,galantamine,HBr ORAL TABLET 12,GALANTAMINE,12 MG TAB 60,1.0,1.0
34794,10211342,65862046060,60,1,19094,2131,0,9950,C,galantamine,HBr ORAL TABLET 12,GALANTAMINE,12 MG TAB 60,1.0,1.0
7818,10140985,68084072921,30,1,12600,7238,0,10500,C,galantamine,HBr ORAL TABLET 4,GALANTAMINE,4 MG TAB 30 UD AHP,0.648382,0.194273
14141,10167254,57237004960,60,1,19094,3624,0,3925,N,galantamine,HBr ORAL TABLET 4,GALANTAMINE,4 MG TAB 60,0.648382,0.711662
14165,10167315,57237005060,60,1,19094,3624,0,3925,N,galantamine,HBr ORAL TABLET 8,GALANTAMINE,8MG TAB 60,0.648382,0.373586
34795,10211360,65862045960,60,1,19094,3087,0,6950,C,galantamine,HBr ORAL TABLET 8,GALANTAMINE,8 MG TAB 60,0.648382,0.711662
34799,10211381,65862045860,60,1,19094,1804,0,3950,C,galantamine,HBr ORAL TABLET 4,GALANTAMINE,4 MG TAB 60,0.648382,0.711662
35663,10223968,70436000406,60,1,19100,3624,0,3925,N,galantamine,HBr ORAL TABLET 4,GALANTAMINE,4 MG TAB 60,0.648382,0.711662
2548,10098259,47335083583,30,1,19094,2519,0,13748,C,galantamine,HBr ORAL CAP24H PE,GALANTAMINE,8 MG ER CAP 30,0.206009,0.095974
2549,10098300,47335083683,30,1,19094,2519,0,13748,C,galantamine,HBr ORAL CAP24H PE,GALANTAMINE,16 MG ER CAP 30,0.206009,0.074483


In [29]:
size_similarity

Unnamed: 0,Item Number – 8 digit,NDC Number,Size Qty,Retail Pack Quantity,AWP Price,Acquisition Price,Retail Price,WAC Price,Contract Flag,Generic Name,Form,Name,Size,Similarity,Size Similarity
14109,10167172,57237005160,60,1,19094,3228,0,3925,C,galantamine,HBr ORAL TABLET 12,GALANTAMINE,12 MG TAB 60,1.0,1.0
34794,10211342,65862046060,60,1,19094,2131,0,9950,C,galantamine,HBr ORAL TABLET 12,GALANTAMINE,12 MG TAB 60,1.0,1.0
14141,10167254,57237004960,60,1,19094,3624,0,3925,N,galantamine,HBr ORAL TABLET 4,GALANTAMINE,4 MG TAB 60,0.648382,0.711662
34795,10211360,65862045960,60,1,19094,3087,0,6950,C,galantamine,HBr ORAL TABLET 8,GALANTAMINE,8 MG TAB 60,0.648382,0.711662
34799,10211381,65862045860,60,1,19094,1804,0,3950,C,galantamine,HBr ORAL TABLET 4,GALANTAMINE,4 MG TAB 60,0.648382,0.711662
35663,10223968,70436000406,60,1,19100,3624,0,3925,N,galantamine,HBr ORAL TABLET 4,GALANTAMINE,4 MG TAB 60,0.648382,0.711662
14165,10167315,57237005060,60,1,19094,3624,0,3925,N,galantamine,HBr ORAL TABLET 8,GALANTAMINE,8MG TAB 60,0.648382,0.373586
7818,10140985,68084072921,30,1,12600,7238,0,10500,C,galantamine,HBr ORAL TABLET 4,GALANTAMINE,4 MG TAB 30 UD AHP,0.648382,0.194273
2548,10098259,47335083583,30,1,19094,2519,0,13748,C,galantamine,HBr ORAL CAP24H PE,GALANTAMINE,8 MG ER CAP 30,0.206009,0.095974
18601,10179391,65862074430,30,1,19094,2092,0,4500,C,galantamine,HBr ORAL CAP24H PE,GALANTAMINE,8 MG ER CAP 30,0.206009,0.095974
