In [2]:
	import pandas as pd
	import numpy as np

	# Paths and column selection for the cleaning step.
	# NOTE: output_path is the same file as file_path, so the raw workbook is
	# replaced by the cleaned table when this cell runs.
	file_path = '/content/Original.xlsx'
	output_path = '/content/Original.xlsx'
	columns_to_keep = ['Sex', 'Patient Age', 'Patient Weight', 'Reason for Use', 'Reactions']

	# Open the workbook once to discover its sheets and take the first one
	# (swap in an explicit name here to target a different sheet).
	excel_file = pd.ExcelFile(file_path)
	sheet_name = excel_file.sheet_names[0]

	# Read the sheet, turn the placeholder strings into NaN, then narrow the
	# frame to the columns of interest.
	df = pd.read_excel(file_path, sheet_name=sheet_name).replace(['Not Specified', '-'], np.nan)
	df = df[columns_to_keep]

	# Keep only rows where every retained column has a value.
	df_cleaned = df.dropna()

	# Show the result and persist it.
	print(df_cleaned)
	df_cleaned.to_excel(output_path, index=False)
	print(f"Cleaned data saved to: {output_path}")


  df.replace(['Not Specified', '-'], np.nan, inplace=True)


          Sex Patient Age Patient Weight  \
0        Male       55 YR       88.65 KG   
15     Female       89 YR       78.93 KG   
19       Male       89 YR       77.11 KG   
38     Female       69 YR          58 KG   
40     Female       87 YR      58.957 KG   
...       ...         ...            ...   
48289  Female       64 YR       67.13 KG   
48290    Male       78 YR         104 KG   
48291  Female       41 YR        79.2 KG   
48292  Female       67 YR          81 KG   
48293  Female       23 YR          90 KG   

                                          Reason for Use  \
0                                               Covid-19   
15                                    Covid-19 Treatment   
19                                    Covid-19 Treatment   
38                 Cognitive Disorder;Covid-19 Treatment   
40     Anxiety;Autoimmune Hepatitis;Blood Pressure Ab...   
...                                                  ...   
48289                                 Multiple Scle

In [3]:
import pandas as pd

# Read the workbook's default (first) sheet into a DataFrame.
file_path = '/content/Original.xlsx'  # Update this if necessary
df = pd.read_excel(io=file_path)

# Function to create age groups based on the 'Patient Age' column
def create_age_group(age):
    """Map a 'Patient Age' value onto one of the notebook's age-group labels.

    Parameters
    ----------
    age : str, number, or missing value
        Either text of the form '<number> <unit>' (for example '39 YR') or a
        bare number, which is taken to be an age in years.

    Returns
    -------
    str or None
        One of '0-6', '7-12', '13-21', '22-35', '36-50', '51-60', '61-80',
        '81+'. None is returned for missing values and for text whose leading
        token is not a number.
    """
    if pd.isnull(age):
        return None

    if isinstance(age, str):
        tokens = age.split()
        try:
            years = float(tokens[0])
        except (IndexError, ValueError):
            # Blank text or a non-numeric leading token: no usable age.
            return None
        unit = tokens[1].upper() if len(tokens) > 1 else ''
        # Convert sub-year units so that e.g. '12 mth' is one year, not twelve.
        # NOTE(review): 'DAY' and 'MTH' are the unit markers handled elsewhere
        # in this notebook; any other unit is still read as years - confirm
        # against the data.
        if 'DAY' in unit:
            years /= 365
        elif 'MTH' in unit:
            years /= 12
    else:
        years = float(age)

    # Inclusive upper bound of each bucket, paired with its label. The bounds
    # agree with the labels (the '22-35' bucket ends at 35, not 40).
    buckets = (
        (6, '0-6'),
        (12, '7-12'),
        (21, '13-21'),
        (35, '22-35'),
        (50, '36-50'),
        (60, '51-60'),
        (80, '61-80'),
    )
    for upper_bound, label in buckets:
        if years <= upper_bound:
            return label
    return '81+'

# Label every row with its age bucket and store the labels as a new column.
age_group_labels = df['Patient Age'].apply(create_age_group)
df['Age Group'] = age_group_labels

# Function to count frequencies of reactions for a given age group and sex
def count_reactions_by_age_sex(data, age_group, sex):
    """Tally individual reactions for one age group / sex combination.

    Rows of ``data`` are selected where 'Age Group' equals ``age_group`` and
    'Sex' equals ``sex``. Each non-null 'Reactions' entry is split on ';',
    the pieces are stripped of surrounding whitespace, and every piece is
    counted.

    Returns a dict mapping reaction text to its number of occurrences, in
    first-seen order; the dict is empty when no row matches.
    """
    in_age_group = data['Age Group'] == age_group
    is_sex = data['Sex'] == sex
    reaction_entries = data[in_age_group & is_sex]['Reactions']

    tally = {}
    for entry in reaction_entries:
        if pd.isnull(entry):
            continue
        for piece in entry.split(';'):
            name = piece.strip()
            if name in tally:
                tally[name] += 1
            else:
                tally[name] = 1
    return tally

# Buckets and sexes to report on; each (age group, sex) pair becomes one
# worksheet named '<age group>_<sex>'.
age_groups = ['0-6', '7-12', '13-21', '22-35', '36-50', '51-60', '61-80', '81+']
sexes = ['Male', 'Female']
output_file = '/content/Reactions_Frequencies_By_Age_Sex.xlsx'  # Path for output

# Flatten the two nested loops into a single ordered list of pairs
# (all sexes for the first age group, then the next age group, ...).
group_sex_pairs = [(group, sex_label) for group in age_groups for sex_label in sexes]

with pd.ExcelWriter(output_file) as writer:
    for age_group, sex in group_sex_pairs:
        reactions_freq = count_reactions_by_age_sex(df, age_group, sex)
        reactions_df = pd.DataFrame(
            list(reactions_freq.items()),
            columns=['Reaction', 'Frequency'],
        )
        sheet_name = f'{age_group}_{sex}'
        reactions_df.to_excel(writer, sheet_name=sheet_name, index=False)

print(f"Output saved to {output_file}")


Output saved to /content/Reactions_Frequencies_By_Age_Sex.xlsx


In [5]:
import pandas as pd
import re

# Load the workbook's default (first) sheet; the same path is written back to
# at the end of this cell.
input_file = '/content/Original.xlsx'
df = pd.read_excel(io=input_file)

# Unit markers recognised in age text, with the divisor that converts a value
# in that unit to years. Matching is done on upper-cased text.
_AGE_UNIT_DIVISORS = (('YR', 1), ('DAY', 365), ('MTH', 12))

# Inclusive upper bound of each age bucket, paired with its label; anything
# above the last bound is '81+'. Bounds agree with the labels.
_AGE_BUCKETS = (
    (6, '0-6'),
    (12, '7-12'),
    (21, '13-21'),
    (35, '22-35'),
    (50, '36-50'),
    (60, '51-60'),
    (80, '61-80'),
)


def age_to_years(age_str):
    """Convert age text such as '39 YR' or '1 YR 6 mth' to a number of years.

    Every number in the text is paired with the unit word that directly
    follows it, so mixed-unit values are converted component by component and
    decimal values such as '1.5 YR' are read as a single number. Unit matching
    is case-insensitive.

    Parameters
    ----------
    age_str : str or missing value
        Raw 'Patient Age' entry.

    Returns
    -------
    float or None
        Total age in years, or None when the value is missing or contains no
        number followed by a recognised unit ('YR', 'DAY', 'MTH').
    """
    if pd.isna(age_str):
        return None

    text = str(age_str).upper()
    total_years = 0.0
    recognised = False

    for number, unit in re.findall(r'(\d+(?:\.\d+)?)\s*([A-Z]*)', text):
        divisor = next(
            (value for marker, value in _AGE_UNIT_DIVISORS if marker in unit),
            None,
        )
        if divisor is None:
            # Number without a known unit: it cannot be placed on a time scale.
            continue
        total_years += float(number) / divisor
        recognised = True

    return total_years if recognised else None

# Function to group age
def group_age(age):
    """Return the age-group label for an age in years.

    Parameters
    ----------
    age : number, None or NaN
        Age in years. pandas turns a None produced by ``age_to_years`` into
        NaN when the results are stored in a numeric column, so both forms of
        "missing" are accepted.

    Returns
    -------
    str or None
        One of '0-6', '7-12', '13-21', '22-35', '36-50', '51-60', '61-80',
        '81+', or None when the age is missing.
    """
    if age is None or pd.isna(age):
        return None

    for upper_bound, label in _AGE_BUCKETS:
        if age <= upper_bound:
            return label
    return '81+'

# Convert the raw age text to years, then bucket those values.
ages_in_years = df['Patient Age'].apply(age_to_years)
df['Age (Years)'] = ages_in_years
df['Age Group'] = df['Age (Years)'].apply(group_age)

# Write the augmented table back over the input workbook as a single sheet.
df.to_excel(input_file, index=False, sheet_name='Sheet1', engine='openpyxl')

print("Data has been successfully grouped and written back to", input_file)


Data has been successfully grouped and written back to /content/Original.xlsx


In [6]:
import pandas as pd

# Load the workbook's default (first) sheet, which is expected to carry the
# 'Age Group', 'Sex' and 'Reactions' columns used below.
input_file = '/content/Original.xlsx'
df = pd.read_excel(io=input_file)

# Function to count frequencies of reactions for a given age group and sex
def count_reactions_by_age_sex(data, age_group, sex):
    """Count how often each reaction occurs for one age group and sex.

    Selects the rows of ``data`` whose 'Age Group' and 'Sex' equal the given
    values, splits every non-null 'Reactions' entry on ';', strips whitespace
    from each part and counts the parts.

    Returns a dict of reaction text -> occurrence count (first-seen order),
    empty when nothing matches.
    """
    matching_rows = data[(data['Age Group'] == age_group) & (data['Sex'] == sex)]
    counts = {}
    for cell in matching_rows['Reactions']:
        if not pd.notnull(cell):
            continue
        for token in [part.strip() for part in cell.split(';')]:
            counts[token] = counts[token] + 1 if token in counts else 1
    return counts

# Age-group labels in ascending order. count_reactions_by_age_sex filters on
# exact equality with the 'Age Group' column, so these strings must be the
# labels the grouping step of this notebook emits ('22-35' and '36-50', not
# '22-40' / '41-50'); a label that never occurs only produces an empty sheet.
age_groups = ['0-6', '7-12', '13-21', '22-35', '36-50', '51-60', '61-80', '81+']
sexes = ['Male', 'Female']

# Surface any label present in the data that would otherwise be dropped
# silently because no sheet is generated for it.
unmatched_groups = set(df['Age Group'].dropna()) - set(age_groups)
if unmatched_groups:
    print(f"Warning: no sheet will be written for age groups {sorted(unmatched_groups)}")

# Write frequencies of reactions for each age group and sex to separate sheets
# named '<age group>_<sex>' (Male first, then Female, within each age group).
output_file = 'Reactions_Frequencies_By_Age_Sex.xlsx'
with pd.ExcelWriter(output_file) as writer:
    for age_group in age_groups:
        for sex in sexes:
            reactions_freq = count_reactions_by_age_sex(df, age_group, sex)
            reactions_df = pd.DataFrame(
                list(reactions_freq.items()),
                columns=['Reaction', 'Frequency'],
            )
            reactions_df.to_excel(writer, index=False, sheet_name=f'{age_group}_{sex}')

print(f"Reactions frequencies by age group and sex have been written to {output_file}.")


Reactions frequencies by age group and sex have been written to Reactions_Frequencies_By_Age_Sex.xlsx.
