# Synthetic Healthcare Visit Records

## M.Abdirisak.A 
## 22/02/2025


In [1]:
# import neccessary libraries 
import pandas as pd
import random
from faker import Faker

In [2]:
# Initialize Faker instance
fake = Faker()
# Predefined lists for possible values
diagnoses = ["Diabetes", "Hypertension", "Heart Disease", "Asthma", "Arthritis"]
treatments = ["Insulin Therapy", "Blood Pressure Medication", "Cardiac Surgery", "Inhaler Therapy", "Physical Therapy"]
doctors = ["Dr. Smith", "Dr. Lee", "Dr. Wilson", "Dr. Johnson", "Dr. Martinez"]
payment_methods = ["Insurance", "Credit Card", "Cash", "Debit Card"]
hospital_locations = ["New York", "Los Angeles", "Chicago", "Houston"]

# List to store generated patient records
data = []
for i in range(1, 101):  # Generate 100 samples
    record = {
        "Visit_ID": 2000 + i,
        "Date": fake.date_between(start_date="-1y", end_date="today").strftime("%Y-%m-%d"),
        "Patient_Name": fake.name(),
        "Age": random.randint(18, 80),
        "Gender": random.choice(["Male", "Female"]),
        "Diagnosis": random.choice(diagnoses),
        "Treatment": random.choice(treatments),
        "Doctor": random.choice(doctors),
        "Treatment_Cost": round(random.uniform(100, 15000), 2),
        "Payment_Method": random.choice(payment_methods),
        "Hospital_Location": random.choice(hospital_locations)
    }
    data.append(record) # Append the generated record to the list

# Convert to DataFrame
df = pd.DataFrame(data)



## 1️⃣ Load JSON Data into a Pandas DataFrame


In [3]:
# convert to dataframe
df=pd.DataFrame(data)
# head of the df
df.head()

Unnamed: 0,Visit_ID,Date,Patient_Name,Age,Gender,Diagnosis,Treatment,Doctor,Treatment_Cost,Payment_Method,Hospital_Location
0,2001,2024-10-09,Joseph Perry,56,Male,Asthma,Inhaler Therapy,Dr. Johnson,7745.44,Debit Card,Houston
1,2002,2024-07-23,Emily Russo PhD,57,Female,Heart Disease,Insulin Therapy,Dr. Martinez,2927.3,Cash,New York
2,2003,2024-07-06,John Cook,74,Female,Arthritis,Blood Pressure Medication,Dr. Wilson,8413.13,Debit Card,New York
3,2004,2025-01-09,Seth York,32,Male,Hypertension,Insulin Therapy,Dr. Lee,7426.62,Cash,Houston
4,2005,2024-04-22,Lisa Lowery,19,Female,Heart Disease,Cardiac Surgery,Dr. Smith,14435.85,Debit Card,Chicago


In [4]:
df.tail()

Unnamed: 0,Visit_ID,Date,Patient_Name,Age,Gender,Diagnosis,Treatment,Doctor,Treatment_Cost,Payment_Method,Hospital_Location
95,2096,2025-01-24,Brian Bush,22,Male,Diabetes,Blood Pressure Medication,Dr. Smith,2597.45,Debit Card,New York
96,2097,2024-05-08,Jacob Wiley,54,Male,Hypertension,Inhaler Therapy,Dr. Lee,8616.86,Debit Card,New York
97,2098,2024-06-07,Megan Lee,29,Female,Asthma,Inhaler Therapy,Dr. Martinez,13008.28,Credit Card,Chicago
98,2099,2025-01-17,Morgan Saunders,72,Female,Heart Disease,Insulin Therapy,Dr. Lee,1538.22,Cash,Los Angeles
99,2100,2025-01-23,Christian Cisneros,63,Male,Asthma,Cardiac Surgery,Dr. Martinez,2860.35,Insurance,Chicago


## 2️⃣ Display DataFrame Information

In [5]:
# more info in the dataset
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Visit_ID           100 non-null    int64  
 1   Date               100 non-null    object 
 2   Patient_Name       100 non-null    object 
 3   Age                100 non-null    int64  
 4   Gender             100 non-null    object 
 5   Diagnosis          100 non-null    object 
 6   Treatment          100 non-null    object 
 7   Doctor             100 non-null    object 
 8   Treatment_Cost     100 non-null    float64
 9   Payment_Method     100 non-null    object 
 10  Hospital_Location  100 non-null    object 
dtypes: float64(1), int64(2), object(8)
memory usage: 8.7+ KB


## 3️⃣ Check for Missing Values and Fill Them

In [6]:
# checking NaN values
df.isnull().sum()

Visit_ID             0
Date                 0
Patient_Name         0
Age                  0
Gender               0
Diagnosis            0
Treatment            0
Doctor               0
Treatment_Cost       0
Payment_Method       0
Hospital_Location    0
dtype: int64

There is no missing values

# <span style="color:red;">NB</span>
 <span style="color:darkorange;">I will be using df.head(2) just to have an overview of the dataset instead of scrolling up and down every now and then</span>


## 4️⃣ Convert Date Column to Datetime Format

In [7]:
df.head(2)

Unnamed: 0,Visit_ID,Date,Patient_Name,Age,Gender,Diagnosis,Treatment,Doctor,Treatment_Cost,Payment_Method,Hospital_Location
0,2001,2024-10-09,Joseph Perry,56,Male,Asthma,Inhaler Therapy,Dr. Johnson,7745.44,Debit Card,Houston
1,2002,2024-07-23,Emily Russo PhD,57,Female,Heart Disease,Insulin Therapy,Dr. Martinez,2927.3,Cash,New York


In [8]:
# convert column 'Date' into datetime data type
df['Date']=pd.to_datetime(df['Date'])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Visit_ID           100 non-null    int64         
 1   Date               100 non-null    datetime64[ns]
 2   Patient_Name       100 non-null    object        
 3   Age                100 non-null    int64         
 4   Gender             100 non-null    object        
 5   Diagnosis          100 non-null    object        
 6   Treatment          100 non-null    object        
 7   Doctor             100 non-null    object        
 8   Treatment_Cost     100 non-null    float64       
 9   Payment_Method     100 non-null    object        
 10  Hospital_Location  100 non-null    object        
dtypes: datetime64[ns](1), float64(1), int64(2), object(7)
memory usage: 8.7+ KB


'Date' column converted to datetime dtype

## 5️⃣ Set Visit_ID as Index

In [9]:
df.head(2)

Unnamed: 0,Visit_ID,Date,Patient_Name,Age,Gender,Diagnosis,Treatment,Doctor,Treatment_Cost,Payment_Method,Hospital_Location
0,2001,2024-10-09,Joseph Perry,56,Male,Asthma,Inhaler Therapy,Dr. Johnson,7745.44,Debit Card,Houston
1,2002,2024-07-23,Emily Russo PhD,57,Female,Heart Disease,Insulin Therapy,Dr. Martinez,2927.3,Cash,New York


Notice we have an index starting from 0 

In [10]:
# setting Visit_ID' as index
df.set_index('Visit_ID', inplace=True)

In [11]:
df.head(2)

Unnamed: 0_level_0,Date,Patient_Name,Age,Gender,Diagnosis,Treatment,Doctor,Treatment_Cost,Payment_Method,Hospital_Location
Visit_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2001,2024-10-09,Joseph Perry,56,Male,Asthma,Inhaler Therapy,Dr. Johnson,7745.44,Debit Card,Houston
2002,2024-07-23,Emily Russo PhD,57,Female,Heart Disease,Insulin Therapy,Dr. Martinez,2927.3,Cash,New York


Now we have and index as 'Visit_ID'

## 6️⃣ Find Unique Diagnoses

In [12]:
# display aal unique diagnosis types in the dataset
df['Diagnosis'].unique()

array(['Asthma', 'Heart Disease', 'Arthritis', 'Hypertension', 'Diabetes'],
      dtype=object)

## 7️⃣ Count the Number of Patients by Gender

In [13]:
df.head(2)

Unnamed: 0_level_0,Date,Patient_Name,Age,Gender,Diagnosis,Treatment,Doctor,Treatment_Cost,Payment_Method,Hospital_Location
Visit_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2001,2024-10-09,Joseph Perry,56,Male,Asthma,Inhaler Therapy,Dr. Johnson,7745.44,Debit Card,Houston
2002,2024-07-23,Emily Russo PhD,57,Female,Heart Disease,Insulin Therapy,Dr. Martinez,2927.3,Cash,New York


In [14]:
# number of patients by gender
df['Gender'].value_counts()

Gender
Female    53
Male      47
Name: count, dtype: int64

## 8️⃣ Calculate the Average Age of Patients

In [15]:
df.head()

Unnamed: 0_level_0,Date,Patient_Name,Age,Gender,Diagnosis,Treatment,Doctor,Treatment_Cost,Payment_Method,Hospital_Location
Visit_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2001,2024-10-09,Joseph Perry,56,Male,Asthma,Inhaler Therapy,Dr. Johnson,7745.44,Debit Card,Houston
2002,2024-07-23,Emily Russo PhD,57,Female,Heart Disease,Insulin Therapy,Dr. Martinez,2927.3,Cash,New York
2003,2024-07-06,John Cook,74,Female,Arthritis,Blood Pressure Medication,Dr. Wilson,8413.13,Debit Card,New York
2004,2025-01-09,Seth York,32,Male,Hypertension,Insulin Therapy,Dr. Lee,7426.62,Cash,Houston
2005,2024-04-22,Lisa Lowery,19,Female,Heart Disease,Cardiac Surgery,Dr. Smith,14435.85,Debit Card,Chicago


In [16]:
# avearge age of patients 
df['Age'].mean()

49.92

## 9️⃣ Find the Most Common Diagnosis

In [17]:
df.head(2)

Unnamed: 0_level_0,Date,Patient_Name,Age,Gender,Diagnosis,Treatment,Doctor,Treatment_Cost,Payment_Method,Hospital_Location
Visit_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2001,2024-10-09,Joseph Perry,56,Male,Asthma,Inhaler Therapy,Dr. Johnson,7745.44,Debit Card,Houston
2002,2024-07-23,Emily Russo PhD,57,Female,Heart Disease,Insulin Therapy,Dr. Martinez,2927.3,Cash,New York


In [18]:
# most common Diagnosis (mode)
df['Diagnosis'].mode()[0]

'Asthma'

In [19]:
# most common diagnosis
df['Diagnosis'].value_counts().idxmax()

'Asthma'

## 🔟 Find Total Treatment Cost by Payment Method

In [20]:
df.head(2)

Unnamed: 0_level_0,Date,Patient_Name,Age,Gender,Diagnosis,Treatment,Doctor,Treatment_Cost,Payment_Method,Hospital_Location
Visit_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2001,2024-10-09,Joseph Perry,56,Male,Asthma,Inhaler Therapy,Dr. Johnson,7745.44,Debit Card,Houston
2002,2024-07-23,Emily Russo PhD,57,Female,Heart Disease,Insulin Therapy,Dr. Martinez,2927.3,Cash,New York


In [21]:
# # Grouping the dataframe by 'Payment_Method' and summing the 'Treatment_Cost' for each payment method
df.groupby('Payment_Method')['Treatment_Cost'].sum()

Payment_Method
Cash           204941.21
Credit Card    193524.41
Debit Card     188001.16
Insurance      162305.33
Name: Treatment_Cost, dtype: float64

## 1️⃣1️⃣ Find the Patient with the Highest Treatment Cost

In [22]:
df.head(2)

Unnamed: 0_level_0,Date,Patient_Name,Age,Gender,Diagnosis,Treatment,Doctor,Treatment_Cost,Payment_Method,Hospital_Location
Visit_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2001,2024-10-09,Joseph Perry,56,Male,Asthma,Inhaler Therapy,Dr. Johnson,7745.44,Debit Card,Houston
2002,2024-07-23,Emily Russo PhD,57,Female,Heart Disease,Insulin Therapy,Dr. Martinez,2927.3,Cash,New York


In [23]:
# Select the row(s) where Treatment_Cost is at its maximum
df[df['Treatment_Cost']==df['Treatment_Cost'].max()]

Unnamed: 0_level_0,Date,Patient_Name,Age,Gender,Diagnosis,Treatment,Doctor,Treatment_Cost,Payment_Method,Hospital_Location
Visit_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2079,2024-04-19,Jason Fox,62,Male,Heart Disease,Blood Pressure Medication,Dr. Wilson,14921.23,Cash,Houston


## 1️⃣2️⃣ Find All Patients Diagnosed with Diabetes

In [24]:
df.head(2)

Unnamed: 0_level_0,Date,Patient_Name,Age,Gender,Diagnosis,Treatment,Doctor,Treatment_Cost,Payment_Method,Hospital_Location
Visit_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2001,2024-10-09,Joseph Perry,56,Male,Asthma,Inhaler Therapy,Dr. Johnson,7745.44,Debit Card,Houston
2002,2024-07-23,Emily Russo PhD,57,Female,Heart Disease,Insulin Therapy,Dr. Martinez,2927.3,Cash,New York


In [25]:
# filtering all rows where 'Diagnosis' is Diabetes
df[df['Diagnosis']=='Diabetes']

Unnamed: 0_level_0,Date,Patient_Name,Age,Gender,Diagnosis,Treatment,Doctor,Treatment_Cost,Payment_Method,Hospital_Location
Visit_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2006,2024-06-04,Deborah Stone,74,Female,Diabetes,Physical Therapy,Dr. Lee,9994.55,Credit Card,Los Angeles
2008,2024-04-22,Jonathan Jones,49,Male,Diabetes,Insulin Therapy,Dr. Martinez,8663.73,Debit Card,Los Angeles
2016,2024-07-29,Kyle Blackwell,69,Female,Diabetes,Cardiac Surgery,Dr. Wilson,7658.16,Debit Card,Los Angeles
2018,2024-03-03,Hunter Washington,54,Male,Diabetes,Cardiac Surgery,Dr. Wilson,11915.48,Insurance,Los Angeles
2020,2024-10-14,Audrey Jensen,23,Male,Diabetes,Insulin Therapy,Dr. Smith,14731.98,Credit Card,Los Angeles
2021,2024-12-09,Michele Carpenter,65,Male,Diabetes,Blood Pressure Medication,Dr. Lee,2599.23,Insurance,Chicago
2025,2025-01-17,Andrew Murillo,51,Male,Diabetes,Blood Pressure Medication,Dr. Wilson,4030.85,Cash,Los Angeles
2031,2024-04-19,Brian Williams,69,Female,Diabetes,Inhaler Therapy,Dr. Lee,1320.28,Credit Card,New York
2038,2024-10-07,Michael Campbell,70,Male,Diabetes,Physical Therapy,Dr. Martinez,11305.05,Debit Card,New York
2039,2024-11-20,Melinda Myers,37,Male,Diabetes,Insulin Therapy,Dr. Lee,13676.87,Insurance,New York


## 1️⃣3️⃣ Count Visits by Hospital Location

In [26]:
df.head(2)

Unnamed: 0_level_0,Date,Patient_Name,Age,Gender,Diagnosis,Treatment,Doctor,Treatment_Cost,Payment_Method,Hospital_Location
Visit_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2001,2024-10-09,Joseph Perry,56,Male,Asthma,Inhaler Therapy,Dr. Johnson,7745.44,Debit Card,Houston
2002,2024-07-23,Emily Russo PhD,57,Female,Heart Disease,Insulin Therapy,Dr. Martinez,2927.3,Cash,New York


In [27]:
# vists by hospital location 
df['Hospital_Location'].value_counts()

Hospital_Location
New York       33
Chicago        25
Houston        22
Los Angeles    20
Name: count, dtype: int64

## 1️⃣4️⃣ Find All Patients Who Paid with Cash

In [28]:
df.head(2)

Unnamed: 0_level_0,Date,Patient_Name,Age,Gender,Diagnosis,Treatment,Doctor,Treatment_Cost,Payment_Method,Hospital_Location
Visit_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2001,2024-10-09,Joseph Perry,56,Male,Asthma,Inhaler Therapy,Dr. Johnson,7745.44,Debit Card,Houston
2002,2024-07-23,Emily Russo PhD,57,Female,Heart Disease,Insulin Therapy,Dr. Martinez,2927.3,Cash,New York


In [29]:
# filtering all row where 'Payment_Method' is 'Cash'
df[df['Payment_Method']=='Cash']

Unnamed: 0_level_0,Date,Patient_Name,Age,Gender,Diagnosis,Treatment,Doctor,Treatment_Cost,Payment_Method,Hospital_Location
Visit_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2002,2024-07-23,Emily Russo PhD,57,Female,Heart Disease,Insulin Therapy,Dr. Martinez,2927.3,Cash,New York
2004,2025-01-09,Seth York,32,Male,Hypertension,Insulin Therapy,Dr. Lee,7426.62,Cash,Houston
2023,2024-10-22,Ms. Jane Gordon,50,Male,Arthritis,Blood Pressure Medication,Dr. Smith,192.86,Cash,Houston
2025,2025-01-17,Andrew Murillo,51,Male,Diabetes,Blood Pressure Medication,Dr. Wilson,4030.85,Cash,Los Angeles
2028,2025-01-02,Veronica Vincent,65,Female,Asthma,Blood Pressure Medication,Dr. Lee,9616.49,Cash,Houston
2029,2024-09-17,Jacob Thompson,42,Male,Heart Disease,Cardiac Surgery,Dr. Johnson,6723.3,Cash,New York
2030,2025-01-09,Zachary Anderson,72,Female,Asthma,Physical Therapy,Dr. Martinez,8948.51,Cash,New York
2035,2024-03-17,Carolyn Contreras,36,Female,Arthritis,Blood Pressure Medication,Dr. Johnson,9694.65,Cash,Los Angeles
2037,2024-03-16,Michael Gregory,46,Male,Asthma,Inhaler Therapy,Dr. Lee,10010.27,Cash,Los Angeles
2041,2024-10-16,Deanna Ross,69,Male,Asthma,Cardiac Surgery,Dr. Wilson,14280.89,Cash,New York


## 1️⃣5️⃣ Get the Top 5 Most Expensive Treatments

In [30]:
df.head(2)

Unnamed: 0_level_0,Date,Patient_Name,Age,Gender,Diagnosis,Treatment,Doctor,Treatment_Cost,Payment_Method,Hospital_Location
Visit_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2001,2024-10-09,Joseph Perry,56,Male,Asthma,Inhaler Therapy,Dr. Johnson,7745.44,Debit Card,Houston
2002,2024-07-23,Emily Russo PhD,57,Female,Heart Disease,Insulin Therapy,Dr. Martinez,2927.3,Cash,New York


In [31]:
#  Select the top 5 rows with the highest treatment costs
df.nlargest(5, "Treatment_Cost")

Unnamed: 0_level_0,Date,Patient_Name,Age,Gender,Diagnosis,Treatment,Doctor,Treatment_Cost,Payment_Method,Hospital_Location
Visit_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2079,2024-04-19,Jason Fox,62,Male,Heart Disease,Blood Pressure Medication,Dr. Wilson,14921.23,Cash,Houston
2085,2024-02-28,Keith Ortiz,61,Female,Hypertension,Blood Pressure Medication,Dr. Martinez,14901.58,Debit Card,New York
2020,2024-10-14,Audrey Jensen,23,Male,Diabetes,Insulin Therapy,Dr. Smith,14731.98,Credit Card,Los Angeles
2077,2024-05-06,Erin Bailey,78,Female,Heart Disease,Inhaler Therapy,Dr. Martinez,14528.79,Credit Card,New York
2005,2024-04-22,Lisa Lowery,19,Female,Heart Disease,Cardiac Surgery,Dr. Smith,14435.85,Debit Card,Chicago


## 1️⃣6️⃣How many visits occurred each month?

In [32]:
df.head(2)

Unnamed: 0_level_0,Date,Patient_Name,Age,Gender,Diagnosis,Treatment,Doctor,Treatment_Cost,Payment_Method,Hospital_Location
Visit_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2001,2024-10-09,Joseph Perry,56,Male,Asthma,Inhaler Therapy,Dr. Johnson,7745.44,Debit Card,Houston
2002,2024-07-23,Emily Russo PhD,57,Female,Heart Disease,Insulin Therapy,Dr. Martinez,2927.3,Cash,New York


First of all change 'Date' colum into datetime dtype and create a column for mont

In [33]:
# conver 'Date' into datetime dtype
df['Month']=pd.to_datetime(df['Date'])
df['Month']=df['Date'].dt.month

In [34]:
df.head(2)

Unnamed: 0_level_0,Date,Patient_Name,Age,Gender,Diagnosis,Treatment,Doctor,Treatment_Cost,Payment_Method,Hospital_Location,Month
Visit_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2001,2024-10-09,Joseph Perry,56,Male,Asthma,Inhaler Therapy,Dr. Johnson,7745.44,Debit Card,Houston,10
2002,2024-07-23,Emily Russo PhD,57,Female,Heart Disease,Insulin Therapy,Dr. Martinez,2927.3,Cash,New York,7


Notice the new column 'Month' created 

In [35]:
# visits per month
df['Month'].value_counts()

Month
1     15
6     11
9     10
10     9
7      9
11     8
2      8
5      8
8      7
4      6
3      5
12     4
Name: count, dtype: int64