In [4]:
import numpy as np
import pandas as pd
import plotly.express as px


In [5]:
# Load the hospital patient records from the CSV export and echo the frame
# (pandas abbreviates the middle rows when a long frame is printed).
# NOTE(review): this is an absolute, machine-specific Windows path; the
# notebook will only run where this exact file location exists.
csv_path = r"D:\vs\hospital_patient_data_100_rows.csv"
df = pd.read_csv(csv_path)
print(df)

     ID       Name  Age  Gender    Illness  Treatment_Cost Admission_Date
0     1      Kelly   71  Female  Infection         7756.69     2023-11-16
1     2    Natalie   46    Male   Diabetes        18582.09     2024-09-20
2     3      Jason   64   Other   Covid-19        16461.73     2023-07-29
3     4      Julia   37   Other   Fracture        32657.47     2023-08-22
4     5  Christine   81    Male   Diabetes        11300.63     2024-04-21
..  ...        ...  ...     ...        ...             ...            ...
95   96      Nancy   73   Other   Covid-19        28134.16     2024-11-19
96   97       Levi   61    Male   Covid-19        37116.26     2025-06-18
97   98       Carl   28    Male     Asthma        27584.39     2024-02-16
98   99    Timothy   22   Other      Fever        21782.96     2023-12-18
99  100   Samantha   84   Other   Covid-19        10133.00     2024-04-16

[100 rows x 7 columns]


In [6]:
# Per-column count of missing values (isna is the pandas alias of isnull).
null_counts = df.isna().sum()
print(null_counts)

ID                0
Name              0
Age               0
Gender            0
Illness           0
Treatment_Cost    0
Admission_Date    0
dtype: int64


In [7]:
# Show the dtype pandas inferred for every column when reading the CSV.
column_types = df.dtypes
print(column_types)

ID                  int64
Name               object
Age                 int64
Gender             object
Illness            object
Treatment_Cost    float64
Admission_Date     object
dtype: object


In [8]:
# Admission_Date is read from the CSV as plain strings (object dtype);
# convert it to real datetimes so the .dt accessor can be used later.
admission_dates = pd.to_datetime(df["Admission_Date"])
df["Admission_Date"] = admission_dates

# Confirm the conversion took effect.
print(df["Admission_Date"].dtype)

datetime64[ns]


In [14]:
# Show which gender labels are currently present in the frame.
print(df["Gender"].unique())

# Recode every "Other" row to "Male" or "Female" by uniform random draw.
# The draw uses a generator with a fixed seed so that Restart & Run All gives
# the same assignment (and therefore the same counts and charts) every time;
# the previous unseeded np.random.choice produced different data on each run.
# NOTE(review): this overwrites recorded values with random ones and removes
# the "Other" category from everything downstream -- confirm this is intended.
GENDER_SEED = 42
is_other = df["Gender"] == "Other"
if is_other.any():
    # Only touch the frame when there is something to recode, so re-running
    # this cell after the first pass leaves the data unchanged.
    rng = np.random.default_rng(GENDER_SEED)
    df.loc[is_other, "Gender"] = rng.choice(
        ["Male", "Female"], size=int(is_other.sum())
    )

# Resulting distribution after the recode.
print(df["Gender"].value_counts())

['Female' 'Male']
Gender
Male      59
Female    41
Name: count, dtype: int64


In [15]:
# List the distinct illness labels, in order of first appearance.
illness_names = df["Illness"].unique()
print(illness_names)

['Infection' 'Diabetes' 'Covid-19' 'Fracture' 'Hypertension' 'Fever'
 'Asthma']


In [16]:
# Tally patients per illness as a two-column frame, largest group first.
illness_counts = (
    df["Illness"]
    .value_counts()
    .reset_index()
    .set_axis(["Illness", "Count"], axis=1)
    .sort_values("Count", ascending=False)
)

# One bar per illness, coloured by illness, with the count as the bar label.
fig = px.bar(
    data_frame=illness_counts,
    x="Illness",
    y="Count",
    color="Illness",
    text="Count",
    title="Top Illnesses in Patients",
)

# Cosmetic layout: transparent plot area, slanted tick labels, and no legend
# (the x-axis already names each bar).
bar_layout = {
    "title_font_size": 22,
    "xaxis_title": "Illness",
    "yaxis_title": "Patient Count",
    "xaxis_tickangle": -30,
    "plot_bgcolor": "rgba(0,0,0,0)",
    "font": {"size": 14},
    "showlegend": False,
}
fig.update_layout(**bar_layout)

# Draw the count labels outside (above) each bar.
fig.update_traces(textposition="outside", texttemplate="%{text}")

fig.show()



In [18]:

# Patients per gender as a two-column frame for plotting.
gender_counts = (
    df["Gender"]
    .value_counts()
    .reset_index()
    .set_axis(["Gender", "Count"], axis=1)
)

# Fixed colour per gender label so the chart looks the same between runs.
gender_palette = {
    "Male": "#1f77b4",
    "Female": "#ff6347",
    "Other": "#9b59b6",
}

# Donut chart (hole=0.3) of the gender split.
fig = px.pie(
    data_frame=gender_counts,
    names="Gender",
    values="Count",
    color="Gender",
    color_discrete_map=gender_palette,
    hole=0.3,
    title="Gender Distribution of Patients",
)

# Pull the "Female" slice slightly out of the ring; one offset per row of
# gender_counts, in row order.
pull_offsets = []
for gender in gender_counts["Gender"]:
    if gender == "Female":
        pull_offsets.append(0.05)
    else:
        pull_offsets.append(0)

fig.update_traces(
    pull=pull_offsets,
    textfont_size=14,
    textinfo="percent+label",
)

fig.update_layout(
    title_font_size=20,
    legend_title_text="Gender",
    showlegend=True,
)

fig.show()


In [20]:
# Order the illnesses by median treatment cost, most expensive first, so the
# boxes read left-to-right from highest to lowest median.
median_cost_by_illness = df.groupby("Illness")["Treatment_Cost"].median()
median_order = median_cost_by_illness.sort_values(ascending=False).index

# Extra patient fields to show when hovering over an individual point.
hover_columns = ["Name", "Age", "Gender", "Admission_Date"]

# One box per illness with every patient drawn as a point alongside it.
fig3 = px.box(
    data_frame=df,
    x="Illness",
    y="Treatment_Cost",
    color="Illness",
    points="all",
    category_orders={"Illness": list(median_order)},
    hover_data=hover_columns,
    title="Treatment Cost Distribution by Illness",
)

# Cosmetic layout: transparent plot area and no legend (the x-axis already
# names each box).
box_layout = {
    "xaxis_title": "Illness Type",
    "yaxis_title": "Treatment Cost (₹)",
    "title_font_size": 20,
    "font": {"size": 13},
    "showlegend": False,
    "plot_bgcolor": "rgba(0,0,0,0)",
}
fig3.update_layout(**box_layout)

# Semi-transparent, horizontally jittered points so overlapping patients
# remain distinguishable.
fig3.update_traces(jitter=0.3, marker={"opacity": 0.5})

fig3.show()



In [21]:
# Count admissions per calendar month, in chronological order.
monthly_admissions = df["Admission_Date"].dt.to_period("M").value_counts().sort_index()

# value_counts() only returns months that have at least one admission. Because
# the x-axis below is categorical, a month with no admissions would silently
# vanish and the line would join its neighbours as if they were consecutive.
# Reindex over every month in the observed range so empty months plot as 0.
if not monthly_admissions.empty:
    all_months = pd.period_range(
        monthly_admissions.index.min(),
        monthly_admissions.index.max(),
        freq="M",
    )
    monthly_admissions = monthly_admissions.reindex(all_months, fill_value=0)

# Flatten to a two-column frame; months become "YYYY-MM" strings so they can
# be used as category labels on the x-axis.
monthly_df = monthly_admissions.reset_index()
monthly_df.columns = ["Month", "Admissions"]
monthly_df["Month"] = monthly_df["Month"].astype(str)

# Line with a marker and a count label at every month.
fig4 = px.line(
    monthly_df,
    x="Month",
    y="Admissions",
    title="Monthly Patient Admissions Over Time",
    markers=True,
    text="Admissions"
)
fig4.update_traces(textposition="top center", line=dict(color="#0077b6", width=3))

fig4.update_layout(
    xaxis_title="Month",
    yaxis_title="Number of Admissions",
    xaxis_tickangle=-45,
    title_font_size=20,
    plot_bgcolor="rgba(0,0,0,0)",
    font=dict(size=13),
    hovermode="x unified",
    margin=dict(t=60, b=80)
)
# Treat the "YYYY-MM" labels as discrete categories rather than letting the
# axis parse them as dates.
fig4.update_xaxes(type="category")

fig4.show()
