# 🏥 Healthcare Data Analysis using Python
### Author: [Your Name]
**Objective:** Analyze hospital or patient data to find insights related to age, disease, gender, and hospital charges.

In [None]:
# 🧩 Step 1: Import Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [None]:
# 📥 Step 2: Load the Dataset
# Point the path below at your own CSV file if it is named differently.
df = pd.read_csv("patient_data.csv")

# Preview the first five rows (rendered by the notebook as the cell's result).
df.head(n=5)

In [None]:
# 🔍 Step 3: Basic Data Exploration
# Quick structural overview of df: dimensions, column labels, per-column
# dtype / non-null report, and summary statistics.
print("Dataset shape:", df.shape)
print("\nColumn names:\n", df.columns)
print("\nData info:")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would append a stray "None" line to the output.
df.info()

print("\nStatistical Summary:")
# Last expression in the cell, so the notebook renders the resulting table.
df.describe()

In [None]:
# 🧹 Step 4: Data Cleaning
# Report the number of missing values per column before anything is changed.
print(df.isnull().sum())

# Assign the filled column back instead of calling fillna(inplace=True) on
# df['col']: the chained in-place form operates on an intermediate object,
# raises a FutureWarning in pandas 2.x and leaves df unchanged under
# Copy-on-Write.
df['Age'] = df['Age'].fillna(df['Age'].mean())

# mode() returns an empty Series when every Gender value is missing; in that
# case there is nothing to fill with and indexing the first entry would raise.
gender_modes = df['Gender'].mode()
if not gender_modes.empty:
    df['Gender'] = df['Gender'].fillna(gender_modes.iloc[0])

df['Disease'] = df['Disease'].fillna("Unknown")

# Remove rows that are exact duplicates of an earlier row.
df.drop_duplicates(inplace=True)

print("\nAfter cleaning:")
# info() prints directly and returns None, so it is not wrapped in print().
df.info()

In [None]:
# 📊 Step 5: Exploratory Data Analysis (EDA)
# Each chart gets its own figure, is drawn through an explicit Axes handle,
# and is displayed before the next figure is created.

# Number of patients per gender.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='Gender', palette='Set2', ax=ax)
ax.set_title("Count of Patients by Gender")
plt.show()

# Ten most frequent values of the Disease column.
top_diseases = df['Disease'].value_counts().head(10)
fig, ax = plt.subplots(figsize=(10, 5))
top_diseases.plot.bar(color='skyblue', ax=ax)
ax.set_title("Top 10 Most Common Diseases")
ax.set_xlabel("Disease")
ax.set_ylabel("Number of Patients")
plt.show()

# Hospital charges aggregated per gender by seaborn's barplot.
fig, ax = plt.subplots(figsize=(6, 4))
sns.barplot(data=df, x='Gender', y='HospitalCharges', palette='coolwarm', ax=ax)
ax.set_title("Average Hospital Charges by Gender")
plt.show()

# Age against charges, one colour per gender.
fig, ax = plt.subplots(figsize=(6, 4))
sns.scatterplot(data=df, x='Age', y='HospitalCharges', hue='Gender', ax=ax)
ax.set_title("Age vs Hospital Charges")
plt.show()

# Pairwise correlations between the numeric columns only.
correlations = df.corr(numeric_only=True)
fig, ax = plt.subplots(figsize=(5, 3))
sns.heatmap(correlations, annot=True, cmap='Blues', ax=ax)
ax.set_title("Correlation Heatmap")
plt.show()

In [None]:
# 💡 Step 6: Key Insights
# Every statement printed below is derived from df. The age/charges insights
# were previously hard-coded text and could contradict the actual data.
charges = df['HospitalCharges']
top_disease = df['Disease'].mode()[0]
male_avg = charges[df['Gender'] == 'Male'].mean()
female_avg = charges[df['Gender'] == 'Female'].mean()

# Pearson correlation between age and charges; NaN when it cannot be computed
# (for example when one of the columns is constant).
age_charge_corr = df['Age'].corr(charges)
if pd.isna(age_charge_corr) or age_charge_corr == 0:
    corr_summary = (
        "There is no measurable linear correlation between patient age "
        "and hospital charges."
    )
else:
    direction = "positive" if age_charge_corr > 0 else "negative"
    corr_summary = (
        f"There is a {direction} correlation between patient age and "
        f"hospital charges (r = {age_charge_corr:.2f})."
    )

# Cut-off used for "elderly" in this summary; adjust to match your definition.
ELDERLY_AGE = 65
elderly_avg = charges[df['Age'] >= ELDERLY_AGE].mean()
younger_avg = charges[df['Age'] < ELDERLY_AGE].mean()
if pd.isna(elderly_avg) or pd.isna(younger_avg):
    # One of the age groups is empty, so no comparison can be made.
    elderly_summary = (
        f"Not enough data to compare patients aged {ELDERLY_AGE}+ "
        "with younger patients."
    )
else:
    if elderly_avg > younger_avg:
        comparison = "higher"
    elif elderly_avg < younger_avg:
        comparison = "lower"
    else:
        comparison = "equal"
    elderly_summary = (
        f"Elderly patients (aged {ELDERLY_AGE}+) have {comparison} hospital "
        f"expenses on average ({elderly_avg:.2f} vs {younger_avg:.2f} "
        "for younger patients)."
    )

print("✅ Insights Summary:")
print(f"""
1. The dataset contains {len(df)} patient records.
2. The most common disease is '{top_disease}'.
3. The average hospital charge for male patients is {male_avg:.2f},
   and for female patients is {female_avg:.2f}.
4. {corr_summary}
5. {elderly_summary}
""")

In [None]:
# 💾 Step 7: Export Cleaned Data
# Writes the cleaned DataFrame to a CSV file without the index column.
# NOTE: only the CSV is written here; this cell does not save any graphs.
df.to_csv(path_or_buf="cleaned_patient_data.csv", index=False)
print("Cleaned dataset saved successfully!")

## 🧠 Step 8: Conclusion
- The dataset shows that chronic diseases like X and Y are most common (replace X and Y with the top diseases from the Step 5 bar chart).
- Average hospital costs increase with patient age.
- Gender-wise analysis shows similar treatment costs.
- Insights can help hospitals plan resource allocation.