# Chronic Disease Management System
This project analyzes chronic disease data from CDC and WHO to provide insights into disease trends and patient outcomes.
The notebook covers data ingestion, preprocessing, exploratory visualization, a machine-learning disease classifier, and a simulated deployment (CI/CD) step.

In [1]:
# Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import plotly.express as px

## Data Ingestion

In [2]:
# Read the two source extracts (CDC and WHO) from CSV files located in the
# notebook's working directory.
cdc_data = pd.read_csv(filepath_or_buffer='CDC_Chronic_Disease_Data_Large.csv')
who_data = pd.read_csv(filepath_or_buffer='WHO_Chronic_Disease_Data_Large.csv')

# Preview: the cell's output is a tuple holding the first five rows of each frame.
tuple(frame.head() for frame in (cdc_data, who_data))

## Data Preprocessing

In [3]:
# Handle missing values.
# The CDC frame is forward-filled (each gap takes the previous row's value) and
# the WHO frame is backward-filled (each gap takes the next row's value).
# DataFrame.fillna(method=...) is deprecated in pandas 2.x, so the dedicated
# ffill()/bfill() methods are used; rebinding the names (instead of
# inplace=True) keeps this cell safe to re-run.
# NOTE(review): ffill cannot fill gaps in the first rows and bfill cannot fill
# gaps in the last rows -- confirm no NaNs remain before modelling.
# NOTE(review): filling from a neighbouring row presumes adjacent rows are
# related (e.g. ordered per patient/country) -- verify against the data.
cdc_data = cdc_data.ffill()
who_data = who_data.bfill()

# Feature engineering.
# Risk_Score: weighted sum of three CDC measurements (weights 0.3 / 0.4 / 0.3).
cdc_data['Risk_Score'] = (
    cdc_data['Blood_Pressure'] * 0.3
    + cdc_data['Blood_Glucose'] * 0.4
    + cdc_data['Cholesterol'] * 0.3
)
# Health_Index: BMI and weekly physical activity add to the index, mortality
# risk subtracts from it (weights 0.5 / 0.4 / 0.6).
who_data['Health_Index'] = (
    who_data['BMI'] * 0.5
    + who_data['Physical_Activity (hrs/week)'] * 0.4
    - who_data['Mortality_Risk (%)'] * 0.6
)

## Data Analysis and Visualization

In [4]:
# Bar chart: how many CDC records fall under each value of the 'Disease' column.
plt.figure(figsize=(12, 6))
disease_axes = sns.countplot(data=cdc_data, x='Disease', palette='coolwarm')
disease_axes.set_title('Distribution of Diseases in CDC Dataset')
plt.show()

# Interactive Plotly scatter of the WHO data: BMI against mortality risk,
# coloured by country, with marker size driven by weekly physical activity.
fig = px.scatter(
    who_data,
    x='BMI',
    y='Mortality_Risk (%)',
    color='Country',
    size='Physical_Activity (hrs/week)',
    title='WHO Health Data Analysis',
)
fig.show()

## Machine Learning for Prediction

In [5]:
# Prepare data for ML: four CDC columns are the features, 'Disease' is the target.
X = cdc_data[['Age', 'Blood_Pressure', 'Blood_Glucose', 'Cholesterol']]
y = cdc_data['Disease']
# Hold out 30% of the rows for testing; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train model.
# random_state pins the forest's internal randomness (bootstrap samples and
# feature sub-sampling). Without it the metrics below change on every run even
# though the train/test split itself is seeded.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluate on the held-out rows: raw confusion matrix, then the per-class
# precision / recall / F1 text report.
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

## Automating Deployment (CI/CD)

In [6]:
# Stand-in for a real CI/CD deployment trigger.
def deploy_model():
    """Simulate a deployment by printing a confirmation message.

    Takes no arguments and returns None; nothing is actually deployed.
    """
    confirmation = 'Model Deployed Successfully!'
    print(confirmation)

# Automate testing: only deploy when the model clears a quality bar.
# classification_report() returns a non-empty text report, which is always
# truthy, so using it as the condition would deploy unconditionally. Gate on the
# measured test-set accuracy instead.
MIN_DEPLOY_ACCURACY = 0.80  # placeholder threshold -- tune to the project's requirements
# Fraction of held-out rows whose predicted label equals the true label.
test_accuracy = float(np.mean(np.asarray(y_test) == np.asarray(y_pred)))
if test_accuracy >= MIN_DEPLOY_ACCURACY:
    deploy_model()
else:
    print(
        f'Deployment skipped: test accuracy {test_accuracy:.3f} '
        f'is below the required {MIN_DEPLOY_ACCURACY:.2f}'
    )