In [3]:
# =============================================================================
# DIABETES 30-DAY READMISSION RISK PREDICTOR
# 01 — Data Ingestion & Enterprise-Grade SQL Layer
# Client: Dr. Sarah Chen, Chief Medical Officer, HealthFirst Network
# Consultant: Rabbi Islam Yeasin, IBM Certified Professional Data Scientist
# Date Started: December 05, 2025
# =============================================================================

import sqlite3
import warnings
from contextlib import closing
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Silence every Python warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation notices from
# pandas / seaborn are hidden as well — consider narrowing it by category.
warnings.filterwarnings(action='ignore')

# Plot theme for the notebook. The matplotlib style sheet is applied first so
# that the seaborn palette and context set below are layered on top of it.
plt.style.use(style='seaborn-v0_8-whitegrid')
sns.set_context(context="notebook", font_scale=1.1)
sns.set_palette(palette="husl")

# =============================================================================
# 1. LOAD RAW DATA
# =============================================================================
# NOTE(review): this is a machine-specific absolute path; the cell only runs on
# a host where this directory exists. `data_dir` is kept as a plain string so
# any later cell that formats or concatenates it keeps working.
data_dir = r"D:\Projects and All\gitupload\upload-folders\diabetes-readmission-predictor\data"

# Build file paths with pathlib instead of splicing "\\" separators into an
# f-string, so the join no longer depends on hand-written path separators.
data_path = Path(data_dir)

# low_memory=False makes pandas read each file in one pass rather than in
# chunks, which avoids mixed-dtype inference on columns.
df = pd.read_csv(data_path / "diabetic_data.csv", low_memory=False)
mapping = pd.read_csv(data_path / "IDs_mapping.csv")

# Summary of what was loaded: row count, column count, and deep memory
# footprint (object columns included) in megabytes.
print("Raw dataset loaded successfully")
print(f"Shape → {df.shape[0]:,} patients × {df.shape[1]} features")
print(f"Memory usage → {df.memory_usage(deep=True).sum() / 1e6:.2f} MB")

# =============================================================================
# 2. CREATE ENTERPRISE SQLITE DATABASE
# =============================================================================
# NOTE(review): machine-specific absolute path; the database file is created
# here if it does not exist yet.
db_path = r"D:\Projects and All\gitupload\upload-folders\diabetes-readmission-predictor\diabetes_hospital.db"

# `closing` guarantees conn.close() runs even if the write or the check below
# raises. A bare `with sqlite3.connect(...)` would only manage the transaction
# and leave the connection open, so it is not used here.
with closing(sqlite3.connect(db_path)) as conn:
    # if_exists='replace' drops and recreates the table, so re-running this
    # cell is idempotent; index=False keeps the pandas index out of the table.
    df.to_sql('patients', conn, if_exists='replace', index=False)

    print(f"\nSQLite database created at:")
    print(f"→ {db_path}")
    print(f"Table 'patients' created with {len(df):,} records")

    # Quick sanity check query: count the rows that actually landed in SQLite.
    test_query = "SELECT COUNT(*) as total_patients FROM patients"
    total = pd.read_sql(test_query, conn).iloc[0, 0]
    print(f"Verification → {total:,} records in SQL table")

    # Turn the printed check into a real one: fail loudly on a partial write.
    assert total == len(df), (
        f"Row count mismatch: SQL table has {total:,} rows, DataFrame has {len(df):,}"
    )

Raw dataset loaded successfully
Shape → 101,766 patients × 50 features
Memory usage → 230.91 MB

SQLite database created at:
→ D:\Projects and All\gitupload\upload-folders\diabetes-readmission-predictor\diabetes_hospital.db
Table 'patients' created with 101,766 records
Verification → 101,766 records in SQL table
