In [3]:
# Download the zip file from UCI repository
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/00296/dataset_diabetes.zip

# Extract the zip file
!unzip dataset_diabetes.zip

# Check what files were extracted
!ls -la

--2025-11-26 13:28:38--  https://archive.ics.uci.edu/ml/machine-learning-databases/00296/dataset_diabetes.zip
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified
Saving to: ‘dataset_diabetes.zip’

dataset_diabetes.zi     [  <=>               ]   3.19M  8.91MB/s    in 0.4s    

2025-11-26 13:28:39 (8.91 MB/s) - ‘dataset_diabetes.zip’ saved [3347213]

Archive:  dataset_diabetes.zip
  inflating: dataset_diabetes/diabetic_data.csv  
  inflating: dataset_diabetes/IDs_mapping.csv  
total 3292
drwxr-xr-x 1 root root    4096 Nov 26 13:28 .
drwxr-xr-x 1 root root    4096 Nov 26 12:56 ..
drwxr-xr-x 4 root root    4096 Nov 20 14:30 .config
drwxr-xr-x 2 root root    4096 Nov 26 13:28 dataset_diabetes
-rw-r--r-- 1 root root 3347213 Nov 26 13:28 dataset_diabetes.zip
drwxr-xr-x 1 root root    4096 Nov 20 14:30 sample_data


In [4]:
# Install necessary libraries
!pip install scikit-learn joblib



In [5]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
import joblib

In [8]:
# List the extracted dataset directory to find the CSV file names
# (nothing is loaded in this cell)
!ls -la /content/dataset_diabetes/

total 18724
drwxr-xr-x 2 root root     4096 Nov 26 13:28 .
drwxr-xr-x 1 root root     4096 Nov 26 13:28 ..
-rw-r--r-- 1 root root 19159383 May 15  2014 diabetic_data.csv
-rw-r--r-- 1 root root     2547 May 15  2014 IDs_mapping.csv


In [9]:
# Read the encounter records from the extracted archive into a DataFrame
csv_path = '/content/dataset_diabetes/diabetic_data.csv'
df = pd.read_csv(csv_path)

In [10]:
# Sanity-check the load: report the frame's dimensions and preview the first
# ten column names (not the full list)
frame_shape = df.shape
print(f"Data shape: {frame_shape}")

leading_columns = df.columns[:10].tolist()
print("Columns:", leading_columns)

Data shape: (101766, 50)
Columns: ['encounter_id', 'patient_nbr', 'race', 'gender', 'age', 'weight', 'admission_type_id', 'discharge_disposition_id', 'admission_source_id', 'time_in_hospital']


In [11]:
# Derive the binary label: 1 where `readmitted` equals '<30', 0 for every
# other value of that column
readmitted_within_30 = df['readmitted'].eq('<30')
df['readmit_30'] = readmitted_within_30.astype(int)

target_counts = df['readmit_30'].value_counts()
print(f"Target distribution:\n{target_counts}")

Target distribution:
readmit_30
0    90409
1    11357
Name: count, dtype: int64


In [12]:
# Model inputs: three numeric columns taken straight from the raw frame
feature_columns = ['time_in_hospital', 'num_lab_procedures', 'num_medications']
X = df[feature_columns]
y = df['readmit_30']

print(f"Features shape: {X.shape}")

# Fit a 50-tree random forest with a fixed seed for repeatable results.
# NOTE(review): the model is fit on every row, so no held-out data remains to
# measure its quality, and the target is imbalanced (90409 zeros vs 11357 ones
# in the distribution printed earlier) - consider a stratified split and a
# ranking metric before relying on its predictions.
model = RandomForestClassifier(random_state=42, n_estimators=50).fit(X, y)
print("Model trained!")

Features shape: (101766, 3)
Model trained!


In [13]:
# Persist the fitted classifier to disk so other cells can reload it
model_path = 'readmission_model.pkl'
joblib.dump(model, model_path)
print("Model saved successfully!")

Model saved successfully!


In [14]:
# Test loading the model.
# joblib.load unpickles the file, so only load artifacts from a trusted source.
loaded_model = joblib.load('readmission_model.pkl')
print("Model loaded successfully!")

# Make a test prediction.
# The model was fitted on a DataFrame with named columns, so the sample is
# wrapped in a DataFrame carrying the same names (as the /predict endpoint
# does). A bare list carries no feature names, which scikit-learn warns about.
test_sample = pd.DataFrame(
    [[5, 40, 15]],
    columns=['time_in_hospital', 'num_lab_procedures', 'num_medications'],
)
prediction = loaded_model.predict(test_sample)
print(f"Test prediction: {prediction[0]}")

Model loaded successfully!
Test prediction: 0




In [17]:
from flask import Flask, request, jsonify
import joblib
import pandas as pd
import threading

# Load model
# joblib.load deserializes with pickle, which can run arbitrary code while
# loading - only point this at a file from a trusted source.
# This rebinds the global `model` name that the training cell also assigns.
model = joblib.load('readmission_model.pkl')

# Flask application object; the route handler defined below is registered on it.
app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    """Score one patient encounter sent as a JSON object.

    Expected body keys: 'time_in_hospital', 'num_lab_procedures' and
    'num_medications', each a finite number.

    Returns:
        On success, a JSON object with 'readmission_risk' (the predicted class
        as an int), 'probability' (the model's probability for the class at
        index 1) and 'risk_level' ('High' when the predicted class is 1,
        otherwise 'Low').
        On invalid input, a JSON object with an 'error' message and HTTP 400,
        instead of an unhandled exception surfacing as HTTP 500.
    """
    import math  # local import keeps this cell self-contained

    feature_names = ['time_in_hospital', 'num_lab_procedures', 'num_medications']

    # silent=True yields None for a missing or malformed JSON body rather than
    # raising, so the problem can be reported in the same format as other errors.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    missing = [name for name in feature_names if name not in data]
    if missing:
        return jsonify({'error': f"Missing required fields: {', '.join(missing)}"}), 400

    try:
        values = [float(data[name]) for name in feature_names]
    except (TypeError, ValueError):
        return jsonify({'error': 'Feature values must be numeric'}), 400

    # NaN / infinity convert to float fine but are not meaningful model inputs.
    if not all(math.isfinite(value) for value in values):
        return jsonify({'error': 'Feature values must be finite numbers'}), 400

    # Named columns in the same order the model was trained with.
    features = pd.DataFrame([values], columns=feature_names)

    prediction = model.predict(features)[0]
    probability = model.predict_proba(features)[0][1]

    return jsonify({
        'readmission_risk': int(prediction),
        'probability': float(probability),
        'risk_level': 'High' if prediction == 1 else 'Low'
    })

# Serve the app from a background thread so the notebook stays interactive
def run_flask():
    """Start Flask's built-in server for `app` on port 5000 of all interfaces.

    Debug mode and the reloader are both switched off.
    """
    # NOTE(review): host '0.0.0.0' listens on every network interface; the
    # client cells only use localhost - confirm wider exposure is intended.
    app.run(debug=False, use_reloader=False, host='0.0.0.0', port=5000)

# Daemon thread: it does not keep the process alive on its own
thread = threading.Thread(target=run_flask, daemon=True)
thread.start()

# NOTE(review): printed right after start() without checking that the server
# is actually accepting connections yet.
print("Flask server is running on port 5000")

Flask server is running on port 5000
 * Serving Flask app '__main__'
 * Debug mode: off


In [18]:
import requests
import time

# Wait a moment for server to start
time.sleep(2)

# Test the API
test_data = {
    'time_in_hospital': 5,
    'num_lab_procedures': 40,
    'num_medications': 15
}

try:
    # A timeout stops the cell from hanging indefinitely if the server is not
    # responding.
    response = requests.post('http://localhost:5000/predict', json=test_data, timeout=10)
    # Turn 4xx/5xx replies into an exception so an error body is not printed as
    # if it were a prediction.
    response.raise_for_status()
    print("API Response:", response.json())
except (requests.RequestException, ValueError) as e:
    # RequestException covers connection, timeout and HTTP-status failures;
    # ValueError covers a body that is not valid JSON.
    print("Error:", e)

INFO:werkzeug:127.0.0.1 - - [26/Nov/2025 13:42:02] "POST /predict HTTP/1.1" 200 -


API Response: {'probability': 0.19529079598853458, 'readmission_risk': 0, 'risk_level': 'Low'}


In [20]:
import requests

# Test data for a patient
patient_data = {
    'time_in_hospital': 5,
    'num_lab_procedures': 40,
    'num_medications': 15
}

# Send prediction request.
# The timeout prevents an indefinite hang when the server is unreachable, and
# raise_for_status() fails loudly on an HTTP error instead of letting the key
# lookups below raise a confusing KeyError on an error body.
response = requests.post('http://localhost:5000/predict', json=patient_data, timeout=10)
response.raise_for_status()
result = response.json()

interpretation = 'HIGH risk - needs intervention' if result['readmission_risk'] == 1 else 'LOW risk'

print(f"Readmission Risk: {result['readmission_risk']}")
print(f"Probability: {result['probability']:.2f}")
print(f"Interpretation: {interpretation}")

INFO:werkzeug:127.0.0.1 - - [26/Nov/2025 13:44:48] "POST /predict HTTP/1.1" 200 -


Readmission Risk: 0
Probability: 0.20
Interpretation: LOW risk
