# 🧪 Refill Prediction – Starter ML Notebook

This notebook demonstrates how to train a simple machine-learning model (logistic regression) on a small synthetic dataset to predict whether a patient refills their medication on time.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Apply seaborn's "whitegrid" style to every plot drawn below.
# set_theme() is the preferred spelling; sns.set() is only an alias for it.
sns.set_theme(style="whitegrid")


In [None]:
# Sample synthetic dataset
# Four hand-written patient rows, one list per column (not real patient data).
refill_columns = dict(
    PatientID=[101, 102, 103, 104],
    Age=[45, 60, 30, 50],
    Gender=['F', 'M', 'F', 'M'],
    RefillGapDays=[2, 10, 0, 7],
    RefillOnTime=[1, 0, 1, 0],  # binary label used as the prediction target below
)
df = pd.DataFrame(data=refill_columns)

# Preview the frame (it only has four rows, so this shows all of them).
df.head()


In [None]:
# EDA
# Summary statistics of the numeric columns, printed as plain text.
summary_stats = df.describe()
print(summary_stats)

# Histogram of the refill gaps with a KDE curve overlaid, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(6, 4))
sns.histplot(df['RefillGapDays'], bins=5, kde=True, ax=ax)
ax.set_title("Distribution of Refill Gap Days")
ax.set_xlabel("RefillGapDays")
ax.set_ylabel("Count")
plt.show()


In [None]:
# Encode gender, split features and target
# One-hot encode Gender; drop_first=True drops the first level ('F' here), so a
# single Gender_M indicator column is left for this data.
df_encoded = pd.get_dummies(df, columns=['Gender'], drop_first=True)

# PatientID is left out of the features on purpose: only these columns feed the model.
# NOTE(review): RefillGapDays looks like it may directly determine RefillOnTime
# (possible target leakage) — confirm how the label is defined before trusting scores.
feature_columns = ['Age', 'RefillGapDays', 'Gender_M']
target_column = 'RefillOnTime'
X = df_encoded[feature_columns]
y = df_encoded[target_column]

# Hold out 25% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)


In [None]:
# Train and evaluate logistic regression model
# Fit on the training split only; the held-out split is used purely for scoring.
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Classification Report:\n")
# With test_size=0.25 on the four-row sample frame the test split holds a single
# row, so a class can end up with no true or no predicted samples. zero_division=0
# reports those undefined precision/recall/F1 values as 0 (the same number the
# default produces) without emitting UndefinedMetricWarning noise.
print(classification_report(y_test, y_pred, zero_division=0))
