## Importing Packages

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import os
from datetime import datetime   

## File paths for the data pipeline

In [2]:
# Paths
# Every pipeline location hangs off one project root. The root defaults to the
# original machine-specific folder and can be redirected by setting the
# PREDICTIVE_PIPELINE_DIR environment variable before the kernel starts.
BASE_DIR = os.environ.get(
    "PREDICTIVE_PIPELINE_DIR",
    "C:/Users/USER/Documents/Predictive_Model_Pipeline",
)

# NOTE(review): the data-loading cell reads a different CSV (under scripts/)
# and never uses input_path — confirm which file is the intended input.
input_path = f"{BASE_DIR}/data/health_care_Dataset.csv"
# Destination of the DataFrame written by the save cell.
output_path = f"{BASE_DIR}/data/predicted_output.csv"
# Text file that the logging cell appends run status lines to.
log_path = f"{BASE_DIR}/logs/run_log.txt"

## Loading the Healthcare Dataset

In [5]:
# Load Data
# Read the patient CSV into the working DataFrame `df`.
# NOTE(review): this location (scripts/Health_care_patient_Dataset.csv) is not
# the `input_path` defined in the paths cell (data/health_care_Dataset.csv) —
# confirm which file is intended.
df = pd.read_csv(
    filepath_or_buffer="C:/Users/USER/Documents/Predictive_Model_Pipeline/scripts/Health_care_patient_Dataset.csv"
)

C:\Users\USER\Documents\Predictive_Model_Pipeline\scripts


## Preprocessing of the dataset

In [7]:
# Preprocessing

# Encode Gender as 0/1. Series.map turns every value that is missing from the
# dict into NaN, so without the identity entries (0 -> 0, 1 -> 1) re-running
# this cell would wipe out the already-encoded column. With them the cell is
# idempotent; any other value still becomes NaN, as before.
gender_codes = {'Male': 0, 'Female': 1, 0: 0, 1: 1}
df['Gender'] = df['Gender'].map(gender_codes)

# Binary target: 1 when the stay is strictly longer than the median stay of
# the loaded data, otherwise 0.
length_of_stay = df['Length of Stay (LOS)']
df['High_LOS'] = (length_of_stay > length_of_stay.median()).astype(int)


In [8]:
# Encode binary features

# Yes/No -> 1/0. Series.map yields NaN for any value absent from the dict, so
# the identity entries (1 -> 1, 0 -> 0) keep already-encoded values intact and
# make the cell safe to re-run. Any other value still becomes NaN, as before.
yes_no_codes = {'Yes': 1, 'No': 0, 1: 1, 0: 0}
for column in ('Medication Adherence', 'Follow-up Appointment'):
    df[column] = df[column].map(yes_no_codes)

## Prediction Model

In [9]:
# Model

# Predictors and target used by the classifier.
features = ['Age', 'Gender', 'Medication Adherence', 'Follow-up Appointment']
X = df[features]
y = df['High_LOS']

# Hold out 20% of the rows; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Seed the forest as well: an unseeded RandomForestClassifier gives different
# trees (and possibly different predictions) on every run.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Use the held-out rows, which were otherwise never touched, to report how the
# model does on data it was not trained on.
holdout_accuracy = model.score(X_test, y_test)
print(f"Hold-out accuracy: {holdout_accuracy:.3f}")

# Predictions are stored for every row, including the rows used for training.
df['Predicted_High_LOS'] = model.predict(X)

## Output Data

In [10]:
# Save prediction

# DataFrame.to_csv does not create missing folders, so make sure the target
# directory exists before writing.
output_dir = os.path.dirname(output_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Write the full DataFrame (without the index column) to output_path.
df.to_csv(output_path, index=False)


In [23]:
def _append_log(message):
    """Append one timestamped line to the run log, creating its folder if needed."""
    log_dir = os.path.dirname(log_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    with open(log_path, 'a') as f:
        f.write(f"[{datetime.now()}] {message}\n")


try:
    # Do the save inside the guarded block so the log entry reflects what
    # actually happened; writing the same CSV again is harmless because it
    # simply overwrites the file with identical content.
    df.to_csv(output_path, index=False)
except Exception as e:
    # Log error, then re-raise so the failure stays visible in the notebook
    # instead of being hidden in the log file only.
    _append_log(f"Error: {e}")
    raise
else:
    # Log success only after the write completed without raising.
    _append_log("Prediction run successful. Output saved.")

In [24]:
from sqlalchemy import create_engine

# SQLAlchemy engine for SQL Server through pyodbc, using Windows (trusted)
# authentication and ODBC Driver 17.
# The server part is a raw string so its backslashes are taken literally; the
# previous non-raw literal relied on the invalid escape sequences "\V" and
# "\S", which newer Python versions warn about. The runtime value is unchanged.
# NOTE(review): the server name contains a single backslash followed by a
# double one (DESKTOP-7JNGS66\VANHELSON\\SQLEXPRESS). SQL Server named
# instances are normally written HOST\INSTANCE — confirm the intended value.
conn_str = (
    r"mssql+pyodbc://DESKTOP-7JNGS66\VANHELSON\\SQLEXPRESS/Health_Care_Predictive_Model"
    "?trusted_connection=yes&driver=ODBC+Driver+17+for+SQL+Server"
)
engine = create_engine(conn_str)
