In [5]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler
import numpy as np
import pickle  # Import the pickle library

# Train a RandomForestRegressor that predicts 'No of Days Admitted' from three
# vitals columns, report MAE/RMSE on a held-out split, write integer
# predictions for every row back to the CSV, and pickle the fitted model.

# Load the dataset
file_path = r'data/admitted_patients_dataset_updated_updated.csv'
df = pd.read_csv(file_path)

# Preview the dataset
print(df.head())

# Define features and target
feature_columns = ['Systolic BP', 'Diastolic BP', 'Fasting Glucose']
target_column = 'No of Days Admitted'
X = df[feature_columns]  # Features
y = df[target_column]  # Target

# Split BEFORE imputing or scaling. Computing fill values or scaler statistics
# on the full dataset lets information from the test rows leak into
# preprocessing, which makes the evaluation metrics optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Handle missing values (if any) using statistics from the training split only
feature_means = X_train_raw.mean()
target_mean = y_train.mean()
X_train_raw = X_train_raw.fillna(feature_means)
X_test_raw = X_test_raw.fillna(feature_means)
y_train = y_train.fillna(target_mean)
y_test = y_test.fillna(target_mean)  # In case target has missing values
X = X.fillna(feature_means)
y = y.fillna(target_mean)

# Standardize the features: fit on the training split, then apply the same
# transform to the test split and to the full dataset.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
X_scaled = scaler.transform(X)

# Choose a model (RandomForestRegressor in this case)
model = RandomForestRegressor(random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Convert predictions to integers (days admitted is reported as a whole number)
y_pred_int = np.round(y_pred).astype(int)

# Evaluate the model using integer predictions
mae = mean_absolute_error(y_test, y_pred_int)
rmse = np.sqrt(mean_squared_error(y_test, y_pred_int))

print(f'Mean Absolute Error: {mae}')
print(f'Root Mean Squared Error: {rmse}')

# Predict for the entire dataset and convert to integers.
# These predictions include the rows the model was trained on.
df['Predicted_No_of_Days_Admitted'] = np.round(model.predict(X_scaled)).astype(int)

# Save the updated DataFrame back to the CSV file
# NOTE: this is the same path the data was loaded from, so the input file is
# overwritten in place with the extra prediction column.
output_file_path = r'data/admitted_patients_dataset_updated_updated.csv'
df.to_csv(output_file_path, index=False)

print(f"Updated dataset with predictions saved to {output_file_path}")

# Create a pickle file for the model
# NOTE(review): only the model is pickled. It was trained on standardized
# features, so whoever loads it must apply the same `scaler` (and fill values)
# to raw inputs before calling predict — consider persisting those as well.
pickle_file_path = r'models/random_forest_model.pkl'
with open(pickle_file_path, 'wb') as file:
    pickle.dump(model, file)

print(f"Model saved to {pickle_file_path}")


  Patient ID              Name Gender  Age  \
0   PT3463IN     Jason Anthony   Male   70   
1   PT2574QI      James Obrien   Male   27   
2   PT9347FF       Kayla White   Male   60   
3   PT1625IA  Jessica Williams   Male   45   
4   PT1500WV    Todd Hernandez  Other   42   

                                             Address  Contact Details  \
0         4704 Sullivan Port, North Nicole, WV 13737     919680046961   
1   004 Adams Overpass, Port Deniseborough, MI 06185     917441378267   
2  2342 Veronica Lane Suite 338, Riverabury, AL 3...     918085717285   
3         7348 Campos Centers, Lake Joseph, GA 60610     917791131918   
4      540 Lori Shoals, West Traceyborough, MD 38000     918701471689   

           Attender  Systolic BP  Diastolic BP  Fasting Glucose  \
0       Shari Mcgee          134            65              101   
1        Corey Cole          176            84               85   
2   Crystal English          129            93              169   
3  Christina Tho