In [36]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

In [37]:
df=pd.read_csv("E:/CTS/all_features.csv")

In [38]:
df = df.dropna(subset=['Historical_Star_Rating'])

In [39]:
feature_columns = [col for col in df.columns if col not in [
    'CONTRACT_ID', 'Member_ID', 'Region', 'Historical_Star_Rating'
]]

In [40]:
target = 'Historical_Star_Rating'

In [41]:
X = df[feature_columns]
y = df[target]

In [42]:
X = pd.get_dummies(X, drop_first=True)

In [43]:
X_train, X_test, y_train, y_test = X, X, y, y

In [44]:
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [45]:
y_pred = model.predict(X_test)

In [46]:
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)


In [47]:
feature_importances = pd.DataFrame({
    'feature': X.columns,
    'importance': model.feature_importances_
}).sort_values('importance', ascending=True)

In [48]:
weakest_features = feature_importances.head(3)

In [49]:
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"R-squared Score: {r2:.4f}")
print(f"Mean Squared Error (MSE): {mse:.4f}")


Root Mean Squared Error (RMSE): 0.1407
R-squared Score: 0.8648
Mean Squared Error (MSE): 0.0198


In [50]:
print("\nWeakest 3 Features (Lowest Importance):")
for i, (idx, row) in enumerate(weakest_features.iterrows(), 1):
    print(f"{i}. {row['feature']}: {row['importance']:.6f}")


Weakest 3 Features (Lowest Importance):
1. Health Plan Quality Improvement: 0.005774
2. Medication Adherence for Cholesterol (Statins): 0.005927
3. Diabetes Care  Eye Exam: 0.005936


In [51]:
import joblib
joblib.dump(model, 'weak_features.pkl')
print("Model saved successfully!")

Model saved successfully!


In [52]:
import pandas as pd
import numpy as np
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [53]:
df = pd.read_csv("all_features.csv")

In [54]:
df = df.dropna(subset=['Historical_Star_Rating'])

In [55]:
feature_columns = [col for col in df.columns if col not in [
    'CONTRACT_ID', 'Member_ID', 'Region', 'Historical_Star_Rating'
]]
target = 'Historical_Star_Rating'

In [56]:
X = df[feature_columns]
y = df[target]

In [57]:
X = pd.get_dummies(X, drop_first=True)

In [None]:
X_train, X_test, y_train, y_test = X, X, y, y

In [77]:
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [78]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

In [79]:
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"R-squared Score: {r2:.4f}")
print(f"Mean Squared Error (MSE): {mse:.4f}")

Root Mean Squared Error (RMSE): 0.1407
R-squared Score: 0.8648
Mean Squared Error (MSE): 0.0198


In [80]:
feature_importances = pd.DataFrame({
    'feature': X.columns,
    'importance': model.feature_importances_
}).sort_values('importance', ascending=True)

In [81]:
print("\nWeakest 3 Features (Lowest Importance):")
print(feature_importances.head(3))


Weakest 3 Features (Lowest Importance):
                                           feature  importance
26                 Health Plan Quality Improvement    0.005774
38  Medication Adherence for Cholesterol (Statins)    0.005927
8                         Diabetes Care  Eye Exam    0.005936


In [64]:
joblib.dump((model, X.columns), "encode.pkl")
print("\nModel and encoder saved as encode.pkl")


Model and encoder saved as encode.pkl


In [82]:
manual_row = {
    'Breast Cancer Screening': 0.465,
    'Colorectal Cancer Screening': 0.7983,
    'Annual Flu Vaccine': 0.792,
    'Monitoring Physical Activity': 0.396,
    'Special Needs Plan (SNP) Care Management': 0.5627,
    'Care for Older Adults  Medication Review': 0.4807,
    'Care for Older Adults  Pain Assessment': 0.5493,
    'Osteoporosis Management in Women who had a Fracture': 0.9938,
    'Diabetes Care  Eye Exam': 0.8195,
    'Diabetes Care  Blood Sugar Controlled': 0.8195,
    'Controlling Blood Pressure': 0.4028,
    'Reducing the Risk of Falling': 0.0538,
    'Improving Bladder Control': 0.0165,
    'Medication Reconciliation Post-Discharge': 0.0832,
    'Plan All-Cause Readmissions': 0.6585,
    'Statin Therapy for Patients with Cardiovascular Disease': 0.6173,
    'Transitions of Care': 0.3395,
    'Follow-up after Emergency Department Visit for People with Multiple High-Risk Chronic Conditions': 0.5756,
    'Getting Needed Care': 0.3635,
    'Getting Appointments and Care Quickly': 0.8397,
    'Customer Service': 0.4825,
    'Rating of Health Care Quality': 0.1974,
    'Rating of Health Plan': 0.4891,
    'Care Coordination': 0.2994,
    'Complaints about the Health Plan': 0.0719,
    'Members Choosing to Leave the Plan': 0.22,
    'Health Plan Quality Improvement': 0.2374,
    'Plan Makes Timely Decisions about Appeals': 0.0254,
    'Reviewing Appeals Decisions': 0.0588,
    'Call Center  Foreign Language Interpreter and TTY Availability': 0.0836,
    'Complaints about the Drug Plan': 0.0863,
    'Members Choosing to Leave the Plan (Drug Plan)': 0.0257,
    'Drug Plan Quality Improvement': 0.2806,
    'Rating of Drug Plan': 0.2641,
    'Getting Needed Prescription Drugs': 0.1252,
    'MPF Price Accuracy': 0.8833,
    'Medication Adherence for Diabetes Medications': 0.7639,
    'Medication Adherence for Hypertension (RAS antagonists)': 0.5588,
    'Medication Adherence for Cholesterol (Statins)': 0.783,
    'MTM Program Completion Rate for CMR': 0.6264,
    'Gender_encoded': 0,
    'Organization_encoded': 299
}

In [None]:

model_loaded, train_columns = joblib.load("encode.pkl")

In [None]:

manual_df = pd.DataFrame([manual_row])

In [None]:

manual_df = manual_df.reindex(columns=train_columns, fill_value=0)

In [None]:

manual_pred = model_loaded.predict(manual_df)[0]
print(f"\n Predicted Historical Star Rating for manual row: {manual_pred:.4f}")



 Predicted Historical Star Rating for manual row: 3.6090


In [70]:
pip install google-generativeai


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [87]:
import google.generativeai as genai


In [88]:
genai.configure(api_key="AIzaSyCaBxJSjuUEhyFFYfT4dJPEZoGZ3gTiCjA")

In [89]:
def business_fixes(feature_name):
    prompt = f"""
    The dataset feature is: {feature_name}.
    This feature has low importance in predicting healthcare star ratings.

    Give ONLY **real company-level strategies** that a healthcare organization
    (like a health plan, insurance company, or provider network) could implement
    to improve this metric in practice.

    Do not provide data preprocessing or feature engineering ideas. 
    Only give **business improvement actions** in bullet points.
    """
    model = genai.GenerativeModel("gemini-1.5-flash")  
    response = model.generate_content(prompt)
    return response.text.strip()

In [90]:
weak_features = feature_importances.head(3)['feature'].tolist()
print("Weakest 3 Features:", weak_features)


Weakest 3 Features: ['Health Plan Quality Improvement', 'Medication Adherence for Cholesterol (Statins)', 'Diabetes Care \x96 Eye Exam']


In [91]:
for f in weak_features:
    print(f"\n=== {f} ===")
    print(business_fixes(f))



=== Health Plan Quality Improvement ===
* **Invest in care management programs:**  Develop and implement robust programs for chronic disease management, preventive care, and transitional care to improve patient outcomes and reduce readmissions. This includes proactive outreach, personalized care plans, and coordination with specialists.

* **Enhance provider network quality:**  Implement stricter credentialing processes, focusing on quality metrics, patient satisfaction scores, and adherence to clinical guidelines.  Invest in provider training and education programs focused on evidence-based medicine and best practices.

* **Improve member engagement and communication:** Develop user-friendly tools and resources (e.g., mobile apps, online portals) to empower members to actively manage their health.  Increase proactive communication with members regarding preventive screenings, medication adherence, and upcoming appointments.

* **Implement a robust quality improvement program:** Estab