<a href="https://colab.research.google.com/github/Kdavis2025/Automating-Compliance-AI-and-Machine-Learning-Approaches-to-Achieviing-CMMC-2.0-Certification/blob/main/PO%26AM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#POA&M prioritization & remediation recommendation script

# 1) Upload & preprocessing via google.colab.files
from google.colab import files
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder

def load_poam_via_colab(prompt: str = "Upload your POA&M CSV file") -> pd.DataFrame:
    """
    Prompt for a CSV upload in Colab and return it as a preprocessed DataFrame.

    Expects columns:
      - task_id
      - description
      - criticality (High/Medium/Low; matched ignoring case and surrounding spaces)
      - milestone_date (YYYY-MM-DD)
      - resources_required
      - past_outcome (success/failure) [for ML training]
      - remediation_type (categorical) [for ML training]

    Two derived columns are added:
      - crit_score: 3 / 2 / 1 for High / Medium / Low (NaN for anything else)
      - days_to_deadline: whole days from today (UTC) until milestone_date,
        with overdue items clipped to 0

    Only the first uploaded file is used.

    Raises:
        FileNotFoundError: if the upload dialog returned no files.
    """
    import io        # local imports: only this function needs them
    import warnings

    print(prompt)
    uploaded = files.upload()
    if not uploaded:
        raise FileNotFoundError("No file uploaded—cannot load POA&M data.")
    # grab the first uploaded file
    filename = next(iter(uploaded))
    # Parse the uploaded bytes directly instead of re-opening `filename` from
    # disk: Colab may save the upload under a de-duplicated name (e.g.
    # "poam_entries (1).csv"), in which case a file called `filename` on disk
    # can be a stale copy from an earlier upload.
    df = pd.read_csv(io.BytesIO(uploaded[filename]), parse_dates=['milestone_date'])

    # map criticality to numeric, tolerating "high", " HIGH ", etc.
    crit_labels = df['criticality'].astype(str).str.strip().str.capitalize()
    df['crit_score'] = crit_labels.map({'High': 3, 'Medium': 2, 'Low': 1})
    unmapped = df['crit_score'].isna()
    if unmapped.any():
        # Keep the rows (crit_score stays NaN) but make the problem visible,
        # since NaN scores propagate into priorities and model features.
        bad_values = sorted({str(v) for v in df.loc[unmapped, 'criticality'].unique()})
        warnings.warn(
            f"{int(unmapped.sum())} row(s) have an unrecognized criticality "
            f"{bad_values}; their crit_score is NaN."
        )

    # compute days to deadline, measured from today's date in UTC
    # (timezone-naive midnight, so it can be subtracted from naive dates)
    today = pd.Timestamp.now(tz="UTC").normalize().tz_localize(None)
    df['days_to_deadline'] = (df['milestone_date'] - today).dt.days.clip(lower=0)
    return df

# 2) Priority scoring
def compute_priority(df: pd.DataFrame, w_crit: float = 0.7, w_time: float = 0.3) -> pd.DataFrame:
    """
    Score and rank tasks by a weighted blend of criticality and urgency.

    Args:
        df: frame with numeric 'crit_score' and 'days_to_deadline' columns.
        w_crit: weight applied to the normalized criticality.
        w_time: weight applied to the normalized urgency.

    Returns:
        A copy of `df` (the input is not modified) with three added columns,
        sorted by 'priority_score' in descending order:
          - crit_norm: min-max normalized crit_score
          - time_norm: 1 - days_to_deadline / max(days_to_deadline), so the
            nearest deadline scores highest
          - priority_score: w_crit * crit_norm + w_time * time_norm
    """
    df = df.copy()

    crit_min = df['crit_score'].min()
    crit_spread = df['crit_score'].max() - crit_min
    # If every task has the same criticality the spread is 0; dividing by it
    # would turn every priority into NaN. Dividing by 1 instead yields a
    # uniform crit_norm of 0, so the ranking falls back to deadlines alone.
    df['crit_norm'] = (df['crit_score'] - crit_min) / (crit_spread if crit_spread != 0 else 1)

    max_days = df['days_to_deadline'].max()
    # Same guard for the time axis: all deadlines today/overdue gives max 0.
    df['time_norm'] = 1 - (df['days_to_deadline'] / (max_days if max_days != 0 else 1))

    df['priority_score'] = w_crit * df['crit_norm'] + w_time * df['time_norm']
    return df.sort_values('priority_score', ascending=False)

# 3) Train ML model on historical remediation outcomes
def train_remediation_model(df: pd.DataFrame, model_type: str = 'classification'):
    """
    Fit a model on historical POA&M rows using crit_score and days_to_deadline.

    Args:
        df: historical data containing 'crit_score' and 'days_to_deadline',
            plus 'remediation_type' (classification) or
            'time_to_complete_days' (regression).
        model_type: 'classification' trains a RandomForestClassifier that
            predicts remediation_type; 'regression' trains a LinearRegression
            that predicts time_to_complete_days.

    Returns:
        (model, label_encoder) for classification, where label_encoder maps
        predicted integers back to remediation_type labels;
        (model, None) for regression.

    Raises:
        ValueError: if model_type is neither 'classification' nor 'regression'.

    The hold-out score (accuracy or R²) is printed as a side effect.
    """
    features = ['crit_score', 'days_to_deadline']
    X = df[features].copy()

    if model_type == 'classification':
        le = LabelEncoder()
        y = le.fit_transform(df['remediation_type'])
        try:
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, random_state=42, stratify=y
            )
        except ValueError:
            # A stratified split is rejected on small/imbalanced data (for
            # example a remediation type that appears only once). Fall back
            # to a plain random split rather than aborting training; if the
            # data is unusable for another reason this call raises as before.
            print("Warning: stratified split not possible; using an unstratified split.")
            X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
        clf = RandomForestClassifier(n_estimators=100, random_state=42)
        clf.fit(X_train, y_train)
        print(f"Classification accuracy: {clf.score(X_test, y_test):.2%}")
        return clf, le

    elif model_type == 'regression':
        # assumes 'time_to_complete_days' column exists
        y = df['time_to_complete_days']
        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
        reg = LinearRegression()
        reg.fit(X_train, y_train)
        print(f"Regression R²: {reg.score(X_test, y_test):.3f}")
        return reg, None

    else:
        raise ValueError("model_type must be 'classification' or 'regression'")

# 4) Recommend remediation for new tasks
def recommend_remediation(df_new: pd.DataFrame, model, label_encoder=None) -> pd.DataFrame:
    """
    Attach model predictions to a copy of `df_new`.

    Args:
        df_new: tasks to score; must contain 'crit_score' and
            'days_to_deadline', the two columns passed to the model.
        model: fitted estimator exposing predict().
        label_encoder: encoder exposing inverse_transform(). When given,
            predictions are decoded into labels; when None, predictions are
            stored as-is.

    Returns:
        A copy of `df_new` (the input is not modified) with either a
        'recommended_remediation' column (label_encoder given) or an
        'estimated_days_to_complete' column (label_encoder is None).
    """
    df_new = df_new.copy()
    features = ['crit_score', 'days_to_deadline']
    X_new = df_new[features]
    preds = model.predict(X_new)
    # Compare against None explicitly: whether an encoder was supplied must
    # not depend on how the encoder object evaluates in a boolean context.
    if label_encoder is not None:
        df_new['recommended_remediation'] = label_encoder.inverse_transform(preds)
    else:
        df_new['estimated_days_to_complete'] = preds
    return df_new

# =============================================================================
# Example Colab Workflow
# =============================================================================

# Step A: Upload historical POA&M (for training + priority)
# load_poam_via_colab adds crit_score and days_to_deadline; compute_priority
# then adds priority_score and returns the rows sorted highest-priority first,
# so head(10) below is the ten most urgent tasks.
poam_df = load_poam_via_colab("Step A: Upload historical POA&M CSV")
poam_df = compute_priority(poam_df)
print("Top 10 tasks by priority:")
print(poam_df[['task_id','priority_score']].head(10))

# Step B: Train your remediation-type classifier
# Returns the fitted classifier together with the LabelEncoder needed to turn
# its integer predictions back into remediation_type labels. The hold-out
# accuracy is printed by train_remediation_model itself.
clf_model, clf_le = train_remediation_model(poam_df, model_type='classification')

# Step C: Upload new/unremediated POA&M items for recommendations
# The model only reads crit_score and days_to_deadline; running
# compute_priority first means the recommendations print in priority order.
new_tasks = load_poam_via_colab("Step C: Upload new POA&M items CSV")
new_tasks = compute_priority(new_tasks)
recommendations = recommend_remediation(new_tasks, clf_model, clf_le)

print("\nRecommended remediation actions:")
print(recommendations[['task_id','recommended_remediation']])

Step A: Upload historical POA&M CSV


Saving poam_entries.csv to poam_entries.csv
Top 10 tasks by priority:
     task_id  priority_score
87  TASK-088        0.996471
53  TASK-054        0.989412
54  TASK-055        0.985882
64  TASK-065        0.982353
96  TASK-097        0.964706
78  TASK-079        0.961176
14  TASK-015        0.918824
97  TASK-098        0.918824
30  TASK-031        0.911765
84  TASK-085        0.897647
Classification accuracy: 32.00%
Step C: Upload new POA&M items CSV


Saving poam_entries.csv to poam_entries (1).csv

Recommended remediation actions:
     task_id recommended_remediation
74  TASK-075             Reconfigure
66  TASK-067             Reconfigure
5   TASK-006             Train Staff
62  TASK-063        Update Procedure
82  TASK-083        Update Procedure
..       ...                     ...
61  TASK-062        Update Procedure
17  TASK-018        Update Procedure
33  TASK-034        Update Procedure
12  TASK-013        Update Procedure
92  TASK-093        Update Procedure

[100 rows x 2 columns]
