In [4]:
import pandas as pd

# Load dataset
df = pd.read_csv("final_task_dataset_balanced.csv")

# ===== Basic Info =====
print("Dataset shape:", df.shape)
print("\nColumns:", df.columns.tolist())
print("\nData types:")
print(df.dtypes)

# ===== Missing Values =====
print("\nMissing values per column:")
print(df.isnull().sum())

# ===== Unique Values =====
print("\nUnique values per column:")
for col in df.columns:
    print(f"{col}: {df[col].nunique()}")

# ===== Categorical Value Distribution =====
categorical_cols = ['priority', 'category', 'assigned_user', 'status', 'past_task_behavior']
for col in categorical_cols:
    if col in df.columns:
        print(f"\nDistribution for {col}:")
        print(df[col].value_counts())

# ===== Duplicate Check =====
print("\nDuplicate rows:", df.duplicated().sum())

# ===== Numeric Stats =====
print("\nNumeric Columns Summary:")
print(df.describe())


Dataset shape: (486, 22)

Columns: ['task_id', 'task_description', 'deadline', 'priority', 'assigned_user', 'user_workload', 'past_task_behavior', 'category', 'status', 'time_taken_(hours)', 'deadline_days_remaining', 'task_length', 'has_keyword_urgent', 'is_weekend_deadline', 'category_encoded', 'status_encoded', 'priority_encoded', 'is_completed', 'user_current_load', 'past_behavior_score', 'workload', 'deadline_days']

Data types:
task_id                     object
task_description            object
deadline                    object
priority                    object
assigned_user               object
user_workload              float64
past_task_behavior          object
category                    object
status                      object
time_taken_(hours)         float64
deadline_days_remaining    float64
task_length                float64
has_keyword_urgent         float64
is_weekend_deadline        float64
category_encoded           float64
status_encoded             float64
pr

In [5]:
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler

# ===== Load dataset =====
df = pd.read_csv("final_task_dataset_balanced.csv")

# ===== Standardize categorical values =====
# Fix past_task_behavior inconsistencies
behavior_map = {
    "On Time": "On Time",
    "on-time": "On Time",
    "Late": "Late",
    "late": "Late",
    "Needs Improvement": "Needs Improvement",
    "delayed": "Delayed",
    "Delayed": "Delayed",
    "Excellent": "Excellent"
}
df['past_task_behavior'] = df['past_task_behavior'].map(behavior_map)

# ===== Convert deadline to datetime =====
df['deadline'] = pd.to_datetime(df['deadline'], errors='coerce')

# ===== Recompute derived columns =====
today = pd.Timestamp.today()
df['deadline_days_remaining'] = (df['deadline'] - today).dt.days.clip(lower=0)
df['deadline_days'] = df['deadline_days_remaining']
df['task_length'] = df['task_description'].fillna("").apply(lambda x: len(str(x).split()))
df['has_keyword_urgent'] = df['task_description'].fillna("").apply(lambda x: 1 if 'urgent' in str(x).lower() else 0)
df['is_weekend_deadline'] = df['deadline'].dt.weekday.apply(lambda x: 1 if x in [5, 6] else 0)

# ===== Map past_task_behavior to numeric score =====
behavior_score_map = {
    "On Time": 1.0,
    "Late": 0.5,
    "Needs Improvement": 0.3,
    "Delayed": 0.3,
    "Excellent": 1.2
}
df['past_behavior_score'] = df['past_task_behavior'].map(behavior_score_map).fillna(0.5)

# ===== Impute missing values =====
# Numeric columns → median
numeric_cols = ['user_current_load', 'workload', 'time_taken_(hours)', 'deadline_days_remaining',
                'task_length', 'has_keyword_urgent', 'is_weekend_deadline', 'category_encoded',
                'status_encoded', 'priority_encoded', 'is_completed', 'past_behavior_score', 'deadline_days']
for col in numeric_cols:
    df[col] = df[col].fillna(df[col].median())

# Categorical columns → mode
categorical_cols = ['priority', 'category', 'status', 'assigned_user']
for col in categorical_cols:
    df[col] = df[col].fillna(df[col].mode()[0])

# ===== Normalize numeric columns =====
scaler = MinMaxScaler()
df[['user_current_load', 'workload', 'time_taken_(hours)', 'deadline_days_remaining', 'task_length']] = \
    scaler.fit_transform(df[['user_current_load', 'workload', 'time_taken_(hours)', 'deadline_days_remaining', 'task_length']])

# ===== Drop duplicates =====
df = df.drop_duplicates()

# ===== Save cleaned dataset =====
df.to_csv("fully_cleaned_task_dataset.csv", index=False)

# ===== Check balance =====
print("✅ Cleaning Complete!")
print("Dataset shape:", df.shape)
print("\nPriority distribution:\n", df['priority'].value_counts())
print("\nCategory distribution:\n", df['category'].value_counts())
print("\nStatus distribution:\n", df['status'].value_counts())


✅ Cleaning Complete!
Dataset shape: (390, 22)

Priority distribution:
 priority
High      127
Medium    120
Low       119
Urgent     24
Name: count, dtype: int64

Category distribution:
 category
DevOps           65
Development      65
Design           65
Documentation    65
Testing          65
Management       65
Name: count, dtype: int64

Status distribution:
 status
Completed      120
In Progress     96
To Do           90
Pending         84
Name: count, dtype: int64


In [6]:
import pandas as pd
import nltk, re
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Download NLTK resources
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('punkt_tab')

# Load dataset
df = pd.read_csv("fully_cleaned_task_dataset.csv")

# Initialize tools
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

# Preprocessing function
def clean_text(text):
    if pd.isna(text):
        return ""
    text = text.lower()  # lowercase
    text = re.sub(r'[^a-z\s]', '', text)  # remove punctuation/numbers
    tokens = nltk.word_tokenize(text)  # tokenize
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]  # remove stopwords & lemmatize
    return " ".join(tokens)

# Apply NLP cleaning
df['task_description_clean'] = df['task_description'].apply(clean_text)

# Save updated dataset
df.to_csv("nlp_cleaned_task_dataset.csv", index=False)

print("✅ NLP preprocessing complete! Added column 'task_description_clean'.")
print(df[['task_description', 'task_description_clean']].head())


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


✅ NLP preprocessing complete! Added column 'task_description_clean'.
                           task_description  \
0  Setup monitoring for production servers.   
1              Restore database from backup   
2           Implement search functionality.   
3      Update color palette and typography.   
4        Write API reference documentation.   

               task_description_clean  
0  setup monitoring production server  
1             restore database backup  
2      implement search functionality  
3     update color palette typography  
4   write api reference documentation  


In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score
import joblib

# === Load dataset ===
df.to_csv("nlp_cleaned_task_dataset.csv")

# === Features & Labels ===
X = df['task_description_clean']
y = df['category']

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# === Train-Test Split ===
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

# === TF-IDF (optimized for size) ===
vectorizer = TfidfVectorizer(max_features=10000, ngram_range=(1,2), stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# === Base Models ===
nb = MultinomialNB()
rf = RandomForestClassifier(n_estimators=200, max_depth=25, random_state=42)
xgb = XGBClassifier(n_estimators=300, max_depth=6, learning_rate=0.1, subsample=0.8, colsample_bytree=0.8, eval_metric='mlogloss', random_state=42)

# === Voting Ensemble ===
voting_clf = VotingClassifier(
    estimators=[('nb', nb), ('rf', rf), ('xgb', xgb)],
    voting='soft',  # soft = uses predicted probabilities
    weights=[1,2,3]  # give higher weight to xgb
)

# === Train ===
voting_clf.fit(X_train_tfidf, y_train)

# === Evaluate ===
y_pred = voting_clf.predict(X_test_tfidf)
acc = accuracy_score(y_test, y_pred)
print(f"Voting Ensemble Accuracy: {acc:.4f}")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

# === Save Models & Vectorizer ===
joblib.dump(voting_clf, "voting_ensemble_task_classifier.joblib", compress=3)
joblib.dump(vectorizer, "task_tfidf_vectorizer.joblib", compress=3)
joblib.dump(label_encoder, "task_label_encoder.joblib", compress=3)

print("✅ Ensemble model, TF-IDF vectorizer, and label encoder saved!")


Voting Ensemble Accuracy: 0.4103
               precision    recall  f1-score   support

       Design       0.33      0.23      0.27        13
       DevOps       0.32      0.46      0.38        13
  Development       0.30      0.23      0.26        13
Documentation       0.58      0.54      0.56        13
   Management       0.62      0.38      0.48        13
      Testing       0.40      0.62      0.48        13

     accuracy                           0.41        78
    macro avg       0.43      0.41      0.40        78
 weighted avg       0.43      0.41      0.40        78

✅ Ensemble model, TF-IDF vectorizer, and label encoder saved!


In [8]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from xgboost import XGBClassifier
from sklearn.utils.class_weight import compute_class_weight

# === Load preprocessed dataset ===
df = pd.read_csv("nlp_cleaned_task_dataset.csv")  # use cleaned + NLP dataset
X = df['task_description_clean']
y = df['priority']

# === Encode priority labels ===
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# === Train/Test Split ===
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# === Compute Class Weights ===
class_weights = compute_class_weight(class_weight='balanced', classes=label_encoder.classes_, y=y)
class_weight_dict = {i: w for i, w in enumerate(class_weights)}

# === TF-IDF Vectorizer (size tuned for XGBoost) ===
tfidf_xgb = TfidfVectorizer(max_features=3000, ngram_range=(1,2))
X_train_vec = tfidf_xgb.fit_transform(X_train_raw)
X_test_vec = tfidf_xgb.transform(X_test_raw)

# === XGBoost Model (lightweight params) ===
xgb_model = XGBClassifier(
    n_estimators=200,
    max_depth=10,
    learning_rate=0.1,
    scale_pos_weight=2,  # Helps balance
    use_label_encoder=False,
    eval_metric='mlogloss',
    random_state=42
)

# === Train ===
xgb_model.fit(X_train_vec, y_train)

# === Evaluate ===
y_pred = xgb_model.predict(X_test_vec)
print("XGBoost Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

# === Save model, vectorizer, and encoder ===
joblib.dump(xgb_model, "priority_xgboost.pkl")
joblib.dump(tfidf_xgb, "priority_tfidf_vectorizer.pkl")
joblib.dump(label_encoder, "priority_label_encoder.pkl")

print("\n✅ XGBoost model, TF-IDF vectorizer, and label encoder saved!")


Parameters: { "scale_pos_weight", "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


XGBoost Accuracy: 0.8461538461538461
              precision    recall  f1-score   support

        High       0.79      0.92      0.85        25
         Low       0.88      0.96      0.92        24
      Medium       1.00      0.75      0.86        24
      Urgent       0.40      0.40      0.40         5

    accuracy                           0.85        78
   macro avg       0.77      0.76      0.76        78
weighted avg       0.86      0.85      0.85        78


✅ XGBoost model, TF-IDF vectorizer, and label encoder saved!


In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier
import joblib
from scipy.sparse import hstack

# === Load cleaned dataset ===
df = pd.read_csv("nlp_cleaned_task_dataset.csv")

# === 1. Unify usernames ===
df['assigned_user'] = df['assigned_user'].str.lower().str.replace(' ', '').str.replace('-', '_')
df['assigned_user'] = df['assigned_user'].apply(lambda x: x if x.startswith('user_') else x.replace('user', 'user_'))

# === 2. Group rare users ===
user_counts = df['assigned_user'].value_counts()
rare_users = user_counts[user_counts < 5].index
df['assigned_user'] = df['assigned_user'].apply(lambda x: 'other' if x in rare_users else x)

print("New user distribution:\n", df['assigned_user'].value_counts())

# === 3. Encode target ===
le = LabelEncoder()
df['assigned_user_encoded'] = le.fit_transform(df['assigned_user'])

# === Features ===
text_features = df['task_description_clean']
numeric_features = df[['category_encoded', 'priority_encoded', 'deadline_days',
                       'has_keyword_urgent', 'task_length', 'user_current_load',
                       'user_workload', 'past_behavior_score', 'is_weekend_deadline']]

# Save the order of numeric feature names for dashboard consistency
feature_names = list(numeric_features.columns)
joblib.dump(feature_names, "user_assignment_feature_names.pkl")

# === 4. TF-IDF Vectorization ===
tfidf = TfidfVectorizer(max_features=300)
X_text = tfidf.fit_transform(text_features)
scaler = StandardScaler()
X_num = scaler.fit_transform(numeric_features)

X = hstack([X_text, X_num])
y = df['assigned_user_encoded']

# === 5. Train/Test Split ===
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# === 6. SMOTE Oversampling ===
smote = SMOTE(random_state=42)
X_train_bal, y_train_bal = smote.fit_resample(X_train, y_train)

# === 7. Compute class weights ===
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train_bal), y=y_train_bal)
class_weight_dict = {i: w for i, w in enumerate(class_weights)}

# === 8. Train XGBoost ===
xgb_model = XGBClassifier(
    n_estimators=200,
    max_depth=8,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    scale_pos_weight=1  # XGB auto-balances, but we oversampled too
)
xgb_model.fit(X_train_bal, y_train_bal)

# === 9. Evaluate ===
y_pred = xgb_model.predict(X_test)
print("\nXGBoost Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=le.classes_))

# === 10. Save everything ===
joblib.dump(xgb_model, "user_assignment_xgb.pkl")
joblib.dump(tfidf, "user_assignment_tfidf.pkl")
joblib.dump(le, "user_assignment_label_encoder.pkl")
joblib.dump(scaler, "user_assignment_scaler.pkl")
joblib.dump(feature_names, "user_assignment_feature_names.pkl")  # <--- ADDED

print("✅ User assignment model, TF-IDF, label encoder, scaler, and feature names saved!")


New user distribution:
 assigned_user
user_7     49
user_1     45
user_6     41
user_8     41
user_10    41
user_5     40
user_2     36
user_9     36
user_4     33
user_3     28
Name: count, dtype: int64


Parameters: { "scale_pos_weight" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



XGBoost Accuracy: 0.6153846153846154
              precision    recall  f1-score   support

      user_1       1.00      0.78      0.88         9
     user_10       0.00      0.00      0.00         8
      user_2       0.60      0.86      0.71         7
      user_3       0.83      0.83      0.83         6
      user_4       0.86      0.86      0.86         7
      user_5       0.50      0.38      0.43         8
      user_6       0.78      0.88      0.82         8
      user_7       0.50      0.50      0.50        10
      user_8       0.70      0.88      0.78         8
      user_9       0.22      0.29      0.25         7

    accuracy                           0.62        78
   macro avg       0.60      0.62      0.61        78
weighted avg       0.60      0.62      0.60        78

✅ User assignment model, TF-IDF, label encoder, scaler, and feature names saved!


In [12]:
!pip install -q streamlit
import streamlit as st
import pandas as pd
import joblib
from datetime import datetime, date
from scipy.sparse import hstack

# === Load all models and encoders ===
category_model = joblib.load("voting_ensemble_task_classifier.joblib")
category_vectorizer = joblib.load("task_tfidf_vectorizer.joblib")
category_label_encoder = joblib.load("task_label_encoder.joblib")

priority_model = joblib.load("priority_xgboost.pkl")
priority_vectorizer = joblib.load("priority_tfidf_vectorizer.pkl")
priority_label_encoder = joblib.load("priority_label_encoder.pkl")

user_model = joblib.load("user_assignment_xgb.pkl")
user_vectorizer = joblib.load("user_assignment_tfidf.pkl")
user_label_encoder = joblib.load("user_assignment_label_encoder.pkl")
user_scaler = joblib.load("user_assignment_scaler.pkl")
user_feature_names = joblib.load("user_assignment_feature_names.pkl")

# === Load dataset (for dropdown) ===
df = pd.read_csv("nlp_cleaned_task_dataset.csv")

# === Helper function: Days left ===
def calculate_days_left(deadline_str):
    try:
        deadline = datetime.strptime(deadline_str, "%Y-%m-%d")
        return max((deadline - datetime.now()).days, 0)
    except:
        return 0

# === Prediction function ===
def predict_all(task_description, deadline):
    # --- Category prediction ---
    X_cat = category_vectorizer.transform([task_description])
    category_pred = category_model.predict(X_cat)[0]
    category_name = category_label_encoder.inverse_transform([category_pred])[0]

    # --- Priority prediction ---
    X_pri = priority_vectorizer.transform([task_description])
    priority_pred = priority_model.predict(X_pri)[0]
    priority_name = priority_label_encoder.inverse_transform([priority_pred])[0]

    # --- Base numeric features ---
    sample_row = df.sample(1).iloc[0]  # For filling context-dependent features
    base_numeric = {
        'category_encoded': category_pred,
        'priority_encoded': priority_pred,
        'deadline_days': calculate_days_left(deadline),
        'has_keyword_urgent': int("urgent" in task_description.lower()),
        'task_length': len(task_description.split()),
        'user_current_load': sample_row['user_current_load'],
        'user_workload': sample_row['user_workload'],
        'past_behavior_score': sample_row['past_behavior_score'],
        'is_weekend_deadline': 1 if datetime.strptime(deadline, "%Y-%m-%d").weekday() >= 5 else 0
    }
    numeric_features = pd.DataFrame([base_numeric])

    # --- Ensure all training columns exist ---
    for col in user_feature_names:
        if col not in numeric_features.columns:
            numeric_features[col] = 0  # Fill missing features with 0

    numeric_features = numeric_features[user_feature_names]  # Reorder

    # --- Scale numeric ---
    numeric_scaled = user_scaler.transform(numeric_features)

    # --- Combine text + numeric ---
    X_text_user = user_vectorizer.transform([task_description])
    X_user_final = hstack([X_text_user, numeric_scaled])

    # --- User prediction ---
    user_pred = user_model.predict(X_user_final)[0]
    assigned_user = user_label_encoder.inverse_transform([user_pred])[0]

    # --- Days left ---
    days_left = calculate_days_left(deadline)

    return category_name, priority_name, assigned_user, days_left

# === Streamlit UI ===
st.set_page_config(page_title="AI Task Management System", layout="wide")
st.title("AI-Powered Task Management System")

# --- Task input ---
task_description = st.selectbox("Select a Task from Dataset", df['task_description_clean'].unique())

# Deadline should start from today
deadline = st.date_input("Deadline (YYYY-MM-DD)", min_value=date.today())
deadline_str = deadline.strftime("%Y-%m-%d")

# --- Predict button ---
if st.button("Assign Task"):
    category_name, priority_name, assigned_user, days_left = predict_all(task_description, deadline_str)

    st.subheader("Prediction Results:")
    st.write(f"**Task Category:** {category_name}")
    st.write(f"**Task Priority:** {priority_name}")
    st.write(f"**Assigned User:** {assigned_user}")
    st.write(f"**Days Left until Deadline:** {days_left}")

2025-07-24 16:08:55.453 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-07-24 16:08:55.468 Session state does not function when running a script without `streamlit run`


In [14]:
dashboard_code = """

import streamlit as st
import pandas as pd
import joblib
from datetime import datetime, date
from scipy.sparse import hstack

# === Load all models and encoders ===
category_model = joblib.load("voting_ensemble_task_classifier.joblib")
category_vectorizer = joblib.load("task_tfidf_vectorizer.joblib")
category_label_encoder = joblib.load("task_label_encoder.joblib")

priority_model = joblib.load("priority_xgboost.pkl")
priority_vectorizer = joblib.load("priority_tfidf_vectorizer.pkl")
priority_label_encoder = joblib.load("priority_label_encoder.pkl")

user_model = joblib.load("user_assignment_xgb.pkl")
user_vectorizer = joblib.load("user_assignment_tfidf.pkl")
user_label_encoder = joblib.load("user_assignment_label_encoder.pkl")
user_scaler = joblib.load("user_assignment_scaler (1).pkl")
user_feature_names = joblib.load("user_assignment_feature_names.pkl")

# === Load dataset (for dropdown) ===
df = pd.read_csv("nlp_cleaned_task_dataset.csv")

# === Helper function: Days left ===
def calculate_days_left(deadline_str):
    try:
        deadline = datetime.strptime(deadline_str, "%Y-%m-%d")
        return max((deadline - datetime.now()).days, 0)
    except:
        return 0

# === Prediction function ===
def predict_all(task_description, deadline):
    # --- Category prediction ---
    X_cat = category_vectorizer.transform([task_description])
    category_pred = category_model.predict(X_cat)[0]
    category_name = category_label_encoder.inverse_transform([category_pred])[0]

    # --- Priority prediction ---
    X_pri = priority_vectorizer.transform([task_description])
    priority_pred = priority_model.predict(X_pri)[0]
    priority_name = priority_label_encoder.inverse_transform([priority_pred])[0]

    # --- Base numeric features ---
    sample_row = df.sample(1).iloc[0]  # For filling context-dependent features
    base_numeric = {
        'category_encoded': category_pred,
        'priority_encoded': priority_pred,
        'deadline_days': calculate_days_left(deadline),
        'has_keyword_urgent': int("urgent" in task_description.lower()),
        'task_length': len(task_description.split()),
        'user_current_load': sample_row['user_current_load'],
        'user_workload': sample_row['user_workload'],
        'past_behavior_score': sample_row['past_behavior_score'],
        'is_weekend_deadline': 1 if datetime.strptime(deadline, "%Y-%m-%d").weekday() >= 5 else 0
    }
    numeric_features = pd.DataFrame([base_numeric])

    # --- Ensure all training columns exist ---
    for col in user_feature_names:
        if col not in numeric_features.columns:
            numeric_features[col] = 0  # Fill missing features with 0

    numeric_features = numeric_features[user_feature_names]  # Reorder

    # --- Scale numeric ---
    numeric_scaled = user_scaler.transform(numeric_features)

    # --- Combine text + numeric ---
    X_text_user = user_vectorizer.transform([task_description])
    X_user_final = hstack([X_text_user, numeric_scaled])

    # --- User prediction ---
    user_pred = user_model.predict(X_user_final)[0]
    assigned_user = user_label_encoder.inverse_transform([user_pred])[0]

    # --- Days left ---
    days_left = calculate_days_left(deadline)

    return category_name, priority_name, assigned_user, days_left

# === Streamlit UI ===
st.set_page_config(page_title="AI Task Management System", layout="wide")
st.title("AI-Powered Task Management System")

# --- Task input ---
task_description = st.selectbox("Select a Task from Dataset", df['task_description_clean'].unique())

# Deadline should start from today
deadline = st.date_input("Deadline (YYYY-MM-DD)", min_value=date.today())
deadline_str = deadline.strftime("%Y-%m-%d")

# --- Predict button ---
if st.button("Assign Task"):
    category_name, priority_name, assigned_user, days_left = predict_all(task_description, deadline_str)

    st.subheader("Prediction Results:")
    st.write(f"**Task Category:** {category_name}")
    st.write(f"**Task Priority:** {priority_name}")
    st.write(f"**Assigned User:** {assigned_user}")
    st.write(f"**Days Left until Deadline:** {days_left}")
"""

# Save to file
with open("dashboard.py", "w") as f:
    f.write(dashboard_code)

print("✅ Dashboard file saved as dashboard.py")


✅ Dashboard file saved as dashboard.py
