<a href="https://colab.research.google.com/github/Munshid-mhd/AI-TASK-MANAGMENT-SYSTEM/blob/main/Untitled39.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""
Project 1 — AI-Powered Task Management System (Split by Weeks)
Technologies: NLP, Automation, Time Series Forecasting

Week 1: EDA and Preprocessing
Week 2: Visualization and Feature Engineering
Week 3: Forecasting (ARIMA, Prophet)
Week 4: Machine Learning Models (Classification, Regression)

Input: synthetic_task_dataset.csv (path set by DATA_PATH, relative to the working directory)
"""

import warnings
warnings.filterwarnings('ignore')

import os
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Optional dependency: ARIMA forecasting. The flag records whether the import
# succeeded so arima_forecast() can skip itself instead of failing.
try:
    from statsmodels.tsa.arima.model import ARIMA
    statsmodels_available = True
except Exception:
    statsmodels_available = False

# Optional dependency: Prophet forecasting, guarded the same way and checked
# by prophet_forecast().
try:
    from prophet import Prophet
    prophet_available = True
except Exception:
    prophet_available = False

# Default CSV read by load_data(); a relative path, so it is resolved against
# the current working directory.
DATA_PATH = Path('synthetic_task_dataset.csv')

# ----------------------------
# WEEK 1: EDA & PREPROCESSING
# ----------------------------

def load_data(path=DATA_PATH):
    """Read the task dataset from a CSV file and report its size.

    Args:
        path: Location of the CSV file; defaults to ``DATA_PATH``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    frame = pd.read_csv(path)
    n_rows, n_cols = frame.shape
    print(f"Loaded dataset: {n_rows} rows, {n_cols} columns")
    return frame


def basic_eda(df):
    """Print a quick exploratory overview of ``df`` to standard output.

    Sections are printed in this order: the ``DataFrame.info`` report, the
    transposed ``describe`` summary, per-column null counts, and the first
    rows.

    Args:
        df: The DataFrame to summarise.

    Returns:
        None.
    """
    print("\n--- DATA INFO ---")
    # DataFrame.info() writes its report itself and returns None, so wrapping
    # it in print() would emit a stray "None" line after the report.
    df.info()
    print("\n--- NUMERIC SUMMARY ---")
    print(df.describe().T)
    print("\n--- MISSING VALUES ---")
    print(df.isnull().sum())
    print("\n--- SAMPLE ROWS ---")
    print(df.head())


def preprocess(df):
    """Return a cleaned copy of ``df``; the caller's frame is not modified.

    Steps, in order:

    1. Parse ``created_at``, ``due_date`` and ``completed_at`` (when present)
       as datetimes; values that cannot be parsed become ``NaT``.
    2. If there is no ``status`` column but ``completed_at`` exists, derive
       ``status`` as 'completed' where ``completed_at`` is set, else 'open'.
    3. If there is no ``duration_minutes`` column but both ``created_at`` and
       ``completed_at`` exist, derive it as their difference in minutes.
    4. Fill missing numeric values with the column median and missing
       object-dtype values with 'unknown'.

    Args:
        df: Raw task DataFrame.

    Returns:
        A new, cleaned DataFrame.
    """
    df = df.copy()

    for col in ('created_at', 'due_date', 'completed_at'):
        if col in df.columns:
            df[col] = pd.to_datetime(df[col], errors='coerce')

    if 'status' not in df.columns and 'completed_at' in df.columns:
        df['status'] = np.where(df['completed_at'].notna(), 'completed', 'open')

    if 'duration_minutes' not in df.columns and {'created_at', 'completed_at'}.issubset(df.columns):
        df['duration_minutes'] = (df['completed_at'] - df['created_at']).dt.total_seconds() / 60.0

    # Assign the filled column back rather than calling
    # ``df[col].fillna(..., inplace=True)``: that form operates on a temporary
    # selection and leaves ``df`` unchanged under pandas Copy-on-Write.
    # Note this also fills a derived ``duration_minutes``, so rows without a
    # completion time receive the median duration.
    for col in df.select_dtypes(include=[np.number]).columns:
        df[col] = df[col].fillna(df[col].median())

    for col in df.select_dtypes(include=['object']).columns:
        df[col] = df[col].fillna('unknown')

    return df


# ----------------------------
# WEEK 2: VISUALIZATION & FEATURE ENGINEERING
# ----------------------------

def feature_engineering(df):
    """Return a copy of ``df`` with derived feature columns appended.

    Added columns (each only when its source columns exist):

    * ``created_dow``, ``created_hour``, ``created_month`` from ``created_at``
    * ``title_len`` as the character length of ``title`` cast to ``str``
    * ``days_to_due`` as whole days between ``created_at`` and ``due_date``

    Args:
        df: DataFrame whose ``created_at`` / ``due_date`` columns, if
            present, support the ``.dt`` accessor.

    Returns:
        A new DataFrame; the input is left untouched.
    """
    enriched = df.copy()

    # Decide up front which sources exist; none of the derived columns can
    # change these answers.
    has_created = 'created_at' in enriched.columns
    has_title = 'title' in enriched.columns
    has_due = 'due_date' in enriched.columns

    if has_created:
        created = enriched['created_at'].dt
        enriched['created_dow'] = created.dayofweek
        enriched['created_hour'] = created.hour
        enriched['created_month'] = created.month

    if has_title:
        titles = enriched['title'].astype(str)
        enriched['title_len'] = titles.apply(len)

    if has_due and has_created:
        gap = enriched['due_date'] - enriched['created_at']
        enriched['days_to_due'] = gap.dt.days

    return enriched


def visualize_data(df):
    """Show one chart per available column of interest.

    Charts, in order, each skipped when its column is absent:

    * count plot of ``status``
    * count plot of ``created_dow``
    * 30-bin histogram of ``duration_minutes``

    Args:
        df: DataFrame to plot from.

    Returns:
        None; each figure is displayed with ``plt.show()``.
    """
    # (required column, drawing callable, chart title)
    charts = (
        ('status',
         lambda: sns.countplot(x='status', data=df),
         'Task Status Distribution'),
        ('created_dow',
         lambda: sns.countplot(x='created_dow', data=df),
         'Tasks by Day of Week'),
        ('duration_minutes',
         lambda: sns.histplot(df['duration_minutes'], bins=30),
         'Task Duration Distribution'),
    )

    for column, draw, title in charts:
        if column not in df.columns:
            continue
        plt.figure(figsize=(6,4))
        draw()
        plt.title(title)
        plt.show()


# ----------------------------
# WEEK 3: FORECASTING
# ----------------------------

def prepare_timeseries(df):
    """Build a daily task-count series from ``created_at``.

    Args:
        df: DataFrame, expected to carry a datetime ``created_at`` column.

    Returns:
        A Series named ``task_count`` holding the number of rows per
        calendar day, or ``None`` when ``created_at`` is missing.
    """
    if 'created_at' in df.columns:
        by_creation = df.set_index('created_at')
        per_day = by_creation.resample('D').size()
        return per_day.rename('task_count')
    return None


def arima_forecast(ts, steps=30, order=(5, 1, 0)):
    """Fit an ARIMA model to ``ts`` and forecast future values.

    Args:
        ts: Time series to model (``main`` passes the daily task counts
            from ``prepare_timeseries``).
        steps: Number of future periods to forecast.
        order: ARIMA ``(p, d, q)`` order. Defaults to ``(5, 1, 0)``, the
            value this function previously hard-coded.

    Returns:
        The forecast returned by the fitted model, or ``None`` when
        statsmodels could not be imported.
    """
    if not statsmodels_available:
        print("ARIMA not available.")
        return None
    model = ARIMA(ts, order=order)
    model_fit = model.fit()
    forecast = model_fit.forecast(steps=steps)
    print(forecast.head())
    return forecast


def prophet_forecast(df, periods=30):
    """Fit Prophet on daily task counts and forecast ``periods`` days ahead.

    Args:
        df: DataFrame with a ``created_at`` column (datetime, or values
            parseable by ``pd.to_datetime``).
        periods: Number of future periods passed to
            ``make_future_dataframe``.

    Returns:
        The DataFrame returned by ``Prophet.predict``, or ``None`` when
        Prophet could not be imported or ``created_at`` is missing.
    """
    if not prophet_available:
        print("Prophet not available.")
        return None
    if 'created_at' not in df.columns:
        print("No created_at column found.")
        return None

    # Count tasks per calendar day. Grouping on the raw timestamps would give
    # one row per distinct timestamp rather than a daily series, and would
    # disagree with the daily resampling used by prepare_timeseries().
    created = pd.to_datetime(df['created_at'], errors='coerce').dropna()
    ones = pd.Series(1, index=pd.DatetimeIndex(created, name='ds'))
    # Summing the 1s per day yields counts; days without tasks become 0.
    daily = ones.resample('D').sum().reset_index(name='y')

    model = Prophet()
    model.fit(daily)
    future = model.make_future_dataframe(periods=periods)
    forecast = model.predict(future)
    print(forecast[['ds','yhat']].tail())
    return forecast


# ----------------------------
# WEEK 4: MACHINE LEARNING MODEL
# ----------------------------

def supervised_learning(df):
    """Train a random forest to predict whether a task is completed.

    The binary target is 1 where ``status == 'completed'``, else 0. Every
    numeric column other than the target is used as a feature. A 20% hold-out
    split is evaluated and its classification report printed.

    Args:
        df: DataFrame with a ``status`` column. It is not modified.

    Returns:
        The fitted ``RandomForestClassifier``, or ``None`` when ``status`` is
        missing or no numeric feature columns exist.
    """
    if 'status' not in df.columns:
        print('No status column found.')
        return None

    # Work on a copy so the helper 'target' column does not leak into the
    # caller's DataFrame.
    data = df.copy()
    data['target'] = (data['status'] == 'completed').astype(int)

    numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()
    features = [c for c in numeric_cols if c != 'target']
    print("Auto-selected features:", features)
    if not features:
        # Fitting on zero columns would raise; report and bail out instead.
        print('No numeric feature columns found.')
        return None

    # NOTE(review): numeric columns derived from completion timestamps
    # (e.g. duration_minutes) may leak the label -- confirm the feature set.
    X = data[features]
    # Features created after preprocessing (e.g. days_to_due) can still hold
    # NaN; impute with the column median, matching preprocess().
    X = X.fillna(X.median())
    y = data['target']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    print('\n--- MODEL PERFORMANCE ---')
    print(classification_report(y_test, preds))

    return model


# ----------------------------
# MAIN EXECUTION FLOW
# ----------------------------

def main():
    """Run the four weekly stages end to end on the default dataset."""
    # Week 1: load, inspect, clean
    raw = load_data()
    basic_eda(raw)
    cleaned = preprocess(raw)

    # Week 2: derive features, then plot them
    enriched = feature_engineering(cleaned)
    visualize_data(enriched)

    # Week 3: forecasting, only when a daily series could be built
    daily_counts = prepare_timeseries(enriched)
    if daily_counts is not None:
        arima_forecast(daily_counts)
        prophet_forecast(enriched)

    # Week 4: completion classifier
    supervised_learning(enriched)

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
