In [None]:
# 📌 Step 1: Load Dataset
print("📥 Step 1: Load Dataset")

from file_handler import load_data

# load_data() hands back the dataset together with a metadata mapping; per the
# project's convention any database storage happens inside that call.
df, meta = load_data()

if df is None:
    print("❌ Failed to load data.")
else:
    # Quick look at the first rows so the user can sanity-check the load
    print("\n✅ Data loaded successfully. Here's a preview:")
    print(df.head())

    # Persist the data's origin ("db" or "file") so later steps know where
    # to fetch it from
    with open("data_origin.txt", "w") as origin_file:
        if meta.get("db"):
            origin_file.write("db")
        else:
            origin_file.write("file")

    # Without a DB copy, keep a local CSV for the following steps
    if not meta.get("db"):
        df.to_csv("loaded_data.csv", index=False)
        print("💾 Data saved locally as 'loaded_data.csv'.")


In [None]:
# 📌 Step 2: Load Data for Understanding
print("📖 Step 2: Understanding Dataset")

from data_understanding import fetch_data_from_file, fetch_data_from_db, data_summary
import os

# Start from the "file" fallback and override it with whatever Step 1
# recorded in data_origin.txt, when that marker file exists
origin = "file"
if os.path.exists("data_origin.txt"):
    with open("data_origin.txt", "r") as origin_file:
        origin = origin_file.read().strip()

# Only an exact "db" marker selects the database loader; anything else
# falls through to the file loader
fetch_data = fetch_data_from_db if origin == "db" else fetch_data_from_file
df = fetch_data()

# Summarise the dataset, or warn when the loader returned nothing
if df is None:
    print("⚠️ Failed to load data. Please check previous steps or data source.")
else:
    print("✅ Data loaded successfully for understanding.\n")
    data_summary(df)


In [None]:
# 📌 Step 3: Preprocess the Data
# Runs the project's preprocessing pipeline on the DataFrame loaded in Step 2
# and previews the result.
print("🧼 Step 3: Data Preprocessing")

from preprocessing import preprocess_pipeline

# Step 2 only prints a warning and leaves df as None when loading fails, so
# stop here with a clear message instead of failing inside the pipeline.
if df is None:
    raise ValueError("❌ No data available to preprocess. Re-run Steps 1 and 2 first.")

preprocessed_df = preprocess_pipeline(df)
print("✅ Data preprocessing complete.")
print(preprocessed_df.head())


In [None]:
# 📌 Step 4: Visualize the Data
# Passes the preprocessed DataFrame produced in Step 3 to the project's
# plotting module.
print("📊 Step 4: Data Visualization")

import data_visualization as viz

# NOTE(review): presumably renders every chart the module defines for this
# DataFrame — confirm against data_visualization.plot_all_graphs.
viz.plot_all_graphs(preprocessed_df)
print("✅ Visualizations completed.")


In [None]:
# 📌 Step 5: Data Labeling
# Asks the user which column is the target and passes it, together with the
# preprocessed DataFrame, to label_data().
print("🏷️ Step 5: Label Your Data")

from data_labeling import label_data

# List the candidate columns so the user can pick the target by name
print("Available columns:")
print(preprocessed_df.columns.tolist())

target_column = input("Enter the target (dependent) column name: ").strip()

# Reject typos and empty input up front rather than handing an unknown column
# name to label_data(); ValueError matches the error style used in Step 6.
if target_column not in preprocessed_df.columns:
    raise ValueError(f"❌ Column '{target_column}' not found in the dataset.")

labeled_df = label_data(preprocessed_df, target_column)

print(f"✅ Target column '{target_column}' has been encoded and labeled.")


In [None]:
# 📌 Step 6: Train a Model
# Collects the problem type, task type and model choice from the user, then
# runs the training pipeline on the labeled data from Step 5.
print("🤖 Step 6: Model Training")

from model_training import run_model_pipeline  # ✅ Correct function name

problem_type = input("Is your problem supervised or unsupervised? (or press Enter to auto-detect) ").strip().lower()

if problem_type == "unsupervised":
    print("⚠️ Unsupervised learning not implemented yet.")
elif problem_type not in ("", "supervised"):
    raise ValueError("❌ Invalid problem type.")
else:
    # Empty input is treated the same as "supervised"
    task_type = input("Is it classification or regression? (or press Enter to auto-detect) ").strip().lower()

    # Message to show for each accepted task type; "" means auto-detect
    task_hints = {
        "classification": "Available models: logistic, decision_tree, random_forest, svm",
        "regression": "Available models: linear, decision_tree, random_forest, svm",
        "": "ℹ️ Auto-detecting task type based on target column...",
    }
    if task_type not in task_hints:
        raise ValueError("❌ Invalid supervised task type.")
    print(task_hints[task_type])

    model_choice = input("Enter model to train (or press Enter for default): ").strip().lower() or None

    # An empty task type is passed on as None so the pipeline auto-detects it
    task_type = task_type or None
    model = run_model_pipeline(labeled_df, target_column, task_type, model_choice)


In [None]:
# 📌 Step 7: Save Model
# Hands the model trained in Step 6 to save_model().
print("💾 Step 7: Save Your Trained Model")

from model_saver import save_model

# Step 6 only assigns `model` on the supervised path, so look it up defensively:
# a missing or None model is reported instead of raising NameError or
# announcing a save that never happened.
trained_model = globals().get("model")

if trained_model is None:
    print("⚠️ No trained model found. Run Step 6 with a supervised problem before saving.")
else:
    save_model(trained_model)
    print("✅ Model saved as a pickle file for deployment.")
