In [None]:
# 📌 Step 1: Load Dataset
# Loads the dataset through file_handler.load_data(), previews it, and
# optionally writes it to a database (with optional column renaming first).
print("📥 Step 1: Load Dataset")

from getpass import getpass

from file_handler import load_data, store_data_in_database

# Load data from file
df = load_data()

# Show data preview
print("\n✅ Data loaded successfully. Here's a preview:")
print(df.head())

# Ask if user wants to store in database
store_choice = input("\n📦 Do you want to store this dataset in a database? (yes/no): ").strip().lower()

if store_choice in ('yes', 'y'):
    print("\n🧾 Available columns:")
    print(df.columns.tolist())

    change_cols = input("✏️ Do you want to rename columns before storing? (yes/no): ").strip().lower()

    if change_cols in ('yes', 'y'):
        new_cols = []
        for col in df.columns:
            # A blank answer keeps the current name, so pressing Enter on a
            # column cannot produce an empty column label.
            new_name = input(f"Enter new name for column '{col}': ").strip()
            new_cols.append(new_name or col)
        df.columns = new_cols

    # Get DB credentials. Answers are stripped so that a stray space does not
    # end up inside an identifier or defeat the host/port defaults below.
    db_name = input("Enter database name: ").strip()
    table_name = input("Enter table name: ").strip()
    user = input("Enter DB username (e.g., root): ").strip()
    # getpass keeps the password out of the terminal echo and out of the
    # saved notebook output; it is deliberately not stripped.
    password = getpass("Enter DB password: ")
    host = input("Enter DB host (default 'localhost'): ").strip() or "localhost"
    # The port is passed on as a string, exactly as the original cell did.
    port = input("Enter DB port (default '3306'): ").strip() or "3306"

    store_data_in_database(df, db_name, table_name, user, password, host, port)
    print(f"✅ Data stored in database '{db_name}', table '{table_name}'.")
else:
    print("✅ Proceeding with file-based data without storing in DB.")


In [None]:
# 📌 Step 2: Load Data for Understanding
# Re-loads the dataset via data_understanding.load_existing_data() and, when
# that succeeds, prints the full overview report.
print("📖 Step 2: Understanding Dataset")

from data_understanding import load_existing_data, full_data_overview

# Presumably picks the source (file or DB) chosen in Step 1 — the selection
# logic lives inside load_existing_data and is not visible here.
df = load_existing_data()

# Handle the failure case first: a None result means nothing was loaded.
if df is None:
    print("⚠️ Failed to load data. Please check previous steps or data source.")
else:
    print("✅ Data loaded successfully for understanding.\n")
    full_data_overview(df)


In [None]:
# 📌 Step 3: Preprocess the Data
# Runs preprocessing.preprocess_pipeline() on the DataFrame loaded in Step 2
# and previews the result.
print("🧼 Step 3: Data Preprocessing")

from preprocessing import preprocess_pipeline

# Step 2 only prints a warning when loading fails and leaves `df` as None.
# Stop here with an explicit message instead of handing None to the pipeline.
if df is None:
    raise ValueError("No data available to preprocess. Please re-run Step 2 and check the data source.")

preprocessed_df = preprocess_pipeline(df)
print("✅ Data preprocessing complete.")
print(preprocessed_df.head())


In [None]:
# 📌 Step 4: Visualize the Data
# Hands the preprocessed DataFrame from Step 3 to the project's
# data_visualization module, which draws the plots.
print("📊 Step 4: Data Visualization")

import data_visualization as viz

# Requires `preprocessed_df` from Step 3; which plots are produced is decided
# inside plot_all_graphs (not visible from this notebook).
viz.plot_all_graphs(preprocessed_df)
print("✅ Visualizations completed.")


In [None]:
# 📌 Step 5: Data Labeling
# Asks which column is the target (dependent) variable and passes that choice
# to data_labeling.label_data(). `target_column` is reused by Step 6.
print("🏷️ Step 5: Label Your Data")

from data_labeling import label_data

print("Available columns:")
print(preprocessed_df.columns.tolist())
target_column = input("Enter the target (dependent) column name: ")

# Prefer the answer exactly as typed (a column name may legitimately contain
# surrounding spaces); otherwise fall back to the trimmed answer.
if target_column not in preprocessed_df.columns:
    target_column = target_column.strip()

# Fail here, next to the prompt, rather than later inside labeling or training.
if target_column not in preprocessed_df.columns:
    raise ValueError(
        f"Column '{target_column}' not found. "
        f"Available columns: {preprocessed_df.columns.tolist()}"
    )

labeled_df = label_data(preprocessed_df, target_column)
print(f"✅ '{target_column}' set as dependent column.")


In [None]:
# 📌 Step 6: Train a Model
# Asks for the problem type, the task type (for supervised problems) and the
# model name, then calls model_training.train_model_pipeline().
# Produces `model`, `X_test` and `y_test` for later cells.
# Raises ValueError on an unknown problem type, task type or model name.
print("🤖 Step 6: Model Training")

from model_training import train_model_pipeline

# Model names this notebook advertises for each task type. They are used both
# for the "Available models" line and to validate the answer.
# NOTE(review): assumes train_model_pipeline accepts exactly these names — confirm.
available_models = {
    "classification": ("logistic", "decision_tree", "random_forest", "svm", "knn", "naive_bayes"),
    "regression": ("linear", "decision_tree", "random_forest", "svm"),
    "unsupervised": ("kmeans", "dbscan", "agglomerative"),
}

problem_type = input("Is your problem supervised or unsupervised? ").strip().lower()

if problem_type == "supervised":
    task_type = input("Is it classification or regression? ").strip().lower()
    if task_type not in ("classification", "regression"):
        raise ValueError("Invalid supervised task type.")
elif problem_type == "unsupervised":
    # The pipeline receives the literal "unsupervised" as its task type.
    task_type = "unsupervised"
else:
    raise ValueError("Invalid problem type.")

print(f"Available models: {', '.join(available_models[task_type])}")

# Lowercased like every other answer in this cell, so "SVM" and "svm" match.
model_name = input("Enter model to train: ").strip().lower()
if model_name not in available_models[task_type]:
    raise ValueError(
        f"Invalid model '{model_name}' for {task_type}. "
        f"Choose one of: {', '.join(available_models[task_type])}"
    )

# NOTE(review): the unsupervised path still passes target_column and unpacks
# X_test / y_test, exactly as the original cell did — confirm that
# train_model_pipeline returns a 3-tuple in that mode.
model, X_test, y_test = train_model_pipeline(labeled_df, target_column, task_type, model_name)


In [None]:
# 📌 Step 7: Save Model
# Persists the `model` object produced in Step 6 through the project's
# model_saver module.
print("💾 Step 7: Save Your Trained Model")

from model_saver import save_model

# Only the model is passed; the output location and file name are chosen
# inside save_model (not visible from this notebook).
save_model(model)
# NOTE(review): the message assumes save_model writes a pickle file — confirm.
print("✅ Model saved as a pickle file for deployment.")
