In [5]:
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
import plotly.express as px
from dash import Dash, html, dcc, Input, Output
import logging
import warnings


# Timestamped INFO-level logging so each pipeline stage can be followed
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Hide FutureWarning messages to keep the output readable
warnings.simplefilter('ignore', FutureWarning)

# Data Loading and Preprocessing Function
def load_and_preprocess_data(filepath):
    """Load the survey CSV, drop incomplete rows, and encode the text columns.

    Args:
        filepath: Location of the CSV file. Anything ``pandas.read_csv``
            accepts works (``str``, ``pathlib.Path``, file-like object).

    Returns:
        pandas.DataFrame: The rows that contain no missing values, with the
        four Yes/No columns mapped to 1/0 and every other object-dtype column
        replaced by integer codes from a per-column ``LabelEncoder``.

    Raises:
        KeyError: If one of the four Yes/No columns is absent from the file.
    """
    # Lazy %-formatting instead of string concatenation, which raised
    # TypeError when ``filepath`` was not a plain ``str``.
    logging.info("Loading and preprocessing data from: %s", filepath)
    df = pd.read_csv(filepath).dropna()

    binary_columns = ['Do you have Depression?', 'Do you have Anxiety?', 'Do you have Panic attack?', 'Did you seek any specialist for a treatment?']
    categorical_columns = df.select_dtypes(include=['object']).columns.difference(binary_columns)

    # Normalise case and surrounding whitespace before mapping so answers such
    # as "yes" or " Yes " are encoded instead of silently becoming NaN (which
    # would slip past the dropna() above and reach the model).
    yes_no = {'yes': 1, 'no': 0}
    df[binary_columns] = df[binary_columns].apply(
        lambda col: col.astype(str).str.strip().str.lower().map(yes_no)
    )
    unmapped = [name for name in binary_columns if df[name].isna().any()]
    if unmapped:
        logging.warning("Values other than Yes/No found in columns: %s", unmapped)

    # Nothing to encode when the file holds only numeric and Yes/No columns.
    if len(categorical_columns) > 0:
        df[categorical_columns] = df[categorical_columns].apply(
            lambda col: LabelEncoder().fit_transform(col)
        )
    return df

# Preprocess the dataset
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
df = load_and_preprocess_data('/Users/alexisbutterfly/Desktop/WGU/Portfolio/C964/Task 2/Student Mental health.csv')

# Feature Selection and Dataset Splitting
# The treatment-seeking column is the prediction target; all remaining
# columns are used as features.
X = df.drop('Did you seek any specialist for a treatment?', axis=1)
y = df['Did you seek any specialist for a treatment?']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Data Scaling
# Fit on the training split only so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model Training
logging.info("Starting model training with XGBClassifier...")
# ``use_label_encoder`` is deprecated in XGBoost and unused by current
# releases, so it is no longer passed; the target is already encoded as 0/1.
model = XGBClassifier(eval_metric='logloss')
model.fit(X_train_scaled, y_train)

# Model Evaluation
predictions = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, predictions)
# zero_division=0 reports 0 instead of warning when a class is never predicted.
report = classification_report(y_test, predictions, zero_division=0)

# Dimensionality Reduction with PCA for Visualization
# Project the standardized features: on raw values the components are
# dominated by whichever column has the widest numeric range.
pca = PCA(n_components=2)
components = pca.fit_transform(scaler.transform(X))

# Initialize Dash Application
app = Dash(__name__)

# Chart selector entries as (value, label) pairs, in display order
_chart_options = [
    {'label': label, 'value': value}
    for value, label in (
        ('PCA', 'PCA Component Scatter Plot'),
        ('FI', 'Feature Importance Bar Chart'),
        ('SB', 'Sunburst Chart for Conditions and Treatment'),
    )
]

# Page structure: title, accuracy summary, chart selector with its graph,
# the text classification report, and a decision-support note.
app.layout = html.Div(children=[
    html.H1(children="Student Mental Health Analysis"),
    html.Div(children=f"Model Accuracy: {accuracy:.2%}"),
    dcc.Dropdown(id='chart-dropdown', options=_chart_options, value='PCA'),
    dcc.Graph(id='dynamic-graph'),
    html.Div(children=[html.Pre(children=report)], id='model-report'),
    html.H3(children="Decision Support"),
    html.Div(
        children=["Based on the predictive model, review course demands if more than 50% of students are likely to seek treatment. This suggests considering more supportive measures or adjusting course demands."],
        id='decision-support',
    ),
])

# Callback for Interactive Dashboard Components
# Same guidance text the layout shows initially. The callback also runs on
# page load, so returning a shorter message here would immediately replace
# the layout's full text with a truncated one.
DECISION_SUPPORT_TEXT = (
    "Based on the predictive model, review course demands if more than 50% of students are likely to seek treatment. "
    "This suggests considering more supportive measures or adjusting course demands."
)


@app.callback(
    [Output('dynamic-graph', 'figure'),
     Output('decision-support', 'children')],
    [Input('chart-dropdown', 'value')]
)
def update_content(selected_chart):
    """Build the figure and decision-support text for the selected chart.

    Args:
        selected_chart: Current value of the ``chart-dropdown`` component;
            ``'PCA'``, ``'FI'`` or ``'SB'``.

    Returns:
        tuple: ``(figure, text)`` for the ``dynamic-graph`` figure and the
        ``decision-support`` children. Any other selection yields an empty
        figure and a prompt to choose a valid chart.
    """
    if selected_chart == 'PCA':
        # Colour by the target as strings so it is treated as a discrete class.
        fig = px.scatter(components, x=0, y=1, color=y.astype(str))
    elif selected_chart == 'FI':
        fig = px.bar(x=X.columns, y=model.feature_importances_, title="Feature Importances")
    elif selected_chart == 'SB':
        fig = px.sunburst(df, path=['Do you have Depression?', 'Do you have Anxiety?', 'Do you have Panic attack?', 'Did you seek any specialist for a treatment?'], color='Did you seek any specialist for a treatment?', title="Conditions and Treatment Seeking")
    else:
        logging.warning("Unexpected chart selection: %s", selected_chart)
        return {}, "Please select a valid chart type from the dropdown."
    return fig, DECISION_SUPPORT_TEXT

# Run the Dash app
if __name__ == '__main__':
    # ``run_server`` is obsolete in Dash 3 in favour of ``run``; fall back to
    # it only on older Dash 2.x releases that do not provide ``run`` yet.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)


2024-02-08 14:20:43,840 - INFO - Loading and preprocessing data from: /Users/alexisbutterfly/Desktop/WGU/Portfolio/C964/Task 2/Student Mental health.csv
2024-02-08 14:20:43,856 - INFO - Starting model training with XGBClassifier...
