# In [3]:
import io
import base64

import pandas as pd
import plotly.express as px
import cufflinks as cf
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, accuracy_score
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVR, SVC
from sklearn.preprocessing import PolynomialFeatures
from sklearn.naive_bayes import GaussianNB

# Switch Cufflinks to offline plotting
cf.go_offline()

# Instantiate the Dash application
app = dash.Dash(__name__)

# Choices offered by the task radio buttons ('R' is preselected below)
_task_options = [
    {'label': 'Regression', 'value': 'R'},
    {'label': 'Classification', 'value': 'C'},
]

# Inline CSS for the dashed, full-width upload area
_upload_style = {
    'width': '100%',
    'height': '60px',
    'lineHeight': '60px',
    'borderWidth': '1px',
    'borderStyle': 'dashed',
    'borderRadius': '5px',
    'textAlign': 'center',
}

# Page layout: heading, task selector, single-file upload, and result chart
app.layout = html.Div(children=[
    html.H1("Model Performance Comparison"),
    html.Label("Choose a task:"),
    dcc.RadioItems(id='task-selector', options=_task_options, value='R'),
    dcc.Upload(
        id='upload-data',
        children=html.Button('Upload Dataset'),
        style=_upload_style,
        multiple=False,
    ),
    dcc.Graph(id='result-plot'),
])

def _fit_and_score(estimator, metric, X_train, X_test, y_train, y_test):
    """Fit *estimator* on the training split and score its test predictions."""
    estimator.fit(X_train, y_train)
    return metric(y_test, estimator.predict(X_test))


# Define callback to update the plot based on user input
@app.callback(
    Output('result-plot', 'figure'),
    Input('task-selector', 'value'),
    Input('upload-data', 'contents')
)
def update_plot(selected_task, file_contents):
    """Train several models on the uploaded CSV and chart their test scores.

    The last column of the uploaded table is used as the target and all
    preceding columns as features. The data is split 75/25 into train and
    test sets and the features are standardized before fitting.

    Args:
        selected_task: Value of the task radio buttons; 'R' runs the
            regression models (scored with R2), 'C' runs the classifiers
            (scored with accuracy).
        file_contents: The upload component's ``contents`` string, or
            ``None`` when nothing has been uploaded yet. It is split on the
            comma and the second part is base64-decoded as UTF-8 CSV text.

    Returns:
        A bar-chart figure with one bar per model, or an empty dict when no
        file has been uploaded.
    """
    if file_contents is None:
        return {}

    # Read the uploaded dataset; only the base64 payload after the comma is needed
    _content_type, content_string = file_contents.split(',')
    csv_text = base64.b64decode(content_string).decode('utf-8')
    dataset = pd.read_csv(io.StringIO(csv_text))

    X = dataset.iloc[:, :-1].values
    y = dataset.iloc[:, -1].values

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=101)

    # Standardize features; the scaler is fitted on the training split only
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    if selected_task == 'R':  # Regression task
        metric = r2_score
        y_label = 'R-squared (R2)'
        title = 'Model Performance Comparison (Regression)'
        estimators = [
            ("Linear Regression", LinearRegression()),
            ("Random Forest Regression",
             RandomForestRegressor(n_estimators=100, random_state=0)),
            ("Support Vector Regression", SVR(kernel='rbf')),
            ("Gradient Boosting Regression",
             GradientBoostingRegressor(n_estimators=100, random_state=0)),
        ]
    else:
        metric = accuracy_score
        y_label = 'Accuracy'
        title = 'Model Performance Comparison (Classification)'
        estimators = []
        if selected_task == 'C':  # Classification task
            estimators = [
                ("Logistic Regression", LogisticRegression(random_state=0)),
                ("Random Forest Classification",
                 RandomForestClassifier(n_estimators=100, criterion='entropy', random_state=0)),
                ("SVM", SVC(kernel='rbf', random_state=0)),
                ("Gradient Boosting Classification",
                 GradientBoostingClassifier(n_estimators=100, random_state=0)),
                ("Naive Bayes", GaussianNB()),
            ]

    models = [name for name, _ in estimators]
    results = [
        _fit_and_score(estimator, metric, X_train_scaled, X_test_scaled, y_train, y_test)
        for _, estimator in estimators
    ]

    if selected_task == 'R':
        # Polynomial Regression: a linear model on degree-2 expanded features
        poly_features = PolynomialFeatures(degree=2)  # You can adjust the degree
        X_train_poly = poly_features.fit_transform(X_train_scaled)
        X_test_poly = poly_features.transform(X_test_scaled)
        models.append("Polynomial Regression")
        results.append(
            _fit_and_score(LinearRegression(), metric, X_train_poly, X_test_poly, y_train, y_test)
        )

    # Create a DataFrame to store results
    results_df = pd.DataFrame({'Model': models, y_label: results})

    # Create a combined bar plot using Plotly Express. The y-axis stays linear:
    # R2 can be zero or negative, which a logarithmic axis cannot display.
    fig = px.bar(results_df, x='Model', y=y_label, title=title)

    return fig

if __name__ == '__main__':
    # Newer Dash releases start the dev server with `app.run`, which
    # supersedes the older `run_server`; fall back to `run_server` only on
    # releases that do not provide `run`. `or` short-circuits, so
    # `run_server` is never touched when `run` exists.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)
