In [21]:
import numpy as np
import pandas as pd
import plotly.express as px
import cufflinks as cf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, accuracy_score
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVR, SVC
from sklearn.preprocessing import PolynomialFeatures
from sklearn.naive_bayes import GaussianNB

# Enable Cufflinks offline mode
cf.go_offline()

# User input: choose between Regression ('R') and Classification ('C')
task = input("Enter 'R' for Regression or 'C' for Classification: ")

# Ask for dataset location
dataset_location = input("Enter the dataset location (e.g., 'data.csv'): ")

# Load the dataset. Columns are split by position: every column except the
# last is used as a feature, and the last column is used as the target.
dataset = pd.read_csv(dataset_location)
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Hold out 25% of the rows as a test set; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=101)

# Feature scaling: the scaler is fitted on the training rows only and then
# re-used on the test rows, so no test-set statistics leak into training.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Apply PCA for dimensionality reduction.
# PCA rejects n_components larger than min(n_samples, n_features), so a
# dataset with a single feature column (or a tiny training split) would raise
# a ValueError with a hard-coded value of 2. Clamp the requested number of
# components to what the training data can actually support.
max_components = 2  # You can adjust the number of components as needed
n_components = min(max_components, *X_train.shape)
pca = PCA(n_components=n_components)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Initialize empty lists to store results: model names and their scores are
# appended in lock-step, one entry per fitted model.
models = []
results = []

def _plot_model_comparison(scores_df, metric_column, title):
    """Build a bar chart with one bar per model for the given score column.

    Args:
        scores_df: DataFrame with a 'Model' column and a ``metric_column``
            column holding one score per model.
        metric_column: Name of the score column to plot on the y-axis.
        title: Title of the chart.

    Returns:
        The Plotly Express figure; the caller is responsible for showing it.
    """
    figure = px.bar(scores_df, x='Model', y=metric_column, title=title)
    # A logarithmic axis cannot represent zero or negative values, so a model
    # with such a score (R2 is negative when a model fits worse than a
    # constant prediction) would silently disappear from the chart. Keep the
    # logarithmic scale only when every score is strictly positive and fall
    # back to the default linear axis otherwise.
    if (scores_df[metric_column] > 0).all():
        figure.update_yaxes(type="log")
    return figure


# Accept the task letter regardless of case or surrounding whitespace.
task_code = task.strip().upper()

if task_code == 'R':  # Regression task
    # Every regressor is fitted on the PCA-reduced training features and
    # scored with R2 on the PCA-reduced test features.

    # Linear Regression
    models.append("Linear Regression")
    regressor_lr = LinearRegression()
    regressor_lr.fit(X_train_pca, y_train)
    y_pred_lr = regressor_lr.predict(X_test_pca)
    r2_lr = r2_score(y_test, y_pred_lr)
    results.append(r2_lr)

    # Random Forest Regression
    models.append("Random Forest Regression")
    regressor_rf = RandomForestRegressor(n_estimators=100, random_state=0)
    regressor_rf.fit(X_train_pca, y_train)
    y_pred_rf = regressor_rf.predict(X_test_pca)
    r2_rf = r2_score(y_test, y_pred_rf)
    results.append(r2_rf)

    # Support Vector Regression
    models.append("Support Vector Regression")
    regressor_svr = SVR(kernel='rbf')
    regressor_svr.fit(X_train_pca, y_train)
    y_pred_svr = regressor_svr.predict(X_test_pca)
    r2_svr = r2_score(y_test, y_pred_svr)
    results.append(r2_svr)

    # Gradient Boosting Regression
    models.append("Gradient Boosting Regression")
    regressor_gb = GradientBoostingRegressor(n_estimators=100, random_state=0)
    regressor_gb.fit(X_train_pca, y_train)
    y_pred_gb = regressor_gb.predict(X_test_pca)
    r2_gb = r2_score(y_test, y_pred_gb)
    results.append(r2_gb)

    # Polynomial Regression: a linear model fitted on degree-2 polynomial
    # features. The expansion is fitted on the training features and the same
    # fitted expansion is applied to the test features.
    models.append("Polynomial Regression")
    poly_reg = PolynomialFeatures(degree=2)  # You can adjust the degree
    X_poly = poly_reg.fit_transform(X_train_pca)
    regressor_poly = LinearRegression()
    regressor_poly.fit(X_poly, y_train)
    X_test_poly = poly_reg.transform(X_test_pca)
    y_pred_poly = regressor_poly.predict(X_test_poly)
    r2_poly = r2_score(y_test, y_pred_poly)
    results.append(r2_poly)

    # Create a DataFrame to store results
    results_df = pd.DataFrame({'Model': models, 'R-squared (R2)': results})

    # Create a combined bar plot using Plotly Express
    fig = _plot_model_comparison(
        results_df, 'R-squared (R2)', 'Model Performance Comparison (Regression)'
    )
    fig.show()

elif task_code == 'C':  # Classification task
    # Every classifier is fitted on the PCA-reduced training features and
    # scored with accuracy on the PCA-reduced test features.

    # Logistic Regression
    models.append("Logistic Regression")
    classifier_lr = LogisticRegression(random_state=0)
    classifier_lr.fit(X_train_pca, y_train)
    y_pred_lr = classifier_lr.predict(X_test_pca)
    accuracy_lr = accuracy_score(y_test, y_pred_lr)
    results.append(accuracy_lr)

    # Random Forest Classification
    models.append("Random Forest Classification")
    classifier_rf = RandomForestClassifier(n_estimators=100, criterion='entropy', random_state=0)
    classifier_rf.fit(X_train_pca, y_train)
    y_pred_rf = classifier_rf.predict(X_test_pca)
    accuracy_rf = accuracy_score(y_test, y_pred_rf)
    results.append(accuracy_rf)

    # Support Vector Machine (SVM)
    models.append("SVM")
    classifier_svm = SVC(kernel='rbf', random_state=0)
    classifier_svm.fit(X_train_pca, y_train)
    y_pred_svm = classifier_svm.predict(X_test_pca)
    accuracy_svm = accuracy_score(y_test, y_pred_svm)
    results.append(accuracy_svm)

    # Gradient Boosting Classification
    models.append("Gradient Boosting Classification")
    classifier_gb = GradientBoostingClassifier(n_estimators=100, random_state=0)
    classifier_gb.fit(X_train_pca, y_train)
    y_pred_gb = classifier_gb.predict(X_test_pca)
    accuracy_gb = accuracy_score(y_test, y_pred_gb)
    results.append(accuracy_gb)

    # Naive Bayes Classification
    models.append("Naive Bayes")
    classifier_nb = GaussianNB()
    classifier_nb.fit(X_train_pca, y_train)
    y_pred_nb = classifier_nb.predict(X_test_pca)
    accuracy_nb = accuracy_score(y_test, y_pred_nb)
    results.append(accuracy_nb)

    # Create a DataFrame to store results
    results_df = pd.DataFrame({'Model': models, 'Accuracy': results})

    # Create a combined bar plot using Plotly Express
    fig = _plot_model_comparison(
        results_df, 'Accuracy', 'Model Performance Comparison (Classification)'
    )
    fig.show()

else:
    # Previously an unrecognised answer fell through both branches and the
    # script ended without fitting or plotting anything; report it instead.
    raise ValueError(
        f"Unknown task {task!r}: enter 'R' for Regression or 'C' for Classification."
    )

Enter 'R' for Regression or 'C' for Classification: R
Enter the dataset location (e.g., 'data.csv'): Datac.csv
