In [1]:
#Data preprocessing
import numpy as np
import pandas as pd
import re
from imblearn.over_sampling import SMOTE 

#Models, model selection, preprocessing and metrics
from sklearn.neighbors import KNeighborsClassifier,KNeighborsRegressor
from sklearn.svm import SVC,SVR
from sklearn.linear_model import LogisticRegression,LinearRegression,Lasso,Ridge
from sklearn.tree import DecisionTreeClassifier,DecisionTreeRegressor
from sklearn.ensemble import AdaBoostClassifier,GradientBoostingClassifier,RandomForestClassifier,VotingClassifier
from sklearn.ensemble import AdaBoostRegressor,GradientBoostingRegressor,RandomForestRegressor
from sklearn.model_selection import train_test_split,StratifiedKFold,KFold
from sklearn.preprocessing import LabelEncoder,StandardScaler,RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score,mean_squared_error

#Data visualisation
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

#Handling warnings
import warnings
warnings.filterwarnings("ignore")

import os
import pickle

#flask
from flask import Flask, request, jsonify, render_template
import pandas as pd



In [2]:
def _is_classification_target(y):
    """Return True when the target Series holds labels rather than quantities."""
    types = pd.api.types
    # bool is checked explicitly because pandas also reports it as "numeric".
    return bool(
        types.is_bool_dtype(y)
        or types.is_object_dtype(y)
        or types.is_string_dtype(y)
        or isinstance(y.dtype, pd.CategoricalDtype)
    )


def _cv_mean_metric(estimator, X, y, splitter, metric):
    """Return the (train, validation) mean of ``metric`` over the splitter's folds."""
    # The scalers live inside the pipeline so they are re-fitted on the training
    # part of every fold; fitting them on the full data would leak validation
    # statistics into training.
    model = Pipeline([("robust scaler", RobustScaler()),
                      ("std_scaler", StandardScaler()),
                      ("model", estimator)])
    train_values, test_values = [], []
    for train_index, test_index in splitter.split(X, y):
        X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        model.fit(X_train, y_train)
        train_values.append(metric(y_train, model.predict(X_train)))
        test_values.append(metric(y_test, model.predict(X_test)))
    return float(np.mean(train_values)), float(np.mean(test_values))


def BMC(data, n_splits=10):
    """Cross-validate a fixed set of models on ``data`` and rank them.

    The last column of ``data`` is the target and every other column is a
    feature. A label-like target (object, string, categorical or bool) is
    treated as a classification problem and scored with accuracy; a numeric
    target is treated as a regression problem and scored with RMSE (square
    root of the mean per-fold MSE).

    Parameters
    ----------
    data : pandas.DataFrame
        Features followed by the target in the last column. It is not modified.
    n_splits : int, default 10
        Number of folds (StratifiedKFold for classification, KFold for regression).

    Returns
    -------
    pandas.DataFrame
        Columns ``Model``, ``Train Score``, ``Test Score`` (classification) or
        ``Model``, ``Train Errors``, ``Test Errors`` (regression), rounded to
        3 decimals and ordered best-first by the validation-fold column, with
        the training column only breaking ties.

    Raises
    ------
    ValueError
        If ``data`` has fewer than two columns or the target dtype is neither
        label-like nor numeric.
    """
    if data.shape[1] < 2:
        raise ValueError("BMC needs at least one feature column and a target column")

    y = data.iloc[:, -1]
    is_classification = _is_classification_target(y)
    if not is_classification and not pd.api.types.is_numeric_dtype(y):
        raise ValueError(f"Unsupported target dtype for model comparison: {y.dtype}")

    # Work on a copy so the caller's frame is never written to.
    X = data.iloc[:, :-1].copy()
    le = LabelEncoder()
    for col in X.columns:
        if not pd.api.types.is_numeric_dtype(X[col]):
            X[col] = le.fit_transform(X[col])
    X = X.astype(float)

    if is_classification:
        estimators = {"K-Nearest Neighbor":KNeighborsClassifier(),
                     "Support Vector Classifier":SVC(),"Decision Tree Classifier":DecisionTreeClassifier(),
                     "AdaBoostClassifier":AdaBoostClassifier(),
                     "Gradient Boosting Classifier":GradientBoostingClassifier(),
                     "Logistic Regression":LogisticRegression()}
        splitter = StratifiedKFold(n_splits=n_splits)
        metric = accuracy_score
        train_label, test_label = "Train Score", "Test Score"
    else:
        estimators = {"K-Nearest Neighbor":KNeighborsRegressor(),
                     "Support Vector Regressor":SVR(),"DecisionTreeRegressor":DecisionTreeRegressor(),
                     "AdaBoostRegressor":AdaBoostRegressor(),
                     "GradientBoostingRegressor":GradientBoostingRegressor(),
                     "Linear Regression":LinearRegression(),
                     "Lasso Regression":Lasso(),"Ridge Regression":Ridge()}
        splitter = KFold(n_splits=n_splits)
        metric = mean_squared_error
        train_label, test_label = "Train Errors", "Test Errors"

    rows = []
    for name, estimator in estimators.items():
        train_mean, test_mean = _cv_mean_metric(estimator, X, y, splitter, metric)
        if not is_classification:
            # Report RMSE: square root of the mean fold MSE.
            train_mean, test_mean = np.sqrt(train_mean), np.sqrt(test_mean)
        rows.append({"Model": name,
                     train_label: np.round(train_mean, 3),
                     test_label: np.round(test_mean, 3)})
    df = pd.DataFrame(rows, columns=["Model", train_label, test_label])

    # Rank on held-out performance first; ranking on the training column would
    # put the most overfitted model in the first row.
    # Accuracy: higher is better (descending). RMSE: lower is better (ascending).
    return df.sort_values([test_label, train_label], ascending=not is_classification)

In [3]:
# Flask application object; the @app.route decorators below register their views on it.
app = Flask(__name__)
@app.route("/")
def DefaultFunction():
    """Serve the root URL by rendering the ``index.html`` template with no context."""
    landing_page = render_template("index.html")
    return landing_page
@app.route('/file',methods=["POST","GET"])
def a():
    """Handle the CSV upload form: rank models with ``BMC`` and render the result.

    A GET request, or a POST that carries no ``file`` part / an empty file
    selection, has nothing to evaluate, so the plain ``index.html`` page is
    rendered instead of failing on a missing form key.

    On a successful upload the template receives ``data`` (the result rows as
    an array) and ``bmod`` (the ``Model`` value of the first result row).
    A ``ValueError`` raised while parsing or evaluating the upload (this
    includes pandas' empty-file and parser errors, which subclass it) is
    reported as a JSON ``{"error": ...}`` body with HTTP status 400.
    """
    # .get() avoids the automatic 400 that request.files["file"] raises when
    # the key is absent (every GET, or a POST without the file part).
    uploaded = request.files.get("file")
    if request.method != "POST" or uploaded is None or not uploaded.filename:
        return render_template("index.html")

    try:
        df = BMC(pd.read_csv(uploaded))
    except ValueError as exc:
        # Bad input is a client error, not a server crash.
        return jsonify(error=str(exc)), 400

    data,best_model = df,df.iloc[0]["Model"]
    return render_template("index.html",data=data.values,bmod=best_model)
# Start Flask's built-in server only when this code runs as the main program
# (not when it is imported as a module).
if __name__=="__main__":
    # debug=False keeps Flask's debug mode off.
    app.run(debug=False)

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [23/Mar/2023 17:28:43] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [23/Mar/2023 17:28:43] "GET /static/4.jpg HTTP/1.1" 304 -
127.0.0.1 - - [23/Mar/2023 17:28:43] "GET /favicon.ico HTTP/1.1" 404 -
127.0.0.1 - - [23/Mar/2023 17:28:57] "POST /file HTTP/1.1" 200 -
127.0.0.1 - - [23/Mar/2023 17:28:57] "GET /static/4.jpg HTTP/1.1" 304 -
