In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

from jcopml.pipeline import num_pipe, cat_pipe
from jcopml.utils import save_model, load_model
from jcopml.plot import plot_missing_value
from jcopml.feature_importance import mean_score_decrease

# Import Data

In [None]:
# Load the dataset; fill in the file path, the index column and the
# date column(s) before running.
df = pd.read_csv(
    "____________",
    index_col="___________",
    parse_dates=["____________"],
)
# Preview the first rows as a quick sanity check.
df.head()

# Data Splitting

In [None]:
# Shuffle Split
# Fill in the target column name in BOTH placeholders below.
# Features: every column except the target.
X = df.drop(columns="___________")
# Target: must be the column itself taken from `df`; a bare string here would
# be handed to train_test_split as the target and fail the length check.
y = df["_____________"]

# Hold out 20% of the rows for testing; random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape




# Stratified Shuffle Split
# Fill in the target column name in BOTH placeholders below.
# Features: every column except the target.
X = df.drop(columns="___________")
# Target: must be the column itself taken from `df`; it is also used as the
# stratification labels below, so a bare string placeholder cannot work here.
y = df["_____________"]

# Hold out 20% of the rows for testing, stratifying on `y`;
# random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

# Training

### Preprocessor

In [None]:
# Preprocessor
# Route columns to the matching jcopml sub-pipeline: list the numeric column
# names in the first entry and the categorical ones (one-hot encoded) in the
# second.
preprocessor = ColumnTransformer(
    transformers=[
        ("numeric", num_pipe(), ["______________"]),
        ("categoric", cat_pipe(encoder="onehot"), ["_____________"]),
    ]
)

### Supervised Learning Pipeline

#### Regression

In [2]:
# K-Nearest Neighbor(s)
from sklearn.neighbors import KNeighborsRegressor

# Two-step pipeline: shared preprocessing, then a default-configured KNN regressor.
pipeline = Pipeline(
    steps=[
        ("prep", preprocessor),
        ("algo", KNeighborsRegressor()),
    ]
)

In [3]:
# Support Vector Machine
from sklearn.svm import SVR

# Two-step pipeline: shared preprocessing, then an SVR with max_iter=500.
pipeline = Pipeline(
    steps=[
        ("prep", preprocessor),
        ("algo", SVR(max_iter=500)),
    ]
)

In [None]:
# Random Forest
from sklearn.ensemble import RandomForestRegressor

# Two-step pipeline: shared preprocessing, then a random forest regressor
# (n_jobs=-1, fixed random_state for repeatable results).
pipeline = Pipeline(
    steps=[
        ("prep", preprocessor),
        ("algo", RandomForestRegressor(n_jobs=-1, random_state=42)),
    ]
)

In [None]:
# Extreme Gradient Boosting (XGBoost)
from xgboost import XGBRegressor

# Two-step pipeline: shared preprocessing, then an XGBoost regressor
# (n_jobs=-1, fixed random_state for repeatable results).
pipeline = Pipeline(
    steps=[
        ("prep", preprocessor),
        ("algo", XGBRegressor(n_jobs=-1, random_state=42)),
    ]
)

In [4]:
# Linear Regression
from sklearn.linear_model import LinearRegression

# Two-step pipeline: shared preprocessing, then a default linear regression.
pipeline = Pipeline(
    steps=[
        ("prep", preprocessor),
        ("algo", LinearRegression()),
    ]
)

In [None]:
# ElasticNet Regression
from sklearn.linear_model import ElasticNet

# Two-step pipeline: shared preprocessing, then a default-configured ElasticNet.
pipeline = Pipeline(
    steps=[
        ("prep", preprocessor),
        ("algo", ElasticNet()),
    ]
)

#### Classification

In [2]:
# K-Nearest Neighbor(s)
from sklearn.neighbors import KNeighborsClassifier

# Two-step pipeline: shared preprocessing, then a default-configured KNN classifier.
pipeline = Pipeline(
    steps=[
        ("prep", preprocessor),
        ("algo", KNeighborsClassifier()),
    ]
)

In [3]:
# Support Vector Machine
from sklearn.svm import SVC

# Two-step pipeline: shared preprocessing, then an SVC with max_iter=500.
pipeline = Pipeline(
    steps=[
        ("prep", preprocessor),
        ("algo", SVC(max_iter=500)),
    ]
)

In [None]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier

# Two-step pipeline: shared preprocessing, then a random forest classifier
# (n_jobs=-1, fixed random_state for repeatable results).
pipeline = Pipeline(
    steps=[
        ("prep", preprocessor),
        ("algo", RandomForestClassifier(n_jobs=-1, random_state=42)),
    ]
)

In [None]:
# Extreme Gradient Boosting (XGBoost)
from xgboost import XGBClassifier

# Two-step pipeline: shared preprocessing, then an XGBoost classifier
# (n_jobs=-1, fixed random_state for repeatable results).
pipeline = Pipeline(
    steps=[
        ("prep", preprocessor),
        ("algo", XGBClassifier(n_jobs=-1, random_state=42)),
    ]
)

In [4]:
# Logistic Regression
from sklearn.linear_model import LogisticRegression

# Two-step pipeline: shared preprocessing, then a logistic regression using
# the lbfgs solver (n_jobs=-1, fixed random_state).
pipeline = Pipeline(
    steps=[
        ("prep", preprocessor),
        ("algo", LogisticRegression(solver="lbfgs", n_jobs=-1, random_state=42)),
    ]
)

### Hyperparameter Tuning

#### Grid Search

In [None]:
from sklearn.model_selection import GridSearchCV
from jcopml.tuning import grid_search_params as gsp

# Template placeholders -- replace every "___" value before running:
#   * the getattr() name picks one of the parameter grids exposed by `gsp`
#   * cv and scoring are passed straight through to GridSearchCV
# Attribute access needs an identifier, not a string literal, so the
# placeholder is looked up with getattr() to keep this cell valid Python.
param_grid = getattr(gsp, "_______________")

model = GridSearchCV(pipeline, param_grid, cv="___", scoring='___', n_jobs=-1, verbose=1)
model.fit(X_train, y_train)

print(model.best_params_)
# Train score, best cross-validation score, test score.
print(model.score(X_train, y_train), model.best_score_, model.score(X_test, y_test))

#### Randomized Search

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from jcopml.tuning import random_search_params as rsp

# Template placeholders -- replace every "___" value before running:
#   * the getattr() name picks one of the parameter distributions exposed by `rsp`
#   * cv, scoring and n_iter are passed straight through to RandomizedSearchCV
# Attribute access needs an identifier, not a string literal, so the
# placeholder is looked up with getattr() to keep this cell valid Python.
param_distributions = getattr(rsp, "_______________")

model = RandomizedSearchCV(pipeline, param_distributions, cv="___", scoring='___', n_iter="___", n_jobs=-1, verbose=1, random_state=42)
model.fit(X_train, y_train)

print(model.best_params_)
# Train score, best cross-validation score, test score.
print(model.score(X_train, y_train), model.best_score_, model.score(X_test, y_test))

#### Bayesian Search

In [None]:
from skopt import BayesSearchCV
from jcopml.tuning import bayes_search_params as bsp

# Template placeholders -- replace every "___"/"__" value before running:
#   * the getattr() name picks one of the search spaces exposed by `bsp`
#   * cv, scoring and n_iter are passed straight through to BayesSearchCV
# Attribute access needs an identifier, not a string literal, so the
# placeholder is looked up with getattr() to keep this cell valid Python.
search_spaces = getattr(bsp, "_______________")

model = BayesSearchCV(pipeline, search_spaces, cv="___", scoring="__", n_iter="___", n_jobs=-1, verbose=1, random_state=42)
model.fit(X_train, y_train)

print(model.best_params_)
# Train score, best cross-validation score, test score.
print(model.score(X_train, y_train), model.best_score_, model.score(X_test, y_test))

# Save Model

In [None]:
# Save whole
# Persists the complete `model` object rather than only its best estimator.
# Replace the placeholder with the desired file name.
# NOTE(review): the output location is decided by jcopml's save_model -- confirm.
save_model(model, "__________.pkl")

In [None]:
# Save best estimator
# Persists only `model.best_estimator_` instead of the whole `model` object.
# Replace the placeholder with the desired file name.
# NOTE(review): the output location is decided by jcopml's save_model -- confirm.
save_model(model.best_estimator_, "__________.pkl")