In [24]:
# add the parent directory to sys.path so the local `common` package can be imported
import sys
from pathlib import Path
sys.path.append(str(Path("..")))

# imports
from common.read_data import read_data

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [25]:
# Load the dataset, then separate the "Target" column from the remaining columns.
data = read_data()

y = data["Target"]
X = data.drop(columns=["Target"])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=30,
)

In [26]:
# Numeric columns: fill missing values with the column median, then standardise.
numeric_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),          # alternative: strategy="mean"
    ("scaler", StandardScaler()),
])

# Categorical columns: fill missing values with the most frequent category, then
# one-hot encode. handle_unknown="ignore" keeps transform() from raising on
# categories that were not present when the encoder was fitted.
categoric_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),   # alternative: strategy="constant"
    ("encoder", OneHotEncoder(handle_unknown="ignore")),
])

# "number" matches every numeric dtype (int32, float32, uint8, ...), not only
# int64/float64. ColumnTransformer discards any column that is not listed
# (its default is remainder="drop"), so a narrower filter would silently throw
# away numeric features that happen to be stored with a different width.
numeric_features = X.select_dtypes(include="number").columns.tolist()
categorical_features = X.select_dtypes(include=["object", "category"]).columns.tolist()

# Route each group of columns through its own pipeline.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_pipeline, numeric_features),
        ("cat", categoric_pipeline, categorical_features),
    ]
)

In [27]:
def train(model):
    """Fit `model` behind the shared preprocessor and print its test accuracy.

    The estimator is wrapped in a two-step pipeline ("preprocessing", then
    "classifier"), fitted on the notebook-level training split and scored
    on the held-out split.

    Parameters
    ----------
    model
        Estimator used as the final "classifier" step of the pipeline.

    Returns
    -------
    None
        A header containing the model's repr and the accuracy are printed.
    """
    steps = [
        ("preprocessing", preprocessor),
        ("classifier", model),
    ]
    fitted = Pipeline(steps).fit(X_train, y_train)
    predictions = fitted.predict(X_test)

    print(f"===== {model} =====")
    accuracy = accuracy_score(y_test, predictions)
    print(f"Accuracy: {accuracy}")

In [28]:
# Fit and score a logistic-regression classifier (solver iteration cap set to 200).
train(LogisticRegression(max_iter=200))

===== LogisticRegression(max_iter=200) =====
Accuracy: 0.752542372881356


In [29]:
# Fit and score a decision tree with default hyperparameters.
# NOTE(review): no random_state is passed, so the reported accuracy may differ
# between runs — consider fixing a seed to make this cell reproducible.
train(DecisionTreeClassifier())

===== DecisionTreeClassifier() =====
Accuracy: 0.6621468926553672


In [30]:
# Fit and score a support-vector classifier with an RBF kernel.
# 'rbf' is SVC's default kernel, which is why the printed repr is just SVC().
train(SVC(kernel='rbf'))

===== SVC() =====
Accuracy: 0.751412429378531
