# CatBoost

## Setup
### Correct the path

In [None]:
import sys
import os

# Make the directory one level above the working directory importable, so the
# `src.*` imports used later in this notebook can be resolved.
# (Presumably the notebook lives one level below the project root — confirm.)
current_working_directory = os.getcwd()

# Resolve to a normalized absolute path: without abspath() the entry would be
# the literal "<cwd>/..", which is harder to read and to compare against
# existing sys.path entries.
parent_directory = os.path.abspath(
    os.path.join(current_working_directory, os.pardir)
)

# Register the directory only once; re-running this cell must not keep
# growing sys.path with duplicate entries.
if parent_directory not in sys.path:
    sys.path.append(parent_directory)

# Display the working directory as the cell output
os.getcwd()

In [None]:
# Install the third-party packages this notebook imports:
# catboost (CatBoostClassifier) and scikit-learn (sklearn.metrics).
%pip install catboost
%pip install scikit-learn

In [None]:
# Load IPython's autoreload extension so the %autoreload magic is available.
%load_ext autoreload

In [None]:
# Ask autoreload to refresh already-imported modules before the imports in
# this cell run.
# NOTE(review): the bare form presumably performs a one-off reload; if
# automatic reloading before every cell execution is wanted, `%autoreload 2`
# may be the intended mode — confirm against the IPython documentation.
%autoreload 

from sklearn.metrics import accuracy_score, classification_report
from catboost import CatBoostClassifier
from datetime import datetime

from src.features.postprocess import save_predictions
from src.features.ml_service import  prepare_data, prepare_test_data

## Load data

In [None]:
# prepare_data() yields six objects, unpacked here as feature/label pairs;
# presumably the train / validation / test splits — verify in ml_service.
(
    x_train, x_val, x_test,
    y_train, y_val, y_test,
) = prepare_data()

## Train model

In [None]:
# Hyper-parameters for the CatBoost classifier, kept in one place
catboost_params = {
    'iterations': 1000,          # number of trees
    'learning_rate': 0.1,
    'depth': 6,                  # depth of each tree
    'loss_function': 'Logloss',  # objective function
    'verbose': 100,              # logging frequency
}
model = CatBoostClassifier(**catboost_params)

# Train on the training split; the validation split is passed as the eval set
# and use_best_model=True is requested together with the training plot.
model.fit(
    x_train,
    y_train,
    eval_set=(x_val, y_val),
    use_best_model=True,
    plot=True,
)

## Make predictions

In [None]:
# Predict labels for the held-out features
predictions = model.predict(x_test)

# Score the predictions against the held-out labels: overall accuracy first,
# then the per-class report.
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print("Accuracy:", accuracy)

report = classification_report(y_true=y_test, y_pred=predictions)
print("Classification Report:\n", report)

## Save model

In [None]:

# Persist the trained model in CatBoost's native binary format.
# NOTE: "pkl" is not one of the formats documented for save_model() /
# load_model(), so the previous call could not succeed; "cbm" is the
# library's default format and round-trips through load_model().
model_path = 'model.cbm'
model.save_model(model_path, format="cbm")

# Reload the model from disk into a fresh classifier, so the final
# predictions come from exactly the artifact that was saved.
model = CatBoostClassifier()
model.load_model(model_path, format="cbm")

# Build the feature set for the final predictions. It is kept under its own
# name so the `x_test` split returned by prepare_data() is not overwritten and
# stays paired with `y_test` if the evaluation cell is re-run afterwards.
x_final = prepare_test_data()
final_predictions = model.predict(x_final)

# Display the predictions as the cell output
final_predictions

In [None]:
# Write the final predictions to a CSV whose name carries a
# YYYYmmddHHMMSS timestamp, so successive runs do not overwrite each other.
current_time = format(datetime.now(), "%Y%m%d%H%M%S")
predictions_filename = f'predictions_{current_time}.csv'
save_predictions(final_predictions, predictions_filename)