# AutoMlJar

## Setup
### Correct the path

In [None]:
import sys
import os

# Resolve the directory one level above the current working directory.
# Normalising collapses the trailing ".." so sys.path receives a clean
# absolute path instead of "<cwd>/..".
# NOTE(review): presumably this is the project root that holds the `src`
# package imported later in the notebook — confirm.
current_working_directory = os.getcwd()
parent_directory = os.path.abspath(
    os.path.join(current_working_directory, os.pardir)
)

# Guard the append so re-running this cell does not pile up duplicate
# entries on sys.path.
if parent_directory not in sys.path:
    sys.path.append(parent_directory)

# Last expression of the cell: shows the working directory as a sanity check.
os.getcwd()

In [None]:
# Install the notebook's dependencies with the %pip magic.
# mljar-supervised is taken from the tip of the GitHub master branch, so the
# installed code can differ from one run to the next.
%pip install -q -U git+https://github.com/mljar/mljar-supervised.git@master
# NOTE(review): matplotlib is pinned to 3.1.3 *after* mljar-supervised has been
# installed, so this line may replace whatever matplotlib version pip selected
# above — confirm the pin is still required and compatible.
%pip install -q -U matplotlib==3.1.3 
# No -q / -U here: output is shown and an existing scikit-learn is left as is.
%pip install scikit-learn

In [None]:
# Load IPython's autoreload extension so the %autoreload magic is available.
%load_ext autoreload

In [None]:
# Trigger a reload of already-imported modules so recent edits to project
# code are picked up before the imports below run.
%autoreload 

# Import the necessary libraries
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import warnings
from datetime import datetime
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score 
from supervised.automl import AutoML

# Show up to 200 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 200)
# Silence every Python warning from this point on.
warnings.filterwarnings('ignore')

# Project-local helpers.
# NOTE(review): resolving `src` presumably relies on the parent directory
# having been added to sys.path earlier in the notebook — confirm.
from src.features.post_processor import save_predictions
from src.features.ml_service import  prepare_data, prepare_test_data

## Load data

In [None]:
# Unpack the six objects returned by the project helper prepare_data().
# NOTE(review): the train / validation / test feature-and-target meaning is
# inferred from the variable names only — verify against
# src.features.ml_service.prepare_data.
x_train, x_val, x_test, y_train, y_val, y_test = prepare_data()

## Train model

In [None]:

# Initialize MLJAR AutoML.
# "Explain" is one of the available modes; "Perform" and "Compete" are
# alternatives, depending on your needs.
automl = AutoML(mode="Explain")

# Fit on the feature matrix and the target passed as two separate arguments.
# AutoML.fit(X, y) expects the target *values* as y, not a column name, and
# the target must stay out of X: leaving it in would let the models train on
# the label itself and would make the training columns differ from the
# feature-only frame later passed to predict().
automl.fit(x_train, y_train)


## Make predictions

In [None]:

# Predict on the validation features with the fitted AutoML object.
predictions = automl.predict(x_val)

# Score the predictions against the validation targets.
# NOTE(review): accuracy assumes a classification target — swap in another
# metric if the task is different.
accuracy = accuracy_score(y_val, predictions)
print(f"Validation Accuracy: {accuracy}")

# Ask MLJAR for its training report. As the last expression of the cell,
# whatever report() returns is what the notebook displays.
automl.report()


## Save model