# In [None]:
import json
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC


# Step 1: Parse the JSON file
# `params` holds the parsed configuration that the later steps read.
# The encoding is given explicitly because JSON text is UTF-8 by
# specification, whereas open() would otherwise fall back to the platform's
# locale encoding and could mis-decode non-ASCII values (e.g. stop words).
with open('algoparams_from_ui.json', 'r', encoding='utf-8') as f:
    params = json.load(f)


# Step 2: Define the machine learning pipeline
# The pipeline is: an optional text vectorizer followed by one classifier.
pipeline_steps = []

feature_handling = params['feature_handling']
feature_generation = params['feature_generation']

# Feature generation
# Gather the vectorizer options requested in the configuration. They are all
# applied to the single vectorizer step built below: chaining one
# CountVectorizer per option cannot work, because every vectorizer after the
# first would be handed a sparse count matrix instead of raw text documents,
# and options spread over separate vectorizers would never combine.
vectorizer_options = {}
if feature_generation['feature_type'] == 'text':
    if feature_generation.get('ngram_range'):
        # JSON arrays load as lists; scikit-learn expects a (min_n, max_n) tuple.
        vectorizer_options['ngram_range'] = tuple(feature_generation['ngram_range'])
    if feature_generation.get('stop_words'):
        vectorizer_options['stop_words'] = feature_generation['stop_words']
    if feature_generation.get('max_features'):
        vectorizer_options['max_features'] = feature_generation['max_features']

# Feature handling
# A vectorizer is added when the features are declared as text, or when any
# text feature-generation option was requested. TF-IDF is used only when the
# features are text and the configured vectorizer is not 'count'; every other
# case uses plain counts.
uses_text_features = feature_handling['feature_type'] == 'text'
if uses_text_features or vectorizer_options:
    if uses_text_features and feature_handling['vectorizer'] != 'count':
        vectorizer = TfidfVectorizer(**vectorizer_options)
    else:
        vectorizer = CountVectorizer(**vectorizer_options)
    # The step is always named 'vectorizer', so hyper-parameter grids address
    # it as 'vectorizer__<param>'.
    pipeline_steps.append(('vectorizer', vectorizer))

# Model building
# Any model_type other than the two named here falls back to SVC.
model_type = params['model_building']['model_type']
if model_type == 'naive_bayes':
    clf = MultinomialNB()
elif model_type == 'logistic_regression':
    clf = LogisticRegression()
else:
    clf = SVC()

pipeline_steps.append(('clf', clf))

pipeline = Pipeline(pipeline_steps)

# Step 3: Load the data
# The data has to exist before the grid search below is fitted; loading it
# only afterwards raised a NameError on X_train whenever hyper-parameters
# were configured.
# NOTE(review): load_data() is not defined in this file, and both calls are
# identical -- confirm it actually returns distinct train and test splits.
X_train, y_train = load_data() # Define this function to load the training data
X_test, y_test = load_data() # Define this function to load the testing data

# Step 4: Train the model with Grid search
if params['hyper_params']:
    # NOTE(review): assumes hyper_params maps '<step>__<param>' names to lists
    # of candidate values, as GridSearchCV requires -- confirm against the
    # JSON schema.
    param_grid = params['hyper_params']
    grid_search = GridSearchCV(pipeline, param_grid, cv=5)
    grid_search.fit(X_train, y_train)
    best_params = grid_search.best_params_
    # GridSearchCV fits clones of the estimator, so `pipeline` itself is still
    # unfitted here; copy the winning settings onto it for the final fit.
    pipeline.set_params(**best_params)

# Step 5: Fit the model and make predictions
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)
