# Model Training

In [1]:
import numpy as np

# Read the feature array and the label array from their .npy files in one pass.
X, y = (
    np.load(npy_path)
    for npy_path in ('data/features.npy', 'data/labels.npy')
)

In [2]:
# Show the dimensions of the feature array loaded above.
X.shape

(71537, 100)

In [3]:
# Show the dimensions of the label array loaded above.
y.shape

(71537,)

In [4]:
# Display the label array (NumPy abbreviates long arrays in its repr).
y

array([1., 1., 0., ..., 0., 0., 1.], dtype=float32)

In [None]:
from sklearn.model_selection import train_test_split

RANDOM_SEED = 42

# Split the complete dataset, holding out 20% of the rows for testing.
# The fixed seed keeps the shuffle repeatable between executions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_SEED,
)

In [13]:
from sklearn.model_selection import train_test_split

RANDOM_SEED = 42
SUBSET_SIZE = 1000

# Keep only the first SUBSET_SIZE rows so that experiments run quickly.
X_subset, y_subset = X[:SUBSET_SIZE], y[:SUBSET_SIZE]

# 80/20 split of the subset; this rebinds X_train / X_test / y_train / y_test,
# so every later cell trains and evaluates on the subset.
X_train, X_test, y_train, y_test = train_test_split(
    X_subset,
    y_subset,
    test_size=0.2,
    random_state=RANDOM_SEED,
)

## Connect to MLflow

In [6]:
import mlflow

EXPERIMENT_NAME = 'fake_news_detector'

# Look up the experiment by name. get_experiment_by_name returns None (it does
# not raise) when no experiment with that name exists, which would otherwise
# surface later as an opaque AttributeError on `experiment.experiment_id`.
experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
if experiment is None:
    raise RuntimeError(
        f"MLflow experiment {EXPERIMENT_NAME!r} was not found; "
        "create it or check the tracking URI before running the cells below."
    )

print(experiment)

<Experiment: artifact_location='mlflow-artifacts:/381440502596330154', creation_time=1718830391813, experiment_id='381440502596330154', last_update_time=1718830391813, lifecycle_stage='active', name='fake_news_detector', tags={}>


In [8]:
run_name = 'prueba'

# Smoke test: open a run inside the experiment and close it again right away.
# The only work done inside the run is printing a confirmation message.
with mlflow.start_run(experiment_id=experiment.experiment_id, run_name=run_name) as run:
    print('Finish')

Finish


In [12]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

run_name = 'prueba'
model = SVC(random_state=RANDOM_SEED)

# Train and score a support-vector classifier while an MLflow run is open.
# This cell makes no explicit mlflow.log_* calls; the accuracy is only printed.
with mlflow.start_run(experiment_id=experiment.experiment_id, run_name=run_name) as run:
    model.fit(X_train, y_train)

    # Evaluate on the held-out split.
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    print(f"Accuracy: {accuracy}")

accuracy: 0.9280123008107353
Finish


In [14]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier

run_name = 'prueba_rn'
model = MLPClassifier(random_state=RANDOM_SEED)

# Train and score a multi-layer perceptron while an MLflow run is open.
# `model`, `accuracy` and `run_name` stay bound afterwards and are reused by
# the cells further down.
with mlflow.start_run(experiment_id=experiment.experiment_id, run_name=run_name) as run:
    # fit() returns the estimator itself, so training and prediction can be chained.
    y_pred = model.fit(X_train, y_train).predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    print(f"Accuracy: {accuracy}")

Accuracy: 0.875


In [18]:
experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id

# Record tags, parameters and the evaluation metric in a new MLflow run.
# The values are taken from the notebook state (RANDOM_SEED, X_train, accuracy)
# instead of hand-copied literals, so what is logged always matches what was
# actually used by the split and training cells above.
with mlflow.start_run(
    experiment_id=experiment_id,
    run_name=run_name,
) as run:

    # Set the tags
    mlflow.set_tags({
        "model": "neural-networks",
        "author": "MIS",
    })

    # Log the model parameters (key = value pairs)
    mlflow.log_param("random_seed", RANDOM_SEED)
    # Number of rows the model was really fitted on (after the train/test split)
    mlflow.log_param("train_size", len(X_train))

    # Model training code here ...

    # Log a metric; metrics can be updated throughout the run
    mlflow.log_metric("accuracy", accuracy)

    # Print the run ID
    print(f"Run ID: {run.info.run_id}")


Run ID: 1fe3467a6e474aa2b3607e1c1c32abe1


In [None]:
experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id

# Store the fitted estimator bound to `model` as an artifact of a new MLflow run.
with mlflow.start_run(
    experiment_id=experiment_id,
    run_name=run_name,
) as run:

    mlflow.sklearn.log_model(model, "neural_networks")
    # Tags belong to the run, so they are set through the top-level mlflow API;
    # the mlflow.sklearn flavor module has no set_tags function.
    mlflow.set_tags({"model": "neural-networks"})

    # Print the run ID
    print(f"Run ID: {run.info.run_id}")