In [52]:
import os

# Connection settings for the MLflow tracking server, exported as environment
# variables for this kernel session. The angle-bracket values are template
# placeholders (not valid Python): replace each with a quoted string before
# running, and do not commit real credentials with the notebook.
os.environ["MLFLOW_TRACKING_URI"]=<url>
os.environ["MLFLOW_TRACKING_USERNAME"]=<username>
os.environ["MLFLOW_TRACKING_PASSWORD"]=<password/token>

In [53]:
import tensorflow as tf
import sklearn 
import pickle as pk

In [54]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class EvaluationConfig:
    """Immutable bundle of settings for the model-evaluation stage.

    The dataclass is frozen, so fields cannot be reassigned after construction.
    Instances are built by ``ConfigurationManager.get_evaluation_config`` from
    the project's config and params YAML files.
    """
    # Pickle files unpickled by Evaluation.load_model.
    path_of_model: Path
    path_of_stemmer: Path
    path_of_stopwords: Path
    path_of_vectorizer: Path
    # CSV read by Evaluation.data_preprocess. Despite the name, it is filled
    # from the `evaluation.dataset_path` config entry.
    training_data: Path
    # Full params mapping; logged to MLflow via mlflow.log_params.
    all_params: dict
    # Passed to mlflow.set_registry_uri.
    mlflow_uri: str
    # NOTE(review): params_image_size and params_batch_size are not read by any
    # active code in the Evaluation class in this notebook -- presumably
    # leftovers from an image pipeline; confirm before removing.
    params_image_size: list
    params_batch_size: int
    # Number of rows sampled from the CSV before filtering.
    params_data_size: int

In [55]:
from sentiment_analysis.constants import *
from sentiment_analysis.utils.common import read_yaml, create_directories, save_json

In [56]:
class ConfigurationManager:
    """Reads the project's YAML settings and hands out per-stage config objects."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the main config YAML.
            params_filepath: Location of the params YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_evaluation_config(self) -> EvaluationConfig:
        """Assemble the ``EvaluationConfig`` for the evaluation stage.

        Artifact paths come from the ``training`` section of the config, the
        dataset path and MLflow URL from its ``evaluation`` section, and the
        remaining values from the params file.

        Returns:
            A populated, frozen ``EvaluationConfig``.
        """
        # Sections are read in the same order the fields are consumed below.
        training_section = self.config.training
        artifact_paths = {
            "path_of_model": Path(training_section.trained_model_path),
            "path_of_stemmer": Path(training_section.trained_stemmer_path),
            "path_of_stopwords": Path(training_section.trained_stop_words_path),
            "path_of_vectorizer": Path(training_section.trained_vectorizer_path),
        }

        evaluation_section = self.config.evaluation
        dataset_path = Path(evaluation_section.dataset_path)
        tracking_url = evaluation_section.ml_flow_tracking_url

        hyperparams = self.params
        return EvaluationConfig(
            **artifact_paths,
            training_data=dataset_path,
            mlflow_uri=tracking_url,
            all_params=hyperparams,
            params_image_size=hyperparams.IMAGE_SIZE,
            params_batch_size=hyperparams.BATCH_SIZE,
            params_data_size=hyperparams.DATA_SIZE,
        )


In [57]:
import tensorflow as tf
from pathlib import Path
import mlflow
import mlflow.keras
import mlflow.sklearn
from urllib.parse import urlparse
import nltk
from nltk import word_tokenize
import pandas as pd
import string
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np

In [58]:
class Evaluation:
    """Evaluates the pickled model on labelled review text and records the result.

    Typical use is ``evaluation()`` followed by ``log_into_mlflow()``. The
    latter reads ``self.score`` and ``self.model``, which only exist after
    ``evaluation()`` has run.
    """

    def __init__(self, config: EvaluationConfig):
        """Keep the evaluation settings; nothing is loaded until ``evaluation()``."""
        self.config = config

    @staticmethod
    def _load_pickle(path):
        """Unpickle the object stored at ``path`` and close the file afterwards."""
        with open(path, 'rb') as handle:
            return pk.load(handle)

    def load_model(self):
        """Load the model, stop-word collection, stemmer and vectorizer from disk.

        Populates ``self.model``, ``self.stopwords``, ``self.stemmer`` and
        ``self.vectorizer`` from the pickle paths held in the config.
        """
        # NOTE(security): unpickling can execute arbitrary code. These files
        # must come from a trusted training run, never from untrusted input.
        self.model = self._load_pickle(self.config.path_of_model)
        self.stopwords = self._load_pickle(self.config.path_of_stopwords)
        self.stemmer = self._load_pickle(self.config.path_of_stemmer)
        self.vectorizer = self._load_pickle(self.config.path_of_vectorizer)

    def data_preprocess(self):
        """Build evaluation features and labels from the configured CSV.

        Reads the ``Text`` and ``Score`` columns, samples ``params_data_size``
        rows, drops rows scored 3 or 4, labels scores 1 and 2 as 0 and every
        other remaining score as 1, normalises the text, and vectorizes it.

        Requires ``load_model()`` to have run first, because the stemmer,
        stop words and vectorizer are used here.

        Populates ``self.y_test`` (label array) and ``self.df`` (the
        vectorizer's output for the cleaned text).
        """
        df = pd.read_csv(self.config.training_data)
        df = df[['Text', 'Score']]
        # Sampling is unseeded, so each run evaluates a different subset.
        df = df.sample(n=self.config.params_data_size)
        # Scores 3 and 4 are dropped *after* sampling, so fewer than
        # params_data_size rows may remain. The copy gives us a frame we own
        # before adding a column to it.
        df = df.loc[~df['Score'].isin([3, 4])].copy()

        def category(score):
            return 0 if score==1 or score==2 else 1

        df['Sentiment'] = df['Score'].apply(category)

        def text_preprocessing(text):
            # Lower-case, tokenize, drop stop words and punctuation tokens,
            # stem what is left, and re-join into one string.
            lower_casing = text.lower()
            tokens = word_tokenize(lower_casing)
            tokens = [
                self.stemmer.stem(token)
                for token in tokens
                if token not in self.stopwords and token not in string.punctuation
            ]
            return " ".join(tokens)

        df['Text'] = df['Text'].apply(text_preprocessing)
        self.y_test = np.array(df['Sentiment'])
        self.df = self.vectorizer.transform(df['Text'])

    def evaluation(self):
        """Run the full evaluation: load artifacts, prepare data, predict, score.

        Populates ``self.y_pred`` and ``self.score`` (accuracy) and writes the
        score to ``scores.json`` via ``save_score()``.
        """
        self.load_model()
        self.data_preprocess()
        self.y_pred = self.model.predict(self.df)
        self.score = accuracy_score(self.y_test, self.y_pred)
        self.save_score()

    def save_score(self):
        """Write the accuracy to ``scores.json`` in the current working directory."""
        scores = { "accuracy": self.score}
        save_json(path=Path("scores.json"), data=scores)

    def log_into_mlflow(self):
        """Log parameters, the accuracy metric and the model to MLflow.

        Must be called after ``evaluation()``. When the tracking URI scheme is
        not ``file``, the model is also registered under the name
        ``logisticRegression``.
        """
        mlflow.set_registry_uri(self.config.mlflow_uri)
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
        print(tracking_url_type_store)

        with mlflow.start_run():
            mlflow.log_params(self.config.all_params)
            mlflow.log_metrics(
                { "accuracy": self.score}
            )
            # Model registry does not work with file store
            if tracking_url_type_store != "file":
                # Register the model. Other Model Registry workflows are
                # described at:
                # https://mlflow.org/docs/latest/model-registry.html#api-workflow
                mlflow.sklearn.log_model(self.model, "model", registered_model_name="logisticRegression")
            else:
                mlflow.sklearn.log_model(self.model, "model")


In [59]:
# Run the evaluation stage end to end: build the config, score the model,
# then log the run to MLflow. Exceptions propagate unchanged; the previous
# `except Exception as e: raise e` wrapper only added a traceback frame.
config = ConfigurationManager()
eval_config = config.get_evaluation_config()
evaluation = Evaluation(eval_config)
evaluation.evaluation()
evaluation.log_into_mlflow()

2024-08-07 12:20:23,389: Sentiment-Analysis: INFO: common.py: read_yaml:- yaml file: config/config.yaml loaded successfully
2024-08-07 12:20:23,391: Sentiment-Analysis: INFO: common.py: read_yaml:- yaml file: params.yaml loaded successfully
2024-08-07 12:20:23,391: Sentiment-Analysis: INFO: common.py: create_directories:- created directory at: artifacts
2024-08-07 12:20:51,025: Sentiment-Analysis: INFO: common.py: save_json:- json file saved at: scores.json


https


Successfully registered model 'logisticRegression'.
2024/08/07 12:21:00 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: logisticRegression, version 1
Created version '1' of model 'logisticRegression'.
