In [1]:
import os
%pwd

'c:\\Users\\Dhruv\\OneDrive\\Desktop\\AI-text-detection-web-app\\research'

In [2]:
# Move from the notebook folder up to the project root so that the relative
# "artifacts/..." and config paths used by later cells resolve.
# Guarded on the notebook folder name ("research", per the %pwd output above)
# so that re-running this cell does not climb one directory further each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [3]:
%pwd

'c:\\Users\\Dhruv\\OneDrive\\Desktop\\AI-text-detection-web-app'

In [14]:
import pandas as pd

In [15]:
# Read the four CSV splits written by the data-transformation stage.
# The paths are relative to the current working directory; the reads happen
# in the order listed and each name is bound to the resulting DataFrame.
X_train, X_test, y_train, y_test = map(
    pd.read_csv,
    (
        "artifacts/data_transformation/X_train.csv",
        "artifacts/data_transformation/X_test.csv",
        "artifacts/data_transformation/y_train.csv",
        "artifacts/data_transformation/y_test.csv",
    ),
)

In [20]:
def _column_or_self(data, column):
    """Return ``data[column]`` for a DataFrame; pass anything else through unchanged.

    The names below are rebound from DataFrame to Series. Without this guard a
    second run of the cell would look up the label "text"/"generated" in the
    Series index and fail with a KeyError.
    """
    return data[column] if isinstance(data, pd.DataFrame) else data


# Keep only the column each split contributes to training/evaluation:
# the label column "generated" for y, the raw document column "text" for X.
y_train = _column_or_self(y_train, "generated")
y_test = _column_or_self(y_test, "generated")
X_test = _column_or_self(X_test, "text")
X_train = _column_or_self(X_train, "text")

In [21]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainingConfig:
    """Immutable settings consumed by the model-training stage.

    Attributes:
        root_dir: Working directory of the stage; the configuration manager
            creates it before handing this object out.
        trained_model_path: Destination passed to ``save_object`` when the
            best pipeline is persisted.
    """

    root_dir: Path
    trained_model_path: Path

In [23]:
import importlib
import AI_Text_Detection.utils.common as function

# Re-execute the already-imported utilities module (aliased here as `function`)
# so that edits made to common.py since the kernel first imported it are picked
# up without restarting the kernel.
importlib.reload(function)

<module 'AI_Text_Detection.utils.common' from 'c:\\users\\dhruv\\onedrive\\desktop\\ai-text-detection-web-app\\src\\AI_Text_Detection\\utils\\common.py'>

In [24]:
from AI_Text_Detection.constants import *
from AI_Text_Detection.utils.common import read_yaml, create_directories, save_object

In [25]:
class ConfigurationManager:
    """Reads the project's YAML settings and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Load both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Location of the main config YAML.
            params_filepath: Location of the params YAML.
        """
        # Both results are kept on the instance; the config object is read
        # through attribute access (e.g. ``.artifacts_root``) below.
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_training_config(self) -> ModelTrainingConfig:
        """Build the model-training settings from the ``model_training`` section.

        The stage's ``root_dir`` is passed to ``create_directories`` before the
        config object is returned.
        """
        stage_section = self.config.model_training
        create_directories([stage_section.root_dir])

        return ModelTrainingConfig(
            root_dir=stage_section.root_dir,
            trained_model_path=stage_section.trained_model_path,
        )

In [26]:
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB, ComplementNB
from sklearn.svm import SVC, LinearSVC
from AI_Text_Detection import logger
from AI_Text_Detection.exception import CustomException
import sys
from sklearn.metrics import accuracy_score

In [38]:
class ModelTraining:
    '''
    Builds the candidate text-classification pipelines, fits each of them,
    and persists the one with the highest accuracy.
    '''

    def __init__(self, config: ModelTrainingConfig):
        '''
        config: stage settings; ``trained_model_path`` is where the winning
        pipeline is saved.
        '''
        self.config = config

    @staticmethod
    def _build_text_pipeline(step_name, classifier):
        '''
        Return a CountVectorizer -> TfidfTransformer -> ``classifier`` pipeline
        whose final step is registered under ``step_name``.
        '''
        return Pipeline([
            ('count_vectorizer', CountVectorizer(stop_words='english')),  # Step 1: token counts
            ('tfidf_transformer', TfidfTransformer()),  # Step 2: TF-IDF weighting
            (step_name, classifier),  # Step 3: the estimator under comparison
        ])

    def create_model_pipeline(self):
        '''
        creating the model pipeline using Naive Bayes, Complement Naive Bayes
        & Linear SVC algorithm

        Returns a dict mapping model name -> unfitted Pipeline. Each pipeline's
        final step carries the same name as its dict key.

        Raises CustomException if building any pipeline fails.
        '''
        try:
            logger.info("model pipeline building initiated")

            classifiers = {
                'naive_bayes': MultinomialNB(),
                'Complement_Naive_Bayes': ComplementNB(),
                'Linear SVC': LinearSVC(),
            }
            model_pipeline = {
                name: self._build_text_pipeline(name, classifier)
                for name, classifier in classifiers.items()
            }

            logger.info("Model pipeline created")

            return model_pipeline

        except Exception as e:
            raise CustomException(e, sys) from e

    def initiate_model_training(self, X_train, y_train, X_test, y_test, model_pipeline):
        '''
        training the model on the transformed data

        Fits every pipeline in ``model_pipeline`` on (X_train, y_train), scores
        it with accuracy on (X_test, y_test), prints the scores and the winner,
        and saves the best pipeline to ``self.config.trained_model_path``.

        Returns a dict mapping model name -> accuracy.

        Raises CustomException if ``model_pipeline`` is empty or if fitting,
        scoring or saving fails.
        '''
        try:
            if not model_pipeline:
                # Fail with a clear message instead of an opaque max() error below.
                raise ValueError("model_pipeline is empty; there is nothing to train")

            logger.info("model traininng initiated")

            model_accuracy = {}
            for model_name, pipeline in model_pipeline.items():
                pipeline.fit(X_train, y_train)
                y_pred = pipeline.predict(X_test)
                model_accuracy[model_name] = accuracy_score(y_test, y_pred)

            logger.info("model traininng completed")

            # Highest accuracy wins; on a tie the first model in dict order is kept.
            # NOTE(review): the winner is chosen on the same split that is reported,
            # so the reported accuracy is optimistic for the selected model.
            best_model_name = max(model_accuracy, key=model_accuracy.get)
            best_model_score = model_accuracy[best_model_name]
            best_model = model_pipeline[best_model_name]

            logger.info(f"best model: {best_model_name} (accuracy={best_model_score})")

            print(model_accuracy)
            print(best_model_name)
            save_object(file_path=self.config.trained_model_path, obj=best_model)

            return model_accuracy

        except Exception as e:
            raise CustomException(e, sys) from e



In [39]:
STAGE_NAME = "Model Training Stage"

class ModelTrainingPipeline:
    """Runs the model-training stage end to end.

    ``main`` reads ``X_train``, ``y_train``, ``X_test`` and ``y_test`` from the
    enclosing (notebook) scope, so those names must be defined before it runs.
    """

    def __init__(self):
        pass

    def main(self):
        """Configure, build, train and return the per-model accuracy dict.

        Any failure is re-raised as ``CustomException``.
        """
        try:
            stage_config = ConfigurationManager().get_model_training_config()
            trainer = ModelTraining(config=stage_config)
            candidate_pipelines = trainer.create_model_pipeline()
            return trainer.initiate_model_training(
                X_train, y_train, X_test, y_test, candidate_pipelines
            )

        except Exception as e:
            raise CustomException(e, sys)
        

In [40]:
# Driver for the training stage. In a notebook kernel __name__ is '__main__',
# so this body executes whenever the cell is run.
if __name__ == '__main__':
    try:
        logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
        obj = ModelTrainingPipeline()
        # Bound at notebook scope on purpose: a later cell prints it again.
        model_accuracy = obj.main()
        print(model_accuracy)
        logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
    except Exception as e:
        # Record the traceback in the log, then re-raise wrapped in the
        # project's exception type.
        logger.exception(e)
        raise CustomException(e, sys)

[2024-03-19 16:36:44,373: INFO: 2342994688: >>>>>> stage Model Training Stage started <<<<<<]
[2024-03-19 16:36:44,381: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-03-19 16:36:44,385: INFO: common: yaml file: params.yaml loaded successfully]
[2024-03-19 16:36:44,386: INFO: common: created directory at: artifacts]
[2024-03-19 16:36:44,389: INFO: common: created directory at: artifacts/model_training]
[2024-03-19 16:36:44,392: INFO: 2691265254: model pipeline building initiated]
[2024-03-19 16:36:44,394: INFO: 2691265254: Model pipeline created]
[2024-03-19 16:36:44,395: INFO: 2691265254: model traininng initiated]
[2024-03-19 16:36:44,672: INFO: 2691265254: model traininng completed]
{'naive_bayes': 0.735, 'Complement_Naive_Bayes': 0.82, 'Linear SVC': 0.9}
Linear SVC
{'naive_bayes': 0.735, 'Complement_Naive_Bayes': 0.82, 'Linear SVC': 0.9}
[2024-03-19 16:36:44,677: INFO: 2342994688: >>>>>> stage Model Training Stage completed <<<<<<





In [30]:
from AI_Text_Detection.utils.common import load_object

In [41]:
# Load the persisted pipeline back from disk to check the saved artifact.
# NOTE(review): the path is hard-coded; presumably it matches the
# `trained_model_path` entry in the config YAML — confirm.
# NOTE(review): load_object likely unpickles the file (given the .pkl name);
# only load artifacts produced by a trusted run.
model = load_object("artifacts/model_training/model.pkl")

In [42]:
# Smoke test: the loaded pipeline takes a list of raw strings and returns one
# label per string.
# NOTE(review): labels come from the "generated" column; 0 presumably means
# human-written — confirm against the dataset.
model.predict(["My name aarava, and i like black tea"])

array([0.])

In [43]:
# Re-display the per-model accuracies bound at notebook scope by the
# training-stage driver cell; this fails on a fresh kernel unless that cell
# has been run first.
print(model_accuracy)

{'naive_bayes': 0.735, 'Complement_Naive_Bayes': 0.82, 'Linear SVC': 0.9}
