In [1]:
import os
%pwd

'c:\\Users\\Dhruv\\OneDrive\\Desktop\\AI-text-detection-web-app\\research'

In [2]:
# Move from the notebook folder up to the project root so that relative paths
# (e.g. the config and params YAML files) resolve. The guard makes the cell
# idempotent: an unconditional chdir("../") climbs one more level on every
# re-run and silently breaks those paths.
# NOTE(review): assumes the notebook lives in a folder named "research" —
# update the name here if that folder is renamed.
if os.path.basename(os.getcwd()).lower() == "research":
    os.chdir("../")

In [3]:
%pwd

'c:\\Users\\Dhruv\\OneDrive\\Desktop\\AI-text-detection-web-app'

In [4]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable (frozen) settings consumed by the data transformation stage."""
    # Directory belonging to this stage.
    root_dir: Path
    # CSV file that is read as the training data.
    train_data_file: Path
    # CSV file that is read as the test data.
    test_data_file: Path

In [5]:
from AI_Text_Detection.constants import *
from AI_Text_Detection.utils.common import read_yaml, create_directories

In [6]:
class ConfigurationManager:
    """Reads the project's YAML files and builds per-stage configuration objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """
        Load the config and params YAML files and create the artifacts root.

        Args:
            config_filepath: location of the main config YAML file.
            params_filepath: location of the params YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """
        Build the configuration for the data transformation stage.

        Creates the stage's root directory as a side effect.

        Returns:
            DataTransformationConfig whose fields are ``pathlib.Path`` objects.
        """
        stage_cfg = self.config.data_transformation

        create_directories([stage_cfg.root_dir])

        # The dataclass annotates every field as Path, so convert the raw YAML
        # values instead of passing them through unchanged.
        return DataTransformationConfig(
            root_dir=Path(stage_cfg.root_dir),
            train_data_file=Path(stage_cfg.train_data_file),
            test_data_file=Path(stage_cfg.test_data_file),
        )

In [12]:
import importlib
import AI_Text_Detection.entity.config_entity as function

# Re-execute the already-imported config_entity module (bound here to the
# alias `function`), presumably so edits made to config_entity.py on disk are
# picked up without restarting the kernel.
# NOTE(review): classes defined directly in this notebook are not changed by
# this reload.
importlib.reload(function)


<module 'AI_Text_Detection.entity.config_entity' from 'c:\\users\\dhruv\\onedrive\\desktop\\ai-text-detection-web-app\\src\\AI_Text_Detection\\entity\\config_entity.py'>

In [7]:
import os
import sys
from AI_Text_Detection import logger
from AI_Text_Detection.utils.common import lower_case, remove_punctuation, remove_stopwords, remove_tags
from AI_Text_Detection.exception import CustomException
import pandas as pd
import nltk
nltk.download('stopwords')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Dhruv\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Dhruv\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [8]:
class DataTransformation:
    """Loads the train/test CSV files and cleans their ``text`` column."""

    def __init__(self, config: DataTransformationConfig):
        """Store the stage configuration (paths of the train and test CSV files)."""
        self.config = config

    @staticmethod
    def _clean_text_column(df):
        """Clean ``df["text"]`` in place: tags, punctuation, lower-casing, stopwords (in that order)."""
        # assumes every value in the column is something the cleaning helpers
        # accept (presumably str) — TODO confirm there are no missing values
        for step in (remove_tags, remove_punctuation, lower_case, remove_stopwords):
            df["text"] = df["text"].apply(step)

    def initiate_data_transformation(self):
        '''
        Load the train and test data, clean the text and split features from labels.

        No model is trained here; this stage only prepares the data.

        Returns:
            Tuple ``(X_train, y_train, X_test, y_test)`` where the X values are
            the cleaned ``text`` columns and the y values are the ``generated``
            columns of the train and test dataframes.

        Raises:
            CustomException: wraps any error raised while loading or cleaning;
                the original error is attached as its ``__cause__``.
        '''
        try:
            train_df = pd.read_csv(self.config.train_data_file)
            test_df = pd.read_csv(self.config.test_data_file)
            logger.info("train and test dataframe loaded")
            logger.info("Data transformation initiated")

            # Same cleaning for both splits, training data first.
            for df in (train_df, test_df):
                self._clean_text_column(df)

            X_train = train_df["text"]
            y_train = train_df["generated"]
            X_test = test_df["text"]
            y_test = test_df["generated"]
            logger.info("Data transformation completed")
            return X_train, y_train, X_test, y_test

        except Exception as e:
            # Chain explicitly so the original traceback is reported as the cause.
            raise CustomException(e, sys) from e

In [None]:
# Ad-hoc run of the transformation stage; the four resulting series are left
# as notebook-level variables.
try:
    # Loads the YAML files and creates the artifacts root directory.
    config = ConfigurationManager()
    # Also creates the data transformation stage's root directory.
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = DataTransformation(config=data_transformation_config)
    # Reads both CSV files and cleans their "text" column.
    X_train, y_train, X_test, y_test = data_transformation.initiate_data_transformation()
except Exception as e:
    # Re-raise any failure as the project's CustomException.
    raise CustomException(e, sys)

In [10]:
STAGE_NAME = "Data Transformation Stage"


class DataTransformationPipeline:
    """Runs the data transformation stage and keeps its four outputs on the instance."""

    def __init__(self):
        # Populated by main(); all four stay None until the stage has run.
        self.X_train = self.y_train = self.X_test = self.y_test = None

    def main(self):
        """
        Build the stage configuration, run the transformation and store the
        resulting train/test features and labels on this object.

        Any failure is re-raised as CustomException.
        """
        try:
            stage_config = ConfigurationManager().get_data_transformation_config()
            splits = DataTransformation(config=stage_config).initiate_data_transformation()
            self.X_train, self.y_train, self.X_test, self.y_test = splits
        except Exception as err:
            raise CustomException(err, sys)
        


In [11]:
# Run the whole stage through the pipeline wrapper, logging start and end.
if __name__ == '__main__':
    try:
        logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
        # `obj` stays a notebook-level variable so its X_*/y_* attributes can
        # be inspected after the run.
        obj = DataTransformationPipeline()
        obj.main()
        logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
    except Exception as e:
        # Log the traceback first, then re-raise as the project's CustomException.
        logger.exception(e)
        raise CustomException(e, sys)

[2024-03-19 13:01:32,806: INFO: 3222589211: >>>>>> stage Data Transformation Stage started <<<<<<]
[2024-03-19 13:01:32,816: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-03-19 13:01:32,821: INFO: common: yaml file: params.yaml loaded successfully]
[2024-03-19 13:01:32,824: INFO: common: created directory at: artifacts]
[2024-03-19 13:01:32,824: INFO: common: created directory at: artifacts/data_transformation]
[2024-03-19 13:02:05,495: INFO: 525598538: train and test dataframe loaded]
[2024-03-19 13:02:05,577: INFO: 525598538: Data transformation initiated]
[2024-03-19 13:33:10,405: INFO: 525598538: Data transformation completed]
[2024-03-19 13:33:10,440: INFO: 3222589211: >>>>>> stage Data Transformation Stage completed <<<<<<



In [12]:
obj.y_test

0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
97442    1.0
97443    1.0
97444    0.0
97445    0.0
97446    1.0
Name: generated, Length: 97447, dtype: float64

In [13]:
obj.X_test

0        real fake feelingsimagine able detect exactly ...
1        seeking multiple opinions help make better cho...
2        addressnamefebruary 9 2011dear teachernamei th...
3        dear teachernameteachername believe cell phone...
4        believe computer examine feeling well believe ...
                               ...                        
97442    senatori writing today express support abolish...
97443    car usage popular mode transportation decades ...
97444    author suggests studying venus worthwhile purs...
97445    schools offering home schooling students losin...
97446    deer principal hm writing regarding proposal s...
Name: text, Length: 97447, dtype: object