### EDA
The EDA notebooks go into further detail:
* notebook/Project_EDA_001_001.ipynb
* notebook/Project_EDA_002_001.ipynb

### Notebook with model training

notebook/Project_Model_Training.ipynb

Inside data_ingestion.py - src/components/data_ingestion.py

```python
import os
import sys
import pandas as pd

from sklearn.model_selection import train_test_split
from dataclasses import dataclass

from src.exception import CustomException
from src.logger import logging

from src.components.data_transformation import DataTransformation

@dataclass
class DataIngestionConfig:
    """Default output locations (inside the ``artifacts`` folder) used by data ingestion."""

    # CSV holding the training split.
    train_data_path: str = os.path.join("artifacts", "train.csv")
    # CSV holding the test split.
    test_data_path: str = os.path.join("artifacts", "test.csv")
    # CSV holding an unmodified copy of the source dataset.
    raw_data_path: str = os.path.join("artifacts", "data.csv")

class DataIngestion:
    """Reads the source dataset and writes raw, train and test CSV files."""

    def __init__(self):
        self.ingestion_config = DataIngestionConfig()

    def initiate_data_ingestion(self):
        """
        Load the source CSV, store a raw copy, and split it into train/test files.

        The split uses 20% of the rows for the test set with a fixed random seed (42).

        Returns
          (train_data_path, test_data_path) taken from the ingestion config

        Raises
          CustomException wrapping any exception raised while reading, splitting or writing
        """
        logging.info("Entered the data ingestion method or component")
        try:
            raw_frame = pd.read_csv("notebooks/data/students_performance_0760.csv")
            logging.info("Reading the dataset as a DataFrame")

            config = self.ingestion_config

            # Make sure the output folder exists before anything is written to it.
            artifacts_dir = os.path.dirname(config.train_data_path)
            os.makedirs(artifacts_dir, exist_ok=True)

            raw_frame.to_csv(config.raw_data_path, index=False, header=True)

            logging.info("Initiating Train Test Split")
            train_part, test_part = train_test_split(
                raw_frame, test_size=0.2, random_state=42
            )
            logging.info("Train Test Split Complete")

            # Persist the two splits, train first and then test.
            outputs = (
                (train_part, config.train_data_path),
                (test_part, config.test_data_path),
            )
            for part, destination in outputs:
                part.to_csv(destination, index=False, header=True)

            logging.info("Data Ingestion Complete")
            return config.train_data_path, config.test_data_path
        except Exception as error:
            raise CustomException(error, sys)

if __name__ == "__main__":
    # Run the ingestion step on its own when this file is executed as a script.
    ingestion = DataIngestion()
    train_data, test_data = ingestion.initiate_data_ingestion()
```

Testing it

```Shell
% python src/components/data_ingestion.py
```

Inside logs
```Markdown
logs/04_07_2025_16_13_09.log/04_07_2025_16_13_09.log
[ 2025-04-07 16:13:09,068 ] 22 root - INFO - Entered the data ingestion method or component
[ 2025-04-07 16:13:09,074 ] 26 root - INFO - Reading the dataset as a DataFrame
[ 2025-04-07 16:13:09,076 ] 32 root - INFO - Initiating Train Test Split
[ 2025-04-07 16:13:09,077 ] 34 root - INFO - Train Test Split Completed
[ 2025-04-07 16:13:09,079 ] 38 root - INFO - Train and Test Set Saved
[ 2025-04-07 16:13:09,079 ] 39 root - INFO - Data Ingestion Completed

```


Inside - src/utils.py

```python
import os
import sys
from pathlib import Path
import numpy as np
import pandas as pd
import pickle

from src.exception import CustomException
from src.logger import logging


def save_object(file_path, obj):
    """
    Serializes ``obj`` with pickle and writes it to ``file_path``.

    The parent directory of ``file_path`` is created first when it does not exist.

    Args
      file_path: destination path of the pickle file
      obj: the object to pickle

    Raises
      CustomException wrapping any exception raised while creating the
      directory or writing the file

    Usage
      from src.utils import save_object
      save_object(file_path=, obj=)
    """
    try:
        dir_path = os.path.dirname(file_path)
        # dirname is "" for a bare file name, and os.makedirs("") raises,
        # so only create the directory when there is one to create.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)

        # Use the built-in open(): os.open() takes integer flags and returns a
        # raw file descriptor, which cannot be used as a context manager.
        with open(file_path, "wb") as file_obj:
            pickle.dump(obj, file_obj)
    except Exception as e:
        raise CustomException(e, sys)
```

Inside - src/components/data_transformation.py

```Python
import sys
import os
from dataclasses import dataclass
import pandas as pd
import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

from src.logger import logging
from src.exception import CustomException
from src.utils import save_object


@dataclass
class DataTransformationConfig:
    """Default settings for the data transformation step."""

    # Path of the pickle file the preprocessing object is saved to.
    preprocessor_obj_file_path: str = os.path.join("artifacts", "preprocessor.pkl")

class DataTransformation:
    """
    Builds the preprocessing pipeline and applies it to the train and test CSV files.
    """

    def __init__(self):
        # Create a config instance; assigning the class itself (no parentheses)
        # would share one settings object between every DataTransformation.
        self.data_transformation_config = DataTransformationConfig()

    def get_data_transformer_object(self):
        """
        This function creates the data transformation pipeline for all the columns.

        Numerical Pipeline
        Selects and transforms the Numerical columns
            SimpleImputer - Handles the missing values (median)
            Performs Standard Scaling

        Categorical Pipeline
        Selects and transforms the Categorical columns
            SimpleImputer - Handles the missing values (most frequent value)
            Performs OneHotEncoding
            Performs Standard Scaling (without centering)

        Args
          None

        Returns
          preprocessor - an unfitted ColumnTransformer combining both pipelines

        Raises
          CustomException wrapping any exception raised while building the pipelines
        """
        try:
            numerical_columns = [
                "reading_score",
                "writing_score"
            ]
            categorical_columns = [
                "gender",
                "race_ethnicity",
                "parental_level_of_education",
                "lunch",
                "test_preparation_course"
            ]

            ## Pipelines
            ## SimpleImputer - Handles the missing values
            ## Numerical Pipeline
            num_pipeline = Pipeline(
                steps=[
                    ("imputer", SimpleImputer(strategy="median")),
                    ("scaler", StandardScaler())
                ]
            )

            ## Categorical Pipeline
            ## with_mean=False: the scaler only rescales and does not center
            ## the one-hot encoded columns.
            cat_pipeline = Pipeline(
                steps=[
                    ("imputer", SimpleImputer(strategy="most_frequent")),
                    ("one_hot_encoder", OneHotEncoder()),
                    ("scaler", StandardScaler(with_mean=False))
                ]
            )
            logging.info(f"Numerical columns: {numerical_columns}")
            logging.info(f"Categorical columns: {categorical_columns}")

            ## Merging both pipelines
            preprocessor = ColumnTransformer([
                ("num_pipeline", num_pipeline, numerical_columns),
                ("cat_pipeline", cat_pipeline, categorical_columns)
                ]
            )
            return preprocessor

        except Exception as e:
            raise CustomException(e, sys)


    def initiate_data_transformation(self, train_path, test_path):
        """
        This function performs data transformation.

        Reads the train and test CSV files, fits the preprocessing object on the
        training features, transforms both feature sets, appends the target
        column ("math_score") as the last column of each array, and saves the
        fitted preprocessing object as a pickle file.

        Args
          train_path - path of the training CSV file
          test_path - path of the test CSV file

        Returns
          train_arr - transformed training features with the target as last column
          test_arr - transformed test features with the target as last column
          self.data_transformation_config.preprocessor_obj_file_path

        Raises
          CustomException wrapping any exception raised during the transformation

        Usage
          from src.components.data_transformation import DataTransformation

          DataTransformation().initiate_data_transformation(train_path=, test_path=)
        """
        try:
            train_df = pd.read_csv(train_path)
            test_df = pd.read_csv(test_path)
            logging.info("Completed Reading Train and Test Data")
            logging.info("Obtaining Preprocessing Object")
            preprocessing_obj = self.get_data_transformer_object()

            target_column_name = "math_score"

            ## Selecting X and y
            ## `columns=` already selects the axis, so no `axis` argument is needed.
            input_feature_train_df = train_df.drop(columns=[target_column_name])
            target_feature_train_df = train_df[target_column_name]

            input_feature_test_df = test_df.drop(columns=[target_column_name])
            target_feature_test_df = test_df[target_column_name]

            logging.info("Applying preprocessing object on the training and testing dataframes.")

            ## Fit Transform Train - Transform Test
            ## The preprocessor is fitted on the training data only and then
            ## reused unchanged on the test data.
            input_feature_train_arr = preprocessing_obj.fit_transform(input_feature_train_df)
            input_feature_test_arr = preprocessing_obj.transform(input_feature_test_df)

            ## Concat Transformed data with target feature
            train_arr = np.c_[
                input_feature_train_arr, np.array(target_feature_train_df)
            ]
            test_arr = np.c_[input_feature_test_arr, np.array(target_feature_test_df)]

            ## Persist the fitted preprocessing object with utils.save_object()
            save_object(
                file_path=self.data_transformation_config.preprocessor_obj_file_path,
                obj=preprocessing_obj
            )

            logging.info("Preprocessing Object Saved")

            return (
                train_arr,
                test_arr,
                self.data_transformation_config.preprocessor_obj_file_path,
            )
        except Exception as e:
            raise CustomException(e, sys)
```


To test this, add the following code to data_ingestion.py - src/components/data_ingestion.py

```python
import os
import sys
import pandas as pd

from sklearn.model_selection import train_test_split
from dataclasses import dataclass

from src.exception import CustomException
from src.logger import logging

from src.components.data_transformation import DataTransformation

@dataclass
class DataIngestionConfig:
    """Default output locations (inside the ``artifacts`` folder) used by data ingestion."""

    # CSV holding the training split.
    train_data_path: str = os.path.join("artifacts", "train.csv")
    # CSV holding the test split.
    test_data_path: str = os.path.join("artifacts", "test.csv")
    # CSV holding an unmodified copy of the source dataset.
    raw_data_path: str = os.path.join("artifacts", "data.csv")

class DataIngestion:
    """Reads the source dataset and writes raw, train and test CSV files."""

    def __init__(self):
        self.ingestion_config = DataIngestionConfig()

    def initiate_data_ingestion(self):
        """
        Load the source CSV, store a raw copy, and split it into train/test files.

        The split uses 20% of the rows for the test set with a fixed random seed (42).

        Returns
          (train_data_path, test_data_path) taken from the ingestion config

        Raises
          CustomException wrapping any exception raised while reading, splitting or writing
        """
        logging.info("Entered the data ingestion method or component")
        try:
            raw_frame = pd.read_csv("notebooks/data/students_performance_0760.csv")
            logging.info("Reading the dataset as a DataFrame")

            config = self.ingestion_config

            # Make sure the output folder exists before anything is written to it.
            artifacts_dir = os.path.dirname(config.train_data_path)
            os.makedirs(artifacts_dir, exist_ok=True)

            raw_frame.to_csv(config.raw_data_path, index=False, header=True)

            logging.info("Initiating Train Test Split")
            train_part, test_part = train_test_split(
                raw_frame, test_size=0.2, random_state=42
            )
            logging.info("Train Test Split Complete")

            # Persist the two splits, train first and then test.
            outputs = (
                (train_part, config.train_data_path),
                (test_part, config.test_data_path),
            )
            for part, destination in outputs:
                part.to_csv(destination, index=False, header=True)

            logging.info("Data Ingestion Complete")
            return config.train_data_path, config.test_data_path
        except Exception as error:
            raise CustomException(error, sys)

if __name__ == "__main__":
    # Step 1: ingest the dataset and get the paths of the train/test CSV files.
    ingestion = DataIngestion()
    train_data, test_data = ingestion.initiate_data_ingestion()

    # Step 2: run the transformation on those files; the returned preprocessor
    # path is not needed here, so it is discarded.
    transformer = DataTransformation()
    train_arr, test_arr, _ = transformer.initiate_data_transformation(
        train_path=train_data,
        test_path=test_data,
    )
```



Testing it

```Shell
% python src/components/data_ingestion.py
```

Inside logs - logs/.../(...).log
```Markdown
[ 2025-04-08 15:13:42,020 ] 24 root - INFO - Entered the data ingestion method or component
[ 2025-04-08 15:13:42,028 ] 28 root - INFO - Reading the dataset as a DataFrame
[ 2025-04-08 15:13:42,031 ] 34 root - INFO - Initiating Train Test Split
[ 2025-04-08 15:13:42,032 ] 36 root - INFO - Train Test Split Complete
[ 2025-04-08 15:13:42,034 ] 41 root - INFO - Data Ingestion Complete
[ 2025-04-08 15:13:42,036 ] 114 root - INFO - Completed Reading Train and Test Data
[ 2025-04-08 15:13:42,036 ] 115 root - INFO - Obtaining Preprocessing Object
[ 2025-04-08 15:13:42,036 ] 79 root - INFO - Numerical columns: ['reading_score', 'writing_score']
[ 2025-04-08 15:13:42,036 ] 80 root - INFO - Categorical columns: ['gender', 'race_ethnicity', 'parental_level_of_education', 'lunch', 'test_preparation_course']
[ 2025-04-08 15:13:42,036 ] 134 root - INFO - Applying preprocessing object on the training and testing dataframes.

```
