In [1]:
from collections import namedtuple 

In [2]:
DataIngetionConfig = namedtuple("DataIngetionConfig", 
                      ["dataset_download_dir", "tgz_download_dir", "raw_data_dir", "ingested_train_dir", "ingested_test_dir"])

In [3]:
data_ingestion_config = DataIngetionConfig(dataset_download_dir="bsjdl",
                   tgz_download_dir="hdsbld",
                   raw_data_dir="dbufosuj",
                   ingested_test_dir="djobw",
                   ingested_train_dir="oudcjdsk")

In [4]:
data_ingestion_config

DataIngetionConfig(dataset_download_dir='bsjdl', tgz_download_dir='hdsbld', raw_data_dir='dbufosuj', ingested_train_dir='oudcjdsk', ingested_test_dir='djobw')

In [5]:
import os

In [6]:
os.getcwd()

'e:\\ML_Projects\\Basic_ML_Project\\notebook'

In [7]:
os.chdir("e:\\ML_Projects\\Basic_ML_Project")

In [8]:
os.getcwd()

'e:\\ML_Projects\\Basic_ML_Project'

In [9]:
os.listdir()

['.dockerignore',
 '.git',
 '.github',
 '.gitignore',
 'app.py',
 'build',
 'config',
 'demo.py',
 'dist',
 'Dockerfile',
 'House_Predictor.egg-info',
 'housing',
 'housing_logs',
 'LICENSE',
 'notebook',
 'pyproject.toml',
 'README.md']

In [10]:
os.listdir('config')

['base_dataset.csv', 'config.yaml', 'model.yaml', 'schema.yaml']

In [11]:
config_file_path = os.path.join("config", "config.yaml")

In [12]:
config_file_path

'config\\config.yaml'

In [13]:
os.path.exists(config_file_path)

True

In [14]:
import yaml
config_info=None
with open(config_file_path, "rb") as yaml_file:
    config_info=yaml.safe_load(yaml_file)

In [15]:
config_info

{'training_pipeline_config': {'pipeline_name': 'housing',
  'artifact_dir': 'artifact'},
 'data_ingestion_config': {'dataset_download_url': 'https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.tgz',
  'raw_data_dir': 'raw_data',
  'tgz_download_dir': 'tgz_data',
  'ingested_dir': 'ingested_data',
  'ingested_train_dir': 'train',
  'ingested_test_dir': 'test'},
 'data_validation_config': {'schema_dir': 'config',
  'base_dataset_dir': 'config',
  'base_dataset_file_name': 'base_dataset.csv',
  'schema_file_name': 'schema.yaml',
  'train_report_file_name': 'train_report.json',
  'train_report_page_file_name': 'train_report.html',
  'test_report_file_name': 'test_report.json',
  'test_report_page_file_name': 'test_report.html'},
 'data_transformation_config': {'add_bedroom_per_room': True,
  'transformed_dir': 'transformed_data',
  'transformed_train_dir': 'train',
  'transformed_test_dir': 'test',
  'preprocessing_dir': 'preprocessed',
  'preprocessed_objec

In [16]:
config_info["data_ingestion_config"]

{'dataset_download_url': 'https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.tgz',
 'raw_data_dir': 'raw_data',
 'tgz_download_dir': 'tgz_data',
 'ingested_dir': 'ingested_data',
 'ingested_train_dir': 'train',
 'ingested_test_dir': 'test'}

In [17]:
def read_yaml_file(file_path:str) -> dict:
    """
    It reads a YAML file and retun it's content in the form of dictionary
    file_path:str
    """
    try:
        with open(file_path, "rb") as yaml_file:
            return yaml.safe_load(yaml_file)
    except Exception as e:
        raise e

In [18]:
config = read_yaml_file(config_file_path)
config

{'training_pipeline_config': {'pipeline_name': 'housing',
  'artifact_dir': 'artifact'},
 'data_ingestion_config': {'dataset_download_url': 'https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.tgz',
  'raw_data_dir': 'raw_data',
  'tgz_download_dir': 'tgz_data',
  'ingested_dir': 'ingested_data',
  'ingested_train_dir': 'train',
  'ingested_test_dir': 'test'},
 'data_validation_config': {'schema_dir': 'config',
  'base_dataset_dir': 'config',
  'base_dataset_file_name': 'base_dataset.csv',
  'schema_file_name': 'schema.yaml',
  'train_report_file_name': 'train_report.json',
  'train_report_page_file_name': 'train_report.html',
  'test_report_file_name': 'test_report.json',
  'test_report_page_file_name': 'test_report.html'},
 'data_transformation_config': {'add_bedroom_per_room': True,
  'transformed_dir': 'transformed_data',
  'transformed_train_dir': 'train',
  'transformed_test_dir': 'test',
  'preprocessing_dir': 'preprocessed',
  'preprocessed_objec

In [19]:
from housing.constant import *
config[TRAINING_PIPELINE_CONFIG_KEY]

{'pipeline_name': 'housing', 'artifact_dir': 'artifact'}

In [20]:
config[TRAINING_PIPELINE_CONFIG_KEY][TRAINING_PIPELINE_NAME_KEY]

'housing'

In [21]:
training_pipeline_config = config[TRAINING_PIPELINE_CONFIG_KEY]
artifact_dir = os.path.join(ROOT_DIR,
                            training_pipeline_config[TRAINING_PIPELINE_NAME_KEY],
                            training_pipeline_config[TRAINING_PIPELINE_ARTIFACT_DIR_KEY])
artifact_dir

'e:\\ML_Projects\\Basic_ML_Project\\housing\\artifact'

In [22]:
from housing.config.configuration import Configuration
configuration = Configuration()
configuration.get_training_pipeline_config()

TrainingPipelineConfig(artifact_dir='e:\\ML_Projects\\Basic_ML_Project\\housing\\artifact')

In [23]:
configuration.get_data_ingestion_config()

DataIngetionConfig(dataset_download_dir='https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.tgz', tgz_download_dir='e:\\ML_Projects\\Basic_ML_Project\\housing\\artifact\\data_ingestion\\2025-10-08-13-04-04\\tgz_data', raw_data_dir='e:\\ML_Projects\\Basic_ML_Project\\housing\\artifact\\data_ingestion\\2025-10-08-13-04-04\\raw_data', ingested_train_dir='e:\\ML_Projects\\Basic_ML_Project\\housing\\artifact\\data_ingestion\\2025-10-08-13-04-04\\ingested_data\\train', ingested_test_dir='e:\\ML_Projects\\Basic_ML_Project\\housing\\artifact\\data_ingestion\\2025-10-08-13-04-04\\ingested_data\\test')

In [24]:
configuration.get_data_transformation_config()

DataTransformationConfig(add_bedroom_per_room=True, transformed_train_dir='e:\\ML_Projects\\Basic_ML_Project\\housing\\artifact\\data_transformation\\2025-10-08-13-04-04\\transformed_data\\train', transformed_test_dir='e:\\ML_Projects\\Basic_ML_Project\\housing\\artifact\\data_transformation\\2025-10-08-13-04-04\\transformed_data\\test', preprocessed_object_file_path='e:\\ML_Projects\\Basic_ML_Project\\housing\\artifact\\data_transformation\\2025-10-08-13-04-04\\preprocessed\\preprocessed.pkl')

In [25]:
configuration.get_data_validation_config()

DataValidationConfig(base_dataset_file_path='e:\\ML_Projects\\Basic_ML_Project\\config\\base_dataset.csv', schema_file_path='e:\\ML_Projects\\Basic_ML_Project\\config\\schema.yaml', train_report_file_path='e:\\ML_Projects\\Basic_ML_Project\\housing\\artifact\\data_validation\\2025-10-08-13-04-04\\train_report.json', train_report_page_file_path='e:\\ML_Projects\\Basic_ML_Project\\housing\\artifact\\data_validation\\2025-10-08-13-04-04\\train_report.html', test_report_file_path='e:\\ML_Projects\\Basic_ML_Project\\housing\\artifact\\data_validation\\2025-10-08-13-04-04\\test_report.json', test_report_page_file_path='e:\\ML_Projects\\Basic_ML_Project\\housing\\artifact\\data_validation\\2025-10-08-13-04-04\\test_report.html')

In [26]:
configuration.get_model_evaluation_config()

ModelEvaluationConfig(model_evaluation_file_path='e:\\ML_Projects\\Basic_ML_Project\\housing\\artifact\\model_evaluation\\model_evaluation.yaml', time_stamp='2025-10-08-13-04-04')

In [27]:
configuration.get_model_pusher_config()

ModelPusherConfig(export_dir_path='e:\\ML_Projects\\Basic_ML_Project\\saved_models\\20251008130407')

In [28]:
configuration.get_model_trainer_config()

ModelTrainerConfig(trained_model_file_path='e:\\ML_Projects\\Basic_ML_Project\\housing\\artifact\\model_trainer\\2025-10-08-13-04-04\\trained_model\\model.pkl', base_accuracy=0.6, model_config_file_path='config\\model.yaml')

In [29]:
url = "https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.tgz"
os.path.basename(url)


'housing.tgz'

In [30]:
file_path = "E:\\ML_Projects\\Basic_ML_Project\\config"

In [31]:
os.listdir(file_path)[0]

'base_dataset.csv'

In [32]:
from housing.pipeline.pipeline import Pipeline
from housing.config.configuration import Configuration
config = Configuration()
obj = Pipeline(config=config)
obj.run_pipeline()

In [33]:
import pandas as pd
import numpy as np


dataset_file_path = "E:\\ML_Projects\\Basic_ML_Project\\housing\\artifact\\data_ingestion\\2025-09-14-11-02-45\\raw_data\\housing.csv"
data_frame = pd.read_csv(dataset_file_path)

In [34]:
pwd

'e:\\ML_Projects\\Basic_ML_Project'

In [35]:
data_frame

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY
...,...,...,...,...,...,...,...,...,...,...
20635,-121.09,39.48,25.0,1665.0,374.0,845.0,330.0,1.5603,78100.0,INLAND
20636,-121.21,39.49,18.0,697.0,150.0,356.0,114.0,2.5568,77100.0,INLAND
20637,-121.22,39.43,17.0,2254.0,485.0,1007.0,433.0,1.7000,92300.0,INLAND
20638,-121.32,39.43,18.0,1860.0,409.0,741.0,349.0,1.8672,84700.0,INLAND


In [36]:
data_frame.dtypes.index, data_frame.dtypes.values

(Index(['longitude', 'latitude', 'housing_median_age', 'total_rooms',
        'total_bedrooms', 'population', 'households', 'median_income',
        'median_house_value', 'ocean_proximity'],
       dtype='object'),
 array([dtype('float64'), dtype('float64'), dtype('float64'),
        dtype('float64'), dtype('float64'), dtype('float64'),
        dtype('float64'), dtype('float64'), dtype('float64'), dtype('O')],
       dtype=object))

In [37]:
list(map(lambda x: str(x).replace("dtype('", "").replace("')", ""), data_frame.dtypes.values))

['float64',
 'float64',
 'float64',
 'float64',
 'float64',
 'float64',
 'float64',
 'float64',
 'float64',
 'object']

In [38]:
data_type = list(map(lambda x: str(x).replace("dtype('", "").replace("')", ""), data_frame.dtypes.values))
columns = data_frame.dtypes.index

In [39]:
dict(zip(columns, data_type))

{'longitude': 'float64',
 'latitude': 'float64',
 'housing_median_age': 'float64',
 'total_rooms': 'float64',
 'total_bedrooms': 'float64',
 'population': 'float64',
 'households': 'float64',
 'median_income': 'float64',
 'median_house_value': 'float64',
 'ocean_proximity': 'object'}

In [40]:
from housing.util.util import read_yaml_file
schema_file_path = "E:\\ML_Projects\\Basic_ML_Project\\config\\schema.yaml"
testing_file_path = "E:\\ML_Projects\\Basic_ML_Project\\housing\\artifact\\data_ingestion\\2025-09-14-11-02-45\\ingested_data\\test\\housing.csv"
training_file_path = "E:\\ML_Projects\\Basic_ML_Project\\housing\\artifact\\data_ingestion\\2025-09-14-11-02-45\\ingested_data\\train\\housing.csv"
schema_file_info = read_yaml_file(file_path=schema_file_path)
schema_file_info

{'columns': {'longitude': 'float',
  'latitude': 'float',
  'housing_median_age': 'float',
  'total_rooms': 'float',
  'total_bedrooms': 'float',
  'population': 'float',
  'households': 'float',
  'median_income': 'float',
  'median_house_value': 'float',
  'ocean_proximity': 'category'},
 'numerical_columns': ['longitude',
  'latitude',
  'housing_median_age',
  'total_rooms',
  'total_bedrooms',
  'population',
  'households',
  'median_income'],
 'categorical_columns': ['ocean_proximity'],
 'target_column': 'median_house_value',
 'domain_value': {'ocean_proximity': ['<1H OCEAN',
   'INLAND',
   'ISLAND',
   'NEAR BAY',
   'NEAR OCEAN']}}

In [41]:
list(schema_file_info["columns"].keys())

['longitude',
 'latitude',
 'housing_median_age',
 'total_rooms',
 'total_bedrooms',
 'population',
 'households',
 'median_income',
 'median_house_value',
 'ocean_proximity']

In [42]:
training_dataset = pd.read_csv(training_file_path)
testing_dataset = pd.read_csv(testing_file_path)

In [43]:
list(training_dataset.columns)

['longitude',
 'latitude',
 'housing_median_age',
 'total_rooms',
 'total_bedrooms',
 'population',
 'households',
 'median_income',
 'median_house_value',
 'ocean_proximity']

In [44]:
if set(schema_file_info["columns"].keys()) == set(training_dataset.columns) and set(testing_dataset.columns):
    print("true")

true


In [45]:
schema_file_info["domain_value"]["ocean_proximity"]

['<1H OCEAN', 'INLAND', 'ISLAND', 'NEAR BAY', 'NEAR OCEAN']

In [46]:
for cat_features in schema_file_info["categorical_columns"]:
    unique_values_in_schema = schema_file_info["domain_value"][cat_features]
    print(unique_values_in_schema)

['<1H OCEAN', 'INLAND', 'ISLAND', 'NEAR BAY', 'NEAR OCEAN']


In [47]:
training_dataset_cat_features = training_dataset.select_dtypes(include='object').columns.to_list()
training_dataset_cat_features

['ocean_proximity']

In [48]:
testing_dataset_cat_features = testing_dataset.select_dtypes(include='object').columns.to_list()
list(testing_dataset[testing_dataset_cat_features[0]].unique())

['<1H OCEAN', 'NEAR OCEAN', 'NEAR BAY', 'INLAND', 'ISLAND']

In [49]:
length = 0
valid_domain_values = True
if len(schema_file_info["categorical_columns"]) == len(training_dataset_cat_features) == len(testing_dataset_cat_features):
    length = len(schema_file_info["categorical_columns"])
for i in range(length):
    unique_values_in_schema = set(schema_file_info["domain_value"][schema_file_info["categorical_columns"][i]])
    unique_values_in_training_dataset = set(training_dataset[training_dataset_cat_features[i]].unique())
    unique_values_in_testing_dataset = set(testing_dataset[testing_dataset_cat_features[0]].unique())
    if unique_values_in_schema != unique_values_in_testing_dataset or unique_values_in_schema != unique_values_in_training_dataset:
            valid_domain_values = False
valid_domain_values

True

In [50]:
a = {"a", "b"}
b = {"b", "a"}
if a == b:
    print("true")
else :
    print("false")

true


In [51]:
import pandas as pd
import json
from evidently import Report
from evidently.presets import DataDriftPreset

train_df = pd.read_csv("E:\\ML_Projects\\Basic_ML_Project\\housing\\artifact\\data_ingestion\\2025-09-14-11-02-45\\ingested_data\\train\\housing.csv")
test_df = pd.read_csv("E:\\ML_Projects\\Basic_ML_Project\\housing\\artifact\\data_ingestion\\2025-09-14-11-02-45\\ingested_data\\test\\housing.csv")

report = Report([
    DataDriftPreset(method="psi")
],
include_tests=True)
my_eval = report.run(train_df, test_df)
my_eval.save_html("./notebook/demo_report_page.html")
# my_eval.json()
# json.loads(my_eval.json())
# result = my_eval.dict()
result = json.loads(my_eval.json())

result['metrics'][0]['value']['share']


0.0

In [52]:
is_validated = False
is_data_drift = False
validation_status = True
is_available = True
message = "Data validation is not completed sucessfully"
if (is_available and validation_status and not is_data_drift):
                is_validated = True
                message = "Data validation is completed sucessfully"
print(message, is_validated)

Data validation is completed sucessfully True


In [53]:
""" 
we can check data drift manually without using evidently, by using scipy
There is a function in scipy.stats called ks_2samp which takes
2 arrays(columns) and give pvalue if it is 1 
that means both arrays having exactly ssame distribution.
you can take an average of pvalues of all columns of datasets
and can do hypothesis testing with threshold pvalue 0.5.
"""

' \nwe can check data drift manually without using evidently, by using scipy\nThere is a function in scipy.stats called ks_2samp which takes\n2 arrays(columns) and give pvalue if it is 1 \nthat means both arrays having exactly ssame distribution.\nyou can take an average of pvalues of all columns of datasets\nand can do hypothesis testing with threshold pvalue 0.5.\n'

In [54]:
data_frame.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


In [55]:
data_frame.shape

(20640, 10)

In [56]:
data_frame.isnull().sum()

longitude               0
latitude                0
housing_median_age      0
total_rooms             0
total_bedrooms        207
population              0
households              0
median_income           0
median_house_value      0
ocean_proximity         0
dtype: int64

In [57]:
x, y = data_frame.drop(columns=["median_house_value"], axis=1), data_frame[["median_house_value"]]

In [58]:
x.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,NEAR BAY


In [59]:
y.head()

Unnamed: 0,median_house_value
0,452600.0
1,358500.0
2,352100.0
3,341300.0
4,342200.0


In [60]:
x.shape

(20640, 9)

In [61]:
y.shape

(20640, 1)

In [62]:
from sklearn.impute import SimpleImputer


In [63]:
numerical_columns = data_frame.drop(columns=["ocean_proximity"], axis=1)

In [64]:
numerical_columns.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0


In [65]:
simple_imputer = SimpleImputer(strategy="median")

In [66]:
df = simple_imputer.fit_transform(numerical_columns)

In [67]:
df

array([[-1.2223e+02,  3.7880e+01,  4.1000e+01, ...,  1.2600e+02,
         8.3252e+00,  4.5260e+05],
       [-1.2222e+02,  3.7860e+01,  2.1000e+01, ...,  1.1380e+03,
         8.3014e+00,  3.5850e+05],
       [-1.2224e+02,  3.7850e+01,  5.2000e+01, ...,  1.7700e+02,
         7.2574e+00,  3.5210e+05],
       ...,
       [-1.2122e+02,  3.9430e+01,  1.7000e+01, ...,  4.3300e+02,
         1.7000e+00,  9.2300e+04],
       [-1.2132e+02,  3.9430e+01,  1.8000e+01, ...,  3.4900e+02,
         1.8672e+00,  8.4700e+04],
       [-1.2124e+02,  3.9370e+01,  1.6000e+01, ...,  5.3000e+02,
         2.3886e+00,  8.9400e+04]], shape=(20640, 9))

In [68]:
simple_imputer.feature_names_in_

array(['longitude', 'latitude', 'housing_median_age', 'total_rooms',
       'total_bedrooms', 'population', 'households', 'median_income',
       'median_house_value'], dtype=object)

In [69]:
simple_imputer.statistics_

array([-1.1849e+02,  3.4260e+01,  2.9000e+01,  2.1270e+03,  4.3500e+02,
        1.1660e+03,  4.0900e+02,  3.5348e+00,  1.7970e+05])

In [70]:
x.longitude.median()

np.float64(-118.49)

In [71]:
x.ocean_proximity.value_counts()

ocean_proximity
<1H OCEAN     9136
INLAND        6551
NEAR OCEAN    2658
NEAR BAY      2290
ISLAND           5
Name: count, dtype: int64

In [72]:
# Concatenating 2-D arrays
c = np.array([[1, 2], [3, 4]])
d = np.array([[5, 6], [7, 8]])
result_2d = np.c_[c, d]
print(f"\nConcatenating 2-D arrays:\n{result_2d}")


Concatenating 2-D arrays:
[[1 2 5 6]
 [3 4 7 8]]


In [73]:
from housing.entity.model_factory import ModelFactory, get_sample_model_config_yaml_file


In [74]:
get_sample_model_config_yaml_file(export_dir="config")

'config\\model.yaml'

In [80]:
import yaml
model_config_file_path = os.path.join(CONFIG_DIR, "model.yaml")
with open(model_config_file_path) as f:
    config = yaml.safe_load(f)
print(config)

{'grid_search': {'class': 'GridSearchCV', 'module': 'sklearn.model_selection', 'params': {'cv': 5, 'verbose': 2}}, 'model_selection': {'module_0': {'class': 'LinearRegression', 'module': 'sklearn.linear_model', 'params': {'fit_intercept': True}, 'search_param_grid': {'fit_intercept': [True, False]}}, 'module_1': {'class': 'RandomForestRegressor', 'module': 'sklearn.ensemble', 'params': {'min_samples_leaf': 3}, 'search_param_grid': {'min_samples_leaf': [6]}}}}


In [81]:

model_factory = ModelFactory(model_config_path=model_config_file_path)

In [82]:
model_factory.get_initialized_model_list()

{'fit_intercept': True}
{'min_samples_leaf': 3}


[InitializedModelDetail(model_serial_number='module_0', model=LinearRegression(), param_grid_search={'fit_intercept': [True, False]}, model_name='sklearn.linear_model.LinearRegression'),
 InitializedModelDetail(model_serial_number='module_1', model=RandomForestRegressor(min_samples_leaf=3), param_grid_search={'min_samples_leaf': [6]}, model_name='sklearn.ensemble.RandomForestRegressor')]

In [83]:
from housing.util.util import load_numpy_array_data

In [84]:
data_file_path = "E:\\ML_Projects\\Basic_ML_Project\\housing\\artifact\\data_transformation\\2025-10-08-12-33-18\\transformed_data\\train\\housing.npz"

In [85]:
data = load_numpy_array_data(data_file_path)

In [86]:
x, y = data[:, :-1], data[:,-1]

In [87]:
best_model = model_factory.get_best_model(x, y)

{'fit_intercept': True}
{'min_samples_leaf': 3}
{'cv': 5, 'verbose': 2}
Fitting 5 folds for each of 2 candidates, totalling 10 fits
[CV] END .................................fit_intercept=True; total time=   0.0s
[CV] END .................................fit_intercept=True; total time=   0.0s
[CV] END .................................fit_intercept=True; total time=   0.0s
[CV] END .................................fit_intercept=True; total time=   0.0s
[CV] END .................................fit_intercept=True; total time=   0.0s
[CV] END ................................fit_intercept=False; total time=   0.0s
[CV] END ................................fit_intercept=False; total time=   0.0s
[CV] END ................................fit_intercept=False; total time=   0.0s
[CV] END ................................fit_intercept=False; total time=   0.0s
[CV] END ................................fit_intercept=False; total time=   0.0s
{'cv': 5, 'verbose': 2}
Fitting 5 folds for each of 1 cand

In [88]:
best_model

GridSearchedBestModel(model_serial_number='module_1', model=RandomForestRegressor(min_samples_leaf=3), best_model=RandomForestRegressor(min_samples_leaf=6), best_parameters={'min_samples_leaf': 6}, best_score=np.float64(0.8040732205106356))

In [91]:
model_factory.grid_searched_best_model_list

[GridSearchedBestModel(model_serial_number='module_0', model=LinearRegression(), best_model=LinearRegression(), best_parameters={'fit_intercept': True}, best_score=np.float64(0.6434786138130926)),
 GridSearchedBestModel(model_serial_number='module_1', model=RandomForestRegressor(min_samples_leaf=3), best_model=RandomForestRegressor(min_samples_leaf=6), best_parameters={'min_samples_leaf': 6}, best_score=np.float64(0.8040732205106356))]

In [92]:
best_model = model_factory.get_best_model(x, y, 0.9)

{'fit_intercept': True}
{'min_samples_leaf': 3}
{'cv': 5, 'verbose': 2}
Fitting 5 folds for each of 2 candidates, totalling 10 fits
[CV] END .................................fit_intercept=True; total time=   0.0s
[CV] END .................................fit_intercept=True; total time=   0.0s
[CV] END .................................fit_intercept=True; total time=   0.0s
[CV] END .................................fit_intercept=True; total time=   0.0s
[CV] END .................................fit_intercept=True; total time=   0.0s
[CV] END ................................fit_intercept=False; total time=   0.0s
[CV] END ................................fit_intercept=False; total time=   0.0s
[CV] END ................................fit_intercept=False; total time=   0.0s
[CV] END ................................fit_intercept=False; total time=   0.0s
[CV] END ................................fit_intercept=False; total time=   0.0s
{'cv': 5, 'verbose': 2}
Fitting 5 folds for each of 1 cand

HousingException: 
        Error occured in script: 
        [ e:\ML_Projects\Basic_ML_Project\housing\entity\model_factory.py ] at 
        try block line number: [368] and exception block line number: [372] 
        error message: [
        Error occured in script: 
        [ e:\ML_Projects\Basic_ML_Project\housing\entity\model_factory.py ] at 
        try block line number: [352] and exception block line number: [356] 
        error message: [None of Model has base accuracy: 0.9]
        ]
        