In [3]:
import os

In [4]:
%pwd

'c:\\Users\\adminn\\Documents\\GitHub\\NBA\\research'

In [5]:
os.chdir('../')


In [13]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    root_dir: Path
    train_data_path: Path
    test_data_path: Path
    columns: dict
    model_name: str
    C: float
    class_weight: str
    penalty: str
    solver: str
    target_column: str

In [17]:
from NBA_Project.constants import *
from src.NBA_Project.utils.common import read_yaml,create_directories


class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])


    def get_model_trainer_config(self)-> ModelTrainerConfig:

        config=self.config.model_trainer
        params=self.params.LogisticRegression
        schema=self.schema.TARGET_COLUMN
        col=self.schema.COLUMNS

        create_directories([config.root_dir])

        model_trainer_config=ModelTrainerConfig(
            root_dir=config.root_dir,
            train_data_path=config.train_data_path,
            test_data_path=config.test_data_path,
            model_name=config.model_name,
            C=params.C,
            class_weight=params.class_weight,
            solver=params.solver,
            penalty=params.penalty,
            target_column=schema.name,
            columns=col
)
        

        return model_trainer_config

In [32]:
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline


class ModelTrainer:

    def __init__(self,config:ModelTrainerConfig):
        self.config=config
    def train(self):
        pipeline = Pipeline([
            ('scaler', StandardScaler()), 
            ('classifier', LogisticRegression(class_weight=self.config.class_weight, solver=self.config.solver, C=self.config.C, penalty=self.config.penalty))  # Étape de classification
        ])

        train_data = pd.read_csv(self.config.train_data_path)
        train_x = train_data.drop([self.config.target_column], axis=1)
        print(train_data.columns)
        columns_keep=list(self.config.columns.keys())
        print(columns_keep)
        train_x=train_x[columns_keep]
        train_y = train_data[[self.config.target_column]]

        pipeline.fit(train_x, train_y)


        joblib.dump(pipeline, os.path.join(self.config.root_dir, self.config.model_name))





In [33]:
try:
    config=ConfigurationManager()
    model_trainer_config=config.get_model_trainer_config()
    model_trainer_config=ModelTrainer(config=model_trainer_config)
    model_trainer_config.train()
except Exception as e:
    raise e

[2023-10-21 19:13:26,000: INFO: common: yaml file: config\config.yaml loaded successfully]
[2023-10-21 19:13:26,003: INFO: common: yaml file: params.yaml loaded successfully]
[2023-10-21 19:13:26,004: INFO: common: yaml file: schema.yaml loaded successfully]
[2023-10-21 19:13:26,006: INFO: common: created directory at artifacts]
[2023-10-21 19:13:26,007: INFO: common: created directory at artifacts/model_trainer]
Index(['GP', 'MIN', 'PTS', 'FGM', 'FGA', 'FG%', '3P Made', '3PA', '3P%', 'FTM',
       'FTA', 'FT%', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV',
       'TARGET_5Yrs'],
      dtype='object')
['GP', 'MIN', 'FTM', '3PA', 'OREB', 'BLK', '3P Made', '3P%', 'FG%']


  y = column_or_1d(y, warn=True)
