In [14]:
from Regression.utils.shared import create_directories,get_yamlcontent
from Regression.constants import *
from Regression import logger
from pathlib import Path
from dataclasses import dataclass
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import os
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import r2_score,mean_squared_error
import pickle
import mlflow

@dataclass
class Model_Config:
    Test: Path
    Train:Path
    Load: Path
    mlflow_uri: str

class ConfigurationManager:
        def __init__(self,config_filepath = CONFIG_FILE_PATH):
            self.config = get_yamlcontent(config_filepath)
            create_directories([self.config.artifacts_root])

    
        def get_model_config(self):
                config = self.config.Model_Prepartion

                create_directories([config.path])

                data_ingestion_config = Model_Config(Test=config.test_path,Train=config.train_path,Load=config.path,mlflow_uri=config.MLFLOW_URI)

                return data_ingestion_config



class Trainer:
    def __init__(self,config_data):   
        self.test_p=Path(config_data.Test)
        self.train_p=Path(config_data.Train)
        self.model_loc=Path(config_data.Load)
        self.mlflow_uri=config_data.mlflow_uri
    
    def Model_Training(self):
        try:
            test_df=pd.read_csv(self.test_p)
            train_df=pd.read_csv(self.train_p)
            Data=pd.concat([train_df,test_df],axis=0)
            Data.reset_index(inplace=True)

            Data.drop_duplicates(inplace=True)
            cat=Data.select_dtypes(include='object')
            num=Data.select_dtypes(exclude='object')

            ## Encoding
            encoder=LabelEncoder()
            for column in cat.columns:
                cat[column]=pd.Series(encoder.fit_transform(cat[column]))
            
            self.df=pd.concat([cat,num],axis=1)
        except Exception as e:
            raise e
        
    def Model_Evaluation(self):  
        try:
            X=self.df.drop('CO2 Emissions(g/km)',axis=1)
            Y=self.df['CO2 Emissions(g/km)']

            test_size=0.2

            X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=test_size,random_state=32)
            Std=StandardScaler()
            X_train=Std.fit_transform(X_train)
            X_test=Std.transform(X_test)

            with mlflow.start_run(run_name='mlflow_build'):
                n_estimator=200
                max_depth=12
                mlflow.log_param('max_depth',max_depth)
                mlflow.log_param('n_estimator',n_estimator)
                model=RandomForestClassifier(n_estimators=n_estimator,max_depth=max_depth)
                model.fit(X_train,Y_train)
                Y_pred=model.predict(X_test)
                r2=r2_score(Y_test,Y_pred)
                mse=mean_squared_error(Y_test,Y_pred)
                mlflow.log_metric('R2',r2)
                mlflow.log_metric('Mean Squared Error',mse)
                tu=mlflow.set_tracking_uri("")

                if tu != "file":
                    mlflow.sklearn.log_model(model, "model", registered_model_name="CO2Emission")
                
                else:
                    mlflow.sklearn.log_model(model, "model")
            
            mlflow.end_run()
            os.makedirs(self.model_loc,exist_ok=True)
            pickle.dump(model,open(os.path.join(self.model_loc,'model.pkl'),'wb'))

            logger.info('Successfully Submitted the model')
        except Exception as e:
        #pickle.load(open(os.path.join(self.model_loc,'model.pkl'),'rb'))
             raise e
        
                
try:
        data=ConfigurationManager()
        con=data.get_model_config()
        train=Trainer(con)
        train.Model_Training()
        train.Model_Evaluation()

except Exception as e:
        logger.info(e)


      Make  Model  Vehicle Class  Transmission  Fuel Type  index  Unnamed: 0  \
0        5   1169             10            17          4      0        6590   
1       31    936              5             2          3      1        6274   
2        3   1581              0            25          4      2        2251   
3       32    464             13             9          4      3        3149   
4       41   1870              8            17          4      4        4362   
...    ...    ...            ...           ...        ...    ...         ...   
7380    28    424             10             6          4   1472        7185   
7381    21   1689             11            15          3   1473        1740   
7382     9   1676              9            15          3   1474        4609   
7383    28    725              2            16          4   1475        1844   
7384    28    508              0             9          4   1476        5154   

      Engine Size(L)  Cylinders  Fuel C

Registered model 'CO2Emission' already exists. Creating a new version of this model...
Created version '4' of model 'CO2Emission'.


In [15]:
os.getcwd()

'd:\\MLFLOW'