In [52]:
from sklearn.datasets import fetch_openml
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
import dill

In [15]:
# Deserialize the previously saved model object from the working directory.
# NOTE: dill (like pickle) can execute arbitrary code while loading, so only
# open model files that come from a trusted source.
with open("TitanicModel.dill", mode="rb") as F:
    xgb_model = dill.load(file=F)

In [21]:
# Fetch version 1 of the "titanic" dataset from OpenML as a Bunch and keep
# only its "data" entry (the feature frame), then preview the first rows.
df = fetch_openml(
    name="titanic",
    version=1,
    as_frame=True,
    return_X_y=False,
)["data"]
df.head()

Unnamed: 0,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
0,1.0,"Allen, Miss. Elisabeth Walton",female,29.0,0.0,0.0,24160,211.3375,B5,S,2.0,,"St Louis, MO"
1,1.0,"Allison, Master. Hudson Trevor",male,0.9167,1.0,2.0,113781,151.55,C22 C26,S,11.0,,"Montreal, PQ / Chesterville, ON"
2,1.0,"Allison, Miss. Helen Loraine",female,2.0,1.0,2.0,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"
3,1.0,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1.0,2.0,113781,151.55,C22 C26,S,,135.0,"Montreal, PQ / Chesterville, ON"
4,1.0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1.0,2.0,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"


In [53]:
class ExecutionBranch:
    """Resolve the branch-specific resource names used by a pipeline run.

    Parameters
    ----------
    Branch : str
        Target branch. Must be exactly one of ``"Dev"``, ``"Test"`` or
        ``"Prod"`` (the comparison is case-sensitive).

    Raises
    ------
    ValueError
        If ``Branch`` is not one of the supported values. ``ValueError`` is a
        subclass of ``Exception``, so callers that caught the previous bare
        ``Exception`` keep working.

    After construction the instance exposes ``Branch`` plus one string
    attribute per resource: ``Train_Path``, ``TestControl_Path``,
    ``S3_Target``, ``S3_Staging`` and ``MLFlow_Experiment``.
    """

    # Branch names accepted by the constructor.
    _VALID_BRANCHES = ("Dev", "Test", "Prod")

    # Attribute name -> middle part of the generated resource string.
    # Every value is rendered as "<Branch>_<label>_<Branch>".
    _PATH_LABELS = {
        "Train_Path": "Train_Delta_Path",
        "TestControl_Path": "TestControl_Delta_Path",
        "S3_Target": "S3_Target_Path",
        "S3_Staging": "S3_Staging_Path",
        "MLFlow_Experiment": "MLFlow_Experiment_Path",
    }

    def __init__(self, Branch: str) -> None:
        self.Branch = self.__Is_Branch_Valid(Branch)
        self.__AssignPaths()

    def __Is_Branch_Valid(self, Branch):
        """Return ``Branch`` unchanged if it is supported, otherwise raise."""
        if Branch not in self._VALID_BRANCHES:
            raise ValueError(
                f"Incompatible Branch '{Branch}'; "
                f"expected one of {list(self._VALID_BRANCHES)}"
            )
        print(f"WARNING: Pipeline run will be executed in {Branch} Branch")
        return Branch

    def __AssignPaths(self) -> None:
        """Set one attribute per entry of ``_PATH_LABELS`` for the active branch."""
        for attribute, label in self._PATH_LABELS.items():
            setattr(self, attribute, f"{self.Branch}_{label}_{self.Branch}")

    def __CreateDevEnv(self):
        """Placeholder hook; it currently does nothing."""
        pass

    def PrintPaths(self) -> None:
        """Print every resolved resource, one ``name: value`` pair per line."""
        # Joining per-attribute lines avoids the backslash continuation that
        # used to leak source indentation into the printed text.
        print("\n".join(f"{name}: {getattr(self, name)}" for name in self._PATH_LABELS))

In [22]:
class PreProcessing(TransformerMixin):
    """Pipeline step that narrows the input frame to the configured columns.

    Parameters
    ----------
    cols : list of str
        Column labels kept by ``transform``.
    AVGT5_Pipe : callable, optional
        Factory for an optional job object. When given, ``transform`` builds
        it with ``AVGT5_params`` and calls its ``Run_prep()`` and
        ``Run_scor()`` methods. Defaults to ``None`` because the previous
        default referenced the name ``OM_AVGT5``, which is not defined in the
        visible notebook and made the class definition fail.
        NOTE(review): confirm the intended default once ``OM_AVGT5`` is
        available.
    AVGT5_params : dict, optional
        Keyword arguments forwarded to ``AVGT5_Pipe``. Replaces the undefined
        global ``params`` the original code tried to unpack.
    """

    def __init__(self, cols, AVGT5_Pipe=None, AVGT5_params=None) -> None:
        self.cols = cols
        self.AVGT5_Pipe = AVGT5_Pipe
        self.AVGT5_params = AVGT5_params

    def fit(self, X: pd.DataFrame, y: pd.Series) -> "PreProcessing":
        """No-op fit; returns ``self`` so the step can be chained."""
        return self

    def transform(self, X) -> pd.DataFrame:
        """Return a copy of ``X`` restricted to ``self.cols``.

        If ``AVGT5_Pipe`` was supplied, the job is run first for its side
        effects; whatever it returns is not used here.
        NOTE(review): presumably the job's output should eventually feed the
        returned frame; confirm against the job's API.
        """
        if self.AVGT5_Pipe is not None:
            # Use the attribute stored by __init__; ``self.OM_AVGT5`` was
            # never set.
            job = self.AVGT5_Pipe(**(self.AVGT5_params or {}))
            job.Run_prep()
            job.Run_scor()

        # The next pipeline step needs a frame; the original returned None.
        return X[self.cols].copy()
        
        

In [41]:
class Scorer(BaseEstimator):
    """Final pipeline step that delegates scoring to an already-built model.

    Parameters
    ----------
    model : object
        Any object exposing ``predict_proba(X)`` whose result can be indexed
        with ``[:, 1]``.
    """

    def __init__(self, model):
        self.model = model

    def fit(self, X, y):
        """No-op fit; the wrapped model is used as provided."""
        return self

    def predict(self, X):
        """Return column 1 of the wrapped model's ``predict_proba`` output."""
        probabilities = self.model.predict_proba(X)
        return probabilities[:, 1]
    
    

In [42]:
# Columns handed to the preprocessing step.
cols = ["age", "sibsp", "parch", "pclass"]

# Two-step pipeline: PreProcessing followed by the Scorer wrapping xgb_model.
MyPipe = Pipeline(
    steps=[
        ("PreProcessing", PreProcessing(cols=cols)),
        ("Scorer", Scorer(model=xgb_model)),
    ]
)

In [50]:
# Run the full pipeline on the Titanic feature frame and display the scores.
MyPipe.predict(df)

array([0.58886623, 0.7709277 , 0.7709277 , ..., 0.23757528, 0.23757528,
       0.2759573 ], dtype=float32)

In [68]:
class PreProcessing_v2(TransformerMixin, ExecutionBranch):
    """Column-selecting transformer that also carries branch-specific paths.

    Parameters
    ----------
    cols : list of str
        Column labels kept by ``transform``.
    branch : str
        Forwarded to ``ExecutionBranch.__init__`` as ``Branch``; that
        initializer validates it and sets the path attributes
        (``S3_Target`` among them).
    """

    def __init__(self, cols, branch) -> None:
        super().__init__(Branch=branch)
        self.cols = cols

    def fit(self, X: pd.DataFrame, y: pd.Series) -> "PreProcessing_v2":
        """No-op fit; returns ``self`` so the step can be chained."""
        return self

    def SpecifcDev(self):
        """Run the Dev-only setup hook when the active branch is ``"Dev"``.

        NOTE(review): the original statement was cut off after ``self.__``
        and used ``=`` instead of ``==``. It presumably meant to call
        ``ExecutionBranch.__CreateDevEnv``; confirm the intended target.
        """
        if self.Branch == "Dev":
            # Inside this class ``self.__CreateDevEnv`` would be mangled to
            # ``_PreProcessing_v2__CreateDevEnv``, so the parent's private
            # method must be reached through its own mangled name.
            hook = getattr(self, "_ExecutionBranch__CreateDevEnv", None)
            if callable(hook):
                hook()

    def transform(self, X) -> pd.DataFrame:
        """Return a copy of ``X[self.cols]`` with an added ``S3_Target`` column."""
        # One copy of the selected columns is enough to leave the caller's
        # frame untouched; the extra full-frame copy was redundant.
        selected = X[self.cols].copy()
        selected["S3_Target"] = self.S3_Target
        return selected

In [70]:
# Columns handed to the preprocessing step.
cols = ["age", "sibsp", "parch", "pclass"]

# Single-step pipeline using PreProcessing_v2 configured for the "Dev" branch.
MyPipe = Pipeline(
    steps=[
        ("PreProcessing", PreProcessing_v2(cols=cols, branch="Dev")),
    ]
)
MyPipe.transform(df)



Unnamed: 0,age,sibsp,parch,pclass,S3_Target
0,29.0000,0.0,0.0,1.0,Dev_S3_Target_Path_Dev
1,0.9167,1.0,2.0,1.0,Dev_S3_Target_Path_Dev
2,2.0000,1.0,2.0,1.0,Dev_S3_Target_Path_Dev
3,30.0000,1.0,2.0,1.0,Dev_S3_Target_Path_Dev
4,25.0000,1.0,2.0,1.0,Dev_S3_Target_Path_Dev
...,...,...,...,...,...
1304,14.5000,1.0,0.0,3.0,Dev_S3_Target_Path_Dev
1305,,1.0,0.0,3.0,Dev_S3_Target_Path_Dev
1306,26.5000,0.0,0.0,3.0,Dev_S3_Target_Path_Dev
1307,27.0000,0.0,0.0,3.0,Dev_S3_Target_Path_Dev


In [74]:
class PreProcessing_v3(TransformerMixin, ExecutionBranch):
    """Transformer that selects columns, scores them and tags the S3 target.

    Parameters
    ----------
    cols : list of str
        Column labels kept by ``transform`` and passed to the model.
    branch : str
        Forwarded to ``ExecutionBranch.__init__`` as ``Branch``; that
        initializer validates it and sets the path attributes
        (``S3_Target`` among them).
    model : object
        Any object exposing ``predict_proba(X=...)`` whose result can be
        indexed with ``[:, 1]``.
    """

    def __init__(self, cols, branch, model) -> None:
        super().__init__(Branch=branch)
        self.cols = cols
        self.model = model
        # The former ``config = self.__bringconfig()`` call was removed: the
        # name mangles to ``_PreProcessing_v3__bringconfig``, which is not
        # defined anywhere, so construction raised AttributeError, and the
        # result was never used.

    def fit(self, X: pd.DataFrame, y: pd.Series) -> "PreProcessing_v3":
        """No-op fit; returns ``self`` so the step can be chained."""
        return self

    def transform(self, X) -> pd.DataFrame:
        """Return ``X[self.cols]`` plus ``Pred`` and ``S3_Target`` columns.

        ``Pred`` holds column 1 of ``model.predict_proba`` computed on the
        selected columns only.
        """
        # One copy of the selected columns is enough to leave the caller's
        # frame untouched; the extra full-frame copy was redundant.
        scored = X[self.cols].copy()

        # Score before adding any extra column so the model sees only
        # ``self.cols``.
        scored["Pred"] = self.model.predict_proba(X=scored)[:, 1]
        scored["S3_Target"] = self.S3_Target
        return scored

In [75]:
# Columns handed to the preprocessing step and to the model.
cols = ["age", "sibsp", "parch", "pclass"]

# Single-step pipeline using PreProcessing_v3 for the "Dev" branch with xgb_model.
MyPipe = Pipeline(
    steps=[
        ("PreProcessing", PreProcessing_v3(cols=cols, branch="Dev", model=xgb_model)),
    ]
)
MyPipe.transform(df)



Unnamed: 0,age,sibsp,parch,pclass,Pred,S3_Target
0,29.0000,0.0,0.0,1.0,0.588866,Dev_S3_Target_Path_Dev
1,0.9167,1.0,2.0,1.0,0.770928,Dev_S3_Target_Path_Dev
2,2.0000,1.0,2.0,1.0,0.770928,Dev_S3_Target_Path_Dev
3,30.0000,1.0,2.0,1.0,0.762512,Dev_S3_Target_Path_Dev
4,25.0000,1.0,2.0,1.0,0.762512,Dev_S3_Target_Path_Dev
...,...,...,...,...,...,...
1304,14.5000,1.0,0.0,3.0,0.345370,Dev_S3_Target_Path_Dev
1305,,1.0,0.0,3.0,0.247575,Dev_S3_Target_Path_Dev
1306,26.5000,0.0,0.0,3.0,0.237575,Dev_S3_Target_Path_Dev
1307,27.0000,0.0,0.0,3.0,0.237575,Dev_S3_Target_Path_Dev


In [None]:
# NOTE(review): `MainPipe` is not defined in any visible cell and this cell has
# never been executed; on a fresh kernel it would raise NameError. TODO define it.
MainPipe

In [None]:
# NOTE(review): `MainPipe` is not defined in any visible cell; presumably it is
# meant to accept a `branch` keyword like the PreProcessing_v2/v3 classes. Confirm.
MainPipe(branch="Prod")

In [None]:
# NOTE(review): `MainPipe` is not defined in any visible cell. ExecutionBranch
# accepts only "Dev", "Test" or "Prod" (case-sensitive), so if `branch` is
# forwarded to it, lowercase "test" would be rejected. Confirm whether this
# cell is meant to demonstrate that failure.
MainPipe(branch="test")