In [10]:
from data_managers import *
import pandas as pd, numpy as np
import yaml
import pipeline as pipe
import mlflow
import os

from sklearn.metrics import accuracy_score, roc_auc_score

In [4]:
with open("config.yaml", "r") as f:
    config_file = yaml.safe_load(f)

In [5]:
import data_managers as dm

In [6]:
X_train,y_train =  dm.load_data(split= True, data_= "train")

In [7]:
titanic_pipe = pipe.pipeline()

In [8]:
with open("config.yaml", "r") as file:
    config_file = yaml.safe_load(file)

In [None]:
titanic_pipe.fit(X_train, y_train)

Pipeline(steps=[('categorical_imputation',
                 CategoricalImputer(variables=['sex', 'cabin', 'embarked',
                                               'title'])),
                ('missing_indicator',
                 AddMissingIndicator(variables=['age', 'fare'])),
                ('median_imputation',
                 MeanMedianImputer(variables=['age', 'fare'])),
                ('extract_letter',
                 ExtractLetterTransformer(variables=['cabin'])),
                ('rare_label_encoder',
                 RareLabelEncoder(n_categories=1,
                                  variables=['sex', 'cabin', 'embarked',
                                             'title'])),
                ('categorical_encoder',
                 OneHotEncoder(drop_last=True,
                               variables=['sex', 'cabin', 'embarked',
                                          'title'])),
                ('scaler', StandardScaler()),
                ('Logit', LogisticRegress

In [None]:
class_ = titanic_pipe.predict(X_train)
pred = titanic_pipe.predict_proba(X_train)[:, 1]

In [1]:
cmd = 'mlflow ui'
os.system(cmd)

In [None]:
mlflow_config = config_file["mlflow_config"]
remote_server_uri = mlflow_config["remote_server_uri"]

mlflow.set_tracking_uri(remote_server_uri)

mlflow.set_experiment(mlflow_config["experiment_name"])

with mlflow.start_run(run_name=mlflow_config["run_name"]) as mlops_run:

    titanic_pipe.fit(X_train, y_train)

    class_ = titanic_pipe.predict(X_train)
    pred = titanic_pipe.predict_proba(X_train)[:, 1]

    # Logging parameter of the model
    mlflow.log_param("C", config_file["model_params"]["C"])

    # Logging metrics
    mlflow.log_metric("accuracy", float(round(accuracy_score(y_train, class_), 3)))
    mlflow.log_metric("precision", float(round(roc_auc_score(y_train, pred), 3)))

In [9]:
mlops_run.info.artifact_uri

'file:///C:/Users/rahul/Python/MLOps_Udemy/deploying-titanic-model/mlops_titanic_model/classification_model/mlruns/1/863ec1a4b36f4ab29f039820d4938578/artifacts'

In [17]:
mlflow.tracking.is_tracking_uri_set()

False

In [34]:
X_train

Unnamed: 0,pclass,sex,age,sibsp,parch,fare,cabin,embarked,title
1118,3,male,25.0000,0,0,7.9250,,S,Mr
44,1,female,41.0000,0,0,134.5000,E40,C,Miss
1072,3,male,,0,0,7.7333,,Q,Mr
1130,3,female,18.0000,0,0,7.7750,,S,Miss
574,2,male,29.0000,1,0,21.0000,,S,Mr
...,...,...,...,...,...,...,...,...,...
763,3,female,0.1667,1,2,20.5750,,S,Miss
835,3,male,,0,0,8.0500,,S,Mr
1216,3,female,,0,0,7.7333,,Q,Miss
559,2,female,20.0000,0,0,36.7500,,S,Miss


In [35]:
y_train

1118    0
44      1
1072    0
1130    0
574     0
       ..
763     1
835     0
1216    1
559     1
684     0
Name: survived, Length: 1047, dtype: int64

In [30]:
import pipeline as pipe

In [32]:
pipe = pipe.pipeline()

In [40]:
pipe.fit(X_train, y_train)

Pipeline(steps=[('categorical_imputation',
                 CategoricalImputer(variables=['sex', 'cabin', 'embarked',
                                               'title'])),
                ('missing_indicator',
                 AddMissingIndicator(variables=['age', 'fare'])),
                ('median_imputation',
                 MeanMedianImputer(variables=['age', 'fare'])),
                ('extract_letter',
                 ExtractLetterTransformer(variables=['cabin'])),
                ('rare_label_encoder',
                 RareLabelEncoder(n_categories=1,
                                  variables=['sex', 'cabin', 'embarked',
                                             'title'])),
                ('categorical_encoder',
                 OneHotEncoder(drop_last=True,
                               variables=['sex', 'cabin', 'embarked',
                                          'title'])),
                ('scaler', StandardScaler()),
                ('Logit', LogisticRegress

In [41]:
pipe.predict(X_train)

array([0, 1, 0, ..., 0, 0, 0], dtype=int64)

In [23]:
data = pd.read_csv(config_file["data_source"]["raw_data"])

In [25]:
data.head()

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
0,1,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S,2.0,,"St Louis, MO"
1,1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.55,C22 C26,S,11.0,,"Montreal, PQ / Chesterville, ON"
2,1,0,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"
3,1,0,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1,2,113781,151.55,C22 C26,S,,135.0,"Montreal, PQ / Chesterville, ON"
4,1,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1,2,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"


In [26]:
data.replace("?", np.nan)

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
0,1,1,"Allen, Miss. Elisabeth Walton",female,29.0000,0,0,24160,211.3375,B5,S,2,,"St Louis, MO"
1,1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.5500,C22 C26,S,11,,"Montreal, PQ / Chesterville, ON"
2,1,0,"Allison, Miss. Helen Loraine",female,2.0000,1,2,113781,151.5500,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"
3,1,0,"Allison, Mr. Hudson Joshua Creighton",male,30.0000,1,2,113781,151.5500,C22 C26,S,,135.0,"Montreal, PQ / Chesterville, ON"
4,1,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0000,1,2,113781,151.5500,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1304,3,0,"Zabour, Miss. Hileni",female,14.5000,1,0,2665,14.4542,,C,,328.0,
1305,3,0,"Zabour, Miss. Thamine",female,,1,0,2665,14.4542,,C,,,
1306,3,0,"Zakarian, Mr. Mapriededer",male,26.5000,0,0,2656,7.2250,,C,,304.0,
1307,3,0,"Zakarian, Mr. Ortin",male,27.0000,0,0,2670,7.2250,,C,,,


In [14]:
import pandas as pd

In [37]:
data = pd.read_csv("./data/titanic_data.csv")

In [38]:
data.drop("title", 1)

  data.drop("title", 1)


Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
0,1,1,"Allen, Miss. Elisabeth Walton",female,29.0000,0,0,24160,211.3375,B5,S,2,,"St Louis, MO"
1,1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.5500,C22 C26,S,11,,"Montreal, PQ / Chesterville, ON"
2,1,0,"Allison, Miss. Helen Loraine",female,2.0000,1,2,113781,151.5500,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"
3,1,0,"Allison, Mr. Hudson Joshua Creighton",male,30.0000,1,2,113781,151.5500,C22 C26,S,,135.0,"Montreal, PQ / Chesterville, ON"
4,1,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0000,1,2,113781,151.5500,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1304,3,0,"Zabour, Miss. Hileni",female,14.5000,1,0,2665,14.4542,,C,,328.0,
1305,3,0,"Zabour, Miss. Thamine",female,,1,0,2665,14.4542,,C,,,
1306,3,0,"Zakarian, Mr. Mapriededer",male,26.5000,0,0,2656,7.2250,,C,,304.0,
1307,3,0,"Zakarian, Mr. Ortin",male,27.0000,0,0,2670,7.2250,,C,,,


In [22]:
data.to_csv(r"./raw_data/titanic_dataset.csv", index= False)

In [9]:
from pathlib import Path

In [15]:
Path("raw_data/titanic_dataset.csv").resolve()

WindowsPath('C:/Users/rahul/Python/MLOps_Udemy/deploying-titanic-model/mlops_titanic_model/classification_model/raw_data/Titanic_Dataset.csv')

In [19]:
data.title[0]

'Miss'

In [23]:
import yaml

In [26]:
with open("config.yaml", "r") as file:
    config_file = yaml.safe_load(file)

In [33]:
config_file["attributes"]["drop_variables"]

['name', 'ticket', 'boat', 'body', 'home.dest']

In [34]:
    name = list()
    name.append(config_file["attributes"]["drop_variables"][0])

In [35]:
name

['name']

In [22]:
 # saving the data copy into data folder
save_to_csv()
new_data = pd.read_csv(Path(config_file["path"]["DATA"]) / "titanic_data.csv")

assert (
        new_data[config_file["attributes"]["drop_variables"][0]].iat[0]
        == "Allen, Miss. Elisabeth Walton"
    )

    # ExtractLetterTransformer
    cleaning_ins = fea.Cleaning(
        variables=list(config_file["attributes"]["drop_variables"][0])
    )

    # when
    new_data_transformed = cleaning_ins.transform(X=new_data)

    # then
    assert (
        new_data_transformed[config_file["attributes"]["drop_variables"][0]].iat[0]
        == "Miss"
    )

IndentationError: unexpected indent (3618114372.py, line 2)

In [2]:
new_data = load_data_testing()

In [3]:
new_data.head(3)

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest,title
0,1,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S,2.0,,"St Louis, MO",Miss
1,1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.55,C22 C26,S,11.0,,"Montreal, PQ / Chesterville, ON",Master
2,1,0,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON",Miss


In [63]:
save_to_csv()

In [64]:
from processing import features as fea

In [66]:
ext = fea.ExtractLetterTransformer(variables=["cabin"])

In [69]:
ext.transform(X= new_data)

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest,title
0,1,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B,S,2,,"St Louis, MO",Miss
1,1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.55,C,S,11,,"Montreal, PQ / Chesterville, ON",Master


In [97]:
new_data["cabin"].iat[1]

'C22 C26'

In [95]:
assert new_data["cabin"][1] == 'C22 C26'

In [81]:
new_data["cabin"] == ext.transform(X= new_data)["cabin"]

0       False
1       False
2       False
3       False
4       False
        ...  
1304    False
1305    False
1306    False
1307    False
1308    False
Name: cabin, Length: 1309, dtype: bool

In [101]:
if "titanic_data.csv" not in [file.name for file in Path(config_file["path"]["DATA"]).iterdir()]:
    new_data = load_data_testing()
    new_data.to_csv(Path(config_file["path"]["DATA"]) / "titanic_data.csv", index= False)
else:
    pass

In [98]:
import pandas as pd

In [103]:
pd.read_csv(Path(config_file["path"]["DATA"])/"titanic_data.csv")

Unnamed: 0.1,Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest,title
0,0,1,1,"Allen, Miss. Elisabeth Walton",female,29.0000,0,0,24160,211.3375,B5,S,2,,"St Louis, MO",Miss
1,1,1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.5500,C22 C26,S,11,,"Montreal, PQ / Chesterville, ON",Master
2,2,1,0,"Allison, Miss. Helen Loraine",female,2.0000,1,2,113781,151.5500,C22 C26,S,,,"Montreal, PQ / Chesterville, ON",Miss
3,3,1,0,"Allison, Mr. Hudson Joshua Creighton",male,30.0000,1,2,113781,151.5500,C22 C26,S,,135.0,"Montreal, PQ / Chesterville, ON",Mr
4,4,1,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0000,1,2,113781,151.5500,C22 C26,S,,,"Montreal, PQ / Chesterville, ON",Mrs
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1304,1304,3,0,"Zabour, Miss. Hileni",female,14.5000,1,0,2665,14.4542,,C,,328.0,,Miss
1305,1305,3,0,"Zabour, Miss. Thamine",female,,1,0,2665,14.4542,,C,,,,Miss
1306,1306,3,0,"Zakarian, Mr. Mapriededer",male,26.5000,0,0,2656,7.2250,,C,,304.0,,Mr
1307,1307,3,0,"Zakarian, Mr. Ortin",male,27.0000,0,0,2670,7.2250,,C,,,,Mr


In [24]:
def armstrongNumber(arr):
	no = 0
	for i in arr:
		arr_str = str(i)
		list_arr = list(arr_str)
		for ele in list_arr:
			no += int(ele)**3
		if no == i:
			print("It is an ARMSTRONG number")
		else:
			print("It is NOT an ARMSTRONG number")
	
armstrongNumber([153,123,367])

It is an ARMSTRONG number
It is NOT an ARMSTRONG number
It is NOT an ARMSTRONG number


In [17]:
for i in [153,123,367]:
	arr_str = str(i)
	list_arr = list(arr_str)
    
	for i in list_arr:
	    no += int(i)**3
	if no == arr:
		print("It is an ARMSTRONG number")
	else:
		print("It is NOT an ARMSTRONG number")

print(list_arr)

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 4)

In [9]:
list("367")

['3', '6', '7']