In [7]:
# Imports for this cell are grouped up front.  GradientBoostingClassifier is
# not used in this cell but is imported here, as in the original cell, so the
# name stays available to later cells.
import pickle

import pandas as pd
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split

# Load the dataset and encode the "Sex" column as integers
# (male -> 0, female -> 1).  Values other than these two map to NaN.
X = pd.read_csv('../data/titanic.csv')
X["Sex"] = X["Sex"].map({"male": 0, "female": 1})

# Features: every column except the label ("Survived") and "Name".
data = X.drop(columns=["Survived", "Name"])
target = X["Survived"]

# Hold out 25% of the rows for testing; random_state pins the shuffle so the
# split is reproducible.
data_train, data_test, target_train, target_test = train_test_split(
    data, target, random_state=42, test_size=0.25
)
print(
    f"Number of samples in testing: {data_test.shape[0]} => "
    f"{data_test.shape[0] / data.shape[0] * 100:.1f}% of the"
    " original set"
)
print(
    f"Number of samples in training: {data_train.shape[0]} => "
    f"{data_train.shape[0] / data.shape[0] * 100:.1f}% of the"
    " original set"
)

# --- Logistic regression (library defaults) ---
model = LogisticRegression()
l_model = model.fit(data_train, target_train)
target_predicated = l_model.predict(data_test)
accuracy = accuracy_score(target_test, target_predicated)
print(f"Model Accuracy for Logestic Regression: {accuracy * 100:.2f}%")
# The report is a multi-line table; start it on its own line so its header row
# lines up with the columns beneath it.
print(
    "Classification Report for Logistic Regression:\n"
    f"{classification_report(target_test, target_predicated)}"
)

# --- Random forest (100 trees, fixed seed) ---
model = RandomForestClassifier(n_estimators=100, random_state=42)
r_model = model.fit(data_train, target_train)
target_predicated = r_model.predict(data_test)
accuracy = accuracy_score(target_test, target_predicated)
print(f"Model Accuracy for Random Forest: {accuracy * 100:.2f}%")
print(
    "Classification Report for Random Forest:\n"
    f"{classification_report(target_test, target_predicated)}"
)

# Persist the fitted random forest.  The context manager guarantees the file
# is flushed and closed even if pickling raises.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(r_model, model_file)

Number of samples in testing: 222 => 25.0% of the original set
Number of samples in training: 665 => 75.0% of the original set
Model Accuracy for Logestic Regression: 76.58%
Classification Report for Logistic Regression:              precision    recall  f1-score   support

           0       0.78      0.87      0.82       140
           1       0.73      0.59      0.65        82

    accuracy                           0.77       222
   macro avg       0.75      0.73      0.74       222
weighted avg       0.76      0.77      0.76       222

Model Accuracy for Random Forest: 81.08%
Classification Report for Random Forest:              precision    recall  f1-score   support

           0       0.85      0.86      0.85       140
           1       0.75      0.73      0.74        82

    accuracy                           0.81       222
   macro avg       0.80      0.79      0.80       222
weighted avg       0.81      0.81      0.81       222



In [None]:
# Specification of every model tracked in this notebook, as
# (display name, estimator class, hyperparameters).
model_specs = [
    (
        "Logistic Regression",
        LogisticRegression,
        {
            "class_weight": None,
            "random_state": 8888,
            "solver": "lbfgs",
            "max_iter": 100,
        },
    ),
    (
        "Random Forest",
        RandomForestClassifier,
        {
            "n_estimators": 100,
            "random_state": 42,
        },
    ),
    (
        "Gradient Boosting Classifier",
        GradientBoostingClassifier,
        {
            "n_estimators": 100,
            "learning_rate": 1.0,
            "max_depth": 1,
            "random_state": 0,
        },
    ),
]

# `models` keeps its (name, params, estimator) tuple layout, which the MLflow
# logging loop reads positionally.  Each estimator is built from the
# hyperparameter dict stored beside it, so the two can no longer disagree
# (previously every estimator was created with library defaults).
# NOTE: the estimators created here are configured but NOT fitted.
models = [
    (model_name, params, estimator_cls(**params))
    for model_name, estimator_cls, params in model_specs
]



In [12]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression


def _fit_and_report(estimator):
    """Fit ``estimator`` on the training split and evaluate it on the test split.

    Reads the notebook globals ``data_train``, ``target_train``, ``data_test``
    and ``target_test``.

    Returns:
        A 4-tuple ``(fitted, predictions, score, summary)``: the object
        returned by ``estimator.fit``, its predictions for ``data_test``, the
        accuracy of those predictions, and the classification report as a
        dict (``output_dict=True``).
    """
    fitted = estimator.fit(data_train, target_train)
    predictions = fitted.predict(data_test)
    score = accuracy_score(target_test, predictions)
    summary = classification_report(target_test, predictions, output_dict=True)
    return fitted, predictions, score, summary


# One report dict per model, appended in this order:
# logistic regression, random forest, gradient boosting.
reports = []

# --- Logistic regression ---
lrparams = {
    "class_weight": None,
    "random_state": 8888,
    "solver": "lbfgs",
    "max_iter": 100,
}
model = LogisticRegression(**lrparams)
l_model, target_predicated, accuracy, report = _fit_and_report(model)
reports.append(report)

# --- Random forest ---
rfparams = {
    "n_estimators": 100,
    "random_state": 42,
}
model = RandomForestClassifier(**rfparams)
r_model, target_predicated, accuracy, report = _fit_and_report(model)
reports.append(report)

# --- Gradient boosting ---
gbparams = {
    "n_estimators": 100,
    "learning_rate": 1.0,
    "max_depth": 1,
    "random_state": 0,
}
model = GradientBoostingClassifier(**gbparams)
g_model, target_predicated, accuracy, report = _fit_and_report(model)
reports.append(report)


In [15]:
import mlflow
import mlflow.sklearn  # imported explicitly; this cell calls mlflow.sklearn.log_model

mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("Accuracy Model v3")

# The estimators stored in `models` are only instantiated, never fitted; the
# trained ones are l_model / r_model / g_model.  Logging the former would
# register models that cannot predict, so pair each entry of `models` with its
# fitted counterpart.  All three lists are matched by position:
# logistic regression, random forest, gradient boosting.
fitted_models = [l_model, r_model, g_model]
if not (len(models) == len(fitted_models) == len(reports)):
    raise ValueError(
        "models, fitted models and reports must have the same length, got "
        f"{len(models)}, {len(fitted_models)} and {len(reports)}"
    )

for (model_name, params, _unfitted), model, report in zip(
    models, fitted_models, reports
):
    # One MLflow run per model: hyperparameters, headline metrics taken from
    # the classification report, and the fitted estimator itself.
    with mlflow.start_run(run_name=model_name):
        mlflow.log_params(params)
        mlflow.log_metrics({
            'accuracy': report['accuracy'],
            'recall_class_1': report['1']['recall'],
            'recall_class_0': report['0']['recall'],
            'f1_score_macro': report['macro avg']['f1-score']
        })

        # Logs the model artifact and registers it under the model's display name.
        mlflow.sklearn.log_model(model, "model", registered_model_name=model_name)
  

    

2025/07/10 15:24:48 INFO mlflow.tracking.fluent: Experiment with name 'Accuracy Model v3' does not exist. Creating a new experiment.
Successfully registered model 'Logistic Regression'.
2025/07/10 15:24:53 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Logistic Regression, version 1
Created version '1' of model 'Logistic Regression'.


🏃 View run Logistic Regression at: http://localhost:5000/#/experiments/805679247893470161/runs/d3620872486a4c0fa503951a163f2cce
🧪 View experiment at: http://localhost:5000/#/experiments/805679247893470161


Successfully registered model 'Random Forest'.
2025/07/10 15:24:55 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Random Forest, version 1
Created version '1' of model 'Random Forest'.


🏃 View run Random Forest at: http://localhost:5000/#/experiments/805679247893470161/runs/bc7db7546aff47d4a58315e71d9f6715
🧪 View experiment at: http://localhost:5000/#/experiments/805679247893470161


Successfully registered model 'Gradient Boosting Classifier'.
2025/07/10 15:24:58 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Gradient Boosting Classifier, version 1


🏃 View run Gradient Boosting Classifier at: http://localhost:5000/#/experiments/805679247893470161/runs/a7eb30d9ddc4430d898f3adc2994e85b
🧪 View experiment at: http://localhost:5000/#/experiments/805679247893470161


Created version '1' of model 'Gradient Boosting Classifier'.
