In [1]:
from sklearn.model_selection import train_test_split

In [3]:
import pandas as pd 
import mlflow
from mlflow.models import infer_signature

In [5]:
# load the Titanic training set from the CSV file (path is relative to the working directory)
df = pd.read_csv(filepath_or_buffer="titanic_train.csv")

In [6]:
# dataset dimensions as (rows, columns)
df.shape

(891, 12)

In [7]:
# count the missing values in each column
df.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [8]:
# median of the 'Age' column; used below to fill in the missing ages
# NOTE(review): this is computed on the full dataset before the train/test split,
# so rows that later land in the test set also influence the imputed value
median_age = df["Age"].median()
median_age

28.0

In [9]:
# impute: every missing 'Age' entry is replaced by the median computed earlier
df = df.assign(Age=df["Age"].fillna(median_age))

In [10]:
# number of nulls left in 'Age' -- should be 0 once the imputation has run
df["Age"].isnull().sum()

0

In [11]:
# discard the rows that have no 'Embarked' value
df = df.dropna(axis="index", how="any", subset=["Embarked"])

In [12]:
# re-check the dimensions to confirm that the rows have been dropped
df.shape

(889, 12)

In [13]:
# 'Cabin' is null for the majority of rows, and 'PassengerId' / 'Name' are not
# considered relevant here, so all three columns are removed
df = df.drop(["Cabin", "PassengerId", "Name"], axis="columns")

In [14]:
# null counts for the columns that remain after the drop
df.isnull().sum()

Survived    0
Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Ticket      0
Fare        0
Embarked    0
dtype: int64

In [15]:
# target (dependent variable): the 'Survived' column
y = df["Survived"]
y

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 889, dtype: int64

In [16]:
# encode 'Sex' as integers: male -> 0, female -> 1
df["Sex"] = df["Sex"].map(
    {
        "male": 0,
        "female": 1,
    }
)

In [17]:
# frequency of each distinct value in the 'Sex' column after the mapping
df["Sex"].value_counts()

Sex
0    577
1    312
Name: count, dtype: int64

In [18]:
# features (independent variables): 'Pclass', 'Age' and 'Sex'
feature_columns = ["Pclass", "Age", "Sex"]
X = df[feature_columns]

In [19]:
# split the data into training (80%) and testing (20%) sets;
# random_state pins the shuffle so the split -- and every metric derived from it,
# including the accuracy logged to MLflow -- is identical on each run
# (same seed value as the model's random_state)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=8888
)

In [20]:
from sklearn.linear_model import LogisticRegression

In [27]:
# hyperparameters handed to LogisticRegression and logged to MLflow
params = dict(solver="lbfgs", max_iter=1000, random_state=8888)

In [28]:
# build the (not yet fitted) logistic regression classifier from `params`
model = LogisticRegression(**params)

In [29]:
# fit the classifier on the training split
model.fit(X=X_train, y=y_train)

In [30]:
# class predictions of the fitted model for the testing split
y_pred = model.predict(X=X_test)

In [31]:
from sklearn.metrics import accuracy_score

In [32]:
# share of testing-set predictions that match the true labels
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy

0.7921348314606742

In [33]:
# one hand-made passenger record: Pclass 1, Age 40, Sex code 1
custom_data = pd.DataFrame({"Pclass": [1], "Age": [40], "Sex": [1]})

In [34]:
# predict on the custom record; predict() returns an array with one label per
# input row, so the single label is picked out explicitly rather than relying on
# the truth value of an array comparison (which raises ValueError as soon as
# custom_data holds more than one row)
pred = model.predict(custom_data)
print('Survived' if pred[0] == 1 else 'NotSurvived')

Survived


In [37]:
# point the MLflow client at the tracking server on localhost, port 3000
mlflow.set_tracking_uri("http://localhost:3000")

In [38]:
# make "Logistic Regression Amnil" the active experiment for the runs that follow
mlflow.set_experiment(experiment_name="Logistic Regression Amnil")

2025/09/22 10:08:16 INFO mlflow.tracking.fluent: Experiment with name 'Logistic Regression Amnil' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/772418750015267640', creation_time=1758514996990, experiment_id='772418750015267640', last_update_time=1758514996990, lifecycle_stage='active', name='Logistic Regression Amnil', tags={}>

In [39]:
# record the hyperparameters, the test accuracy and the fitted model as one MLflow
# run, and register the model under the name "LogisticRegression_Amnil"
with mlflow.start_run():
  mlflow.log_params(params)
  mlflow.log_metric("accuracy", accuracy)
  # tag key spelling corrected ("Regresstion" -> "Regression")
  mlflow.set_tag("Logistic Regression", "Amnil_day5")
  # the signature describes the input columns and the prediction output
  signature = infer_signature(X_train, model.predict(X_train))
  model_info = mlflow.sklearn.log_model(
    sk_model=model,
    registered_model_name="LogisticRegression_Amnil",
    signature=signature,
    # a few rows are enough to illustrate the expected input; passing the whole
    # training split would store all of its rows alongside the model
    input_example=X_train.head(5)
  )



Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Successfully registered model 'LogisticRegression_Amnil'.
2025/09/22 10:17:41 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: LogisticRegression_Amnil, version 1
Created version '1' of model 'LogisticRegression_Amnil'.


🏃 View run nimble-shark-96 at: http://localhost:3000/#/experiments/772418750015267640/runs/460f3fd403ad476387936cf3284335d8
🧪 View experiment at: http://localhost:3000/#/experiments/772418750015267640


In [40]:
# load the logged model back through the generic pyfunc interface
loaded_model = mlflow.pyfunc.load_model(model_uri=model_info.model_uri)

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

In [41]:
# predictions of the reloaded model on the same testing split
new_pred = loaded_model.predict(data=X_test)

In [42]:
# accuracy of the reloaded model on the testing split
new_acc = accuracy_score(y_true=y_test, y_pred=new_pred)
new_acc

0.7921348314606742