In [None]:
import mlflow
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [21]:
# Read the Kaggle Titanic training set from a CSV file in the working directory
df = pd.read_csv(filepath_or_buffer="train.csv")

In [22]:
# Preview the first five rows (head() defaults to n=5)
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [23]:
# Keep only rows where every column listed below has a value
required_columns = ["Age", "Embarked", "Fare"]
df = df.dropna(subset=required_columns, how="any")

In [24]:
# Encode categorical columns
#
# This cell is safe to re-run: mapping "male"/"female" a second time would turn
# the already-encoded 0/1 values into NaN, so the mapping is applied only while
# the column still holds its original (non-numeric) labels.
sex_codes = {"male": 0, "female": 1}
if not pd.api.types.is_numeric_dtype(df["Sex"]):
    # assign() builds a new frame instead of writing into the existing one in place
    df = df.assign(Sex=df["Sex"].map(sex_codes))

# Replace each distinct Embarked value with its integer category code
df = df.assign(Embarked=df["Embarked"].astype("category").cat.codes)


In [25]:
# Model inputs (X) are the listed columns; the label (y) is the Survived column
features = ["Pclass", "Sex", "Age", "Fare", "Embarked"]
X, y = df[features], df["Survived"]


In [26]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# MLflow experiment
#
# Training, evaluation and all logging happen inside a single `start_run()`
# context. Logging after the `with` block has exited would make MLflow open a
# second, separate run for the parameters, metric and model, and leave that
# run active.
mlflow.set_experiment("Titanic_Kaggle_Experiment")

with mlflow.start_run():
    clf = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
    clf.fit(X_train, y_train)

    preds = clf.predict(X_test)
    acc = accuracy_score(y_test, preds)

    # Log parameters and metrics; hyperparameters are read back from the
    # estimator so the logged values always match the model that was trained
    mlflow.log_param("n_estimators", clf.n_estimators)
    mlflow.log_param("max_depth", clf.max_depth)
    mlflow.log_metric("accuracy", acc)

    # Log the model as an artifact of this same run
    mlflow.sklearn.log_model(clf, "model")




<mlflow.models.model.ModelInfo at 0x22103505ab0>

In [18]:
# Report the accuracy held in `acc`
summary = f"Model trained and logged with accuracy: {acc}"
print(summary)


Model trained and logged with accuracy: 0.6363636363636364
