In [1]:
import pandas as pd
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [2]:
import pandas as pd

# Load datasets
train_df = pd.read_csv('../Data/train.csv')
test_df = pd.read_csv ('../Data/test.csv')

# Inspect the data
print(train_df.head())
print(test_df.head())


   Unnamed: 0  Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin  \
0           0            2      108             80              0        0   
1           1            0      173             78             32      265   
2           2            1      120             80             48      200   
3           3            0      121             66             30      165   
4           4            1      126             56             29      152   

    BMI  DiabetesPedigreeFunction  Age  Outcome  
0  27.0                     0.259   52        1  
1  46.5                     1.159   58        0  
2  38.9                     1.162   41        0  
3  34.3                     0.203   33        1  
4  28.7                     0.801   21        0  
   Unnamed: 0  Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin  \
0         614            2       87             58             16       52   
1         615           10       92             62              0        0 

In [5]:
mlflow.set_tracking_uri("file:///C:/Users/HP/MLOPS_Assignment1/new_mlruns")


In [14]:
model_save_path = 'C:/Users/HP/MLOPS_Assignment1/Models/random_forest_model.pkl'

In [15]:
mlflow.set_experiment("MLOPS_Expreiment")

# Start an MLflow run
with mlflow.start_run():
    # Define model parameters
    n_estimators = 100
    max_depth = 10

    # Log parameters
    mlflow.log_param("n_estimators", n_estimators)
    mlflow.log_param("max_depth", max_depth)

    # Prepare your data
    X_train = train_df.drop(columns=["Outcome"])  # Replace with your target column
    y_train = train_df["Outcome"]
    X_test = test_df.drop(columns=["Outcome"])
    y_test = test_df["Outcome"]

    # Train the model
    model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
    model.fit(X_train, y_train)

    # Predict on test set
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)

    # Log metrics
    mlflow.log_metric("accuracy", accuracy)

    # Log the model
    mlflow.sklearn.log_model(model, "random_forest_model")

    print(f"Model logged with accuracy: {accuracy}")

print("Current tracking URI:", mlflow.get_tracking_uri())
    



Model logged with accuracy: 0.8116883116883117
Current tracking URI: file:///C:/Users/HP/MLOPS_Assignment1/new_mlruns
Model saved to: C:/Users/HP/MLOPS_Assignment1/Models/random_forest_model.pkl


In [16]:
import pickle

with open(model_save_path, 'wb') as f:
    pickle.dump(model, f)

print(f"Model saved to: {model_save_path}")

Model saved to: C:/Users/HP/MLOPS_Assignment1/Models/random_forest_model.pkl


In [18]:
from sklearn.svm import SVC

model_save_path = 'C:/Users/HP/MLOPS_Assignment1/Models/svm_model.pkl'

mlflow.set_experiment("SVM_MODEL")
with mlflow.start_run():
    # Define model parameters
    C = 1.0
    kernel = 'linear'

    # Log parameters
    mlflow.log_param("C", C)
    mlflow.log_param("kernel", kernel)

    # Prepare your data
    X_train = train_df.drop(columns=["Outcome"])  # Replace with your target column
    y_train = train_df["Outcome"]
    X_test = test_df.drop(columns=["Outcome"])
    y_test = test_df["Outcome"]

    # Train the model
    model = SVC(C=C, kernel=kernel, random_state=42)
    model.fit(X_train, y_train)

    # Predict on test set
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)

    # Log metrics
    mlflow.log_metric("accuracy", accuracy)

    # Log the model in MLflow
    mlflow.sklearn.log_model(model, "svm_model")
    print(f"Model logged with accuracy: {accuracy}")
   




Model logged with accuracy: 0.8246753246753247


In [19]:
import pickle

with open(model_save_path, 'wb') as f:
    pickle.dump(model, f)

print(f"Model saved to: {model_save_path}")

Model saved to: C:/Users/HP/MLOPS_Assignment1/Models/svm_model.pkl


In [20]:
import mlflow
import mlflow.sklearn
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import pandas as pd
import os
import pickle

model_save_path = 'C:/Users/HP/MLOPS_Assignment1/Models/knn_model.pkl'

# Set MLflow experiment
mlflow.set_experiment("KNN_MODEL")

# Load your dataset
train_df = pd.read_csv('C:/Users/HP/MLOPS_Assignment1/Data/train.csv')
test_df = pd.read_csv('C:/Users/HP/MLOPS_Assignment1/Data/test.csv')

# Start an MLflow run
with mlflow.start_run():
    # Define model parameters
    n_neighbors = 5
    weights = 'uniform'  # or 'distance'

    # Log parameters
    mlflow.log_param("n_neighbors", n_neighbors)
    mlflow.log_param("weights", weights)

    # Prepare your data
    X_train = train_df.drop(columns=["Outcome"])  # Replace with your target column
    y_train = train_df["Outcome"]
    X_test = test_df.drop(columns=["Outcome"])
    y_test = test_df["Outcome"]

    # Train the model
    model = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights)
    model.fit(X_train, y_train)

    # Predict on test set
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)

    # Log metrics
    mlflow.log_metric("accuracy", accuracy)

    # Log the model in MLflow
    mlflow.sklearn.log_model(model, "knn_model")

    # Save the model to your 'models/saved_model/' directory
    

    print(f"Model logged with accuracy: {accuracy}")
    print(f"Model saved to: {model_save_path}")


2024/08/04 17:30:51 INFO mlflow.tracking.fluent: Experiment with name 'KNN_MODEL' does not exist. Creating a new experiment.


Model logged with accuracy: 0.7337662337662337
Model saved to: C:/Users/HP/MLOPS_Assignment1/Models/knn_model.pkl


In [21]:
import pickle

with open(model_save_path, 'wb') as f:
    pickle.dump(model, f)

print(f"Model saved to: {model_save_path}")

Model saved to: C:/Users/HP/MLOPS_Assignment1/Models/knn_model.pkl


In [17]:
!mlflow ui

^C


In [14]:
import mlflow

mlflow.set_experiment("Test_Experiment")

with mlflow.start_run():
    mlflow.log_param("param1", "value1")
    mlflow.log_metric("metric1", 0.1)
    print("Logged test experiment")

print("Current tracking URI:", mlflow.get_tracking_uri())

Logged test experiment
Current tracking URI: file:///C:/Users/HP/MLOPS_Assignment1/notebooks/mlruns


In [12]:
!mlflow ui

^C


In [None]:
import mlflow

print("Current tracking URI:", mlflow.get_tracking_uri())