In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

In [2]:
# Step 1: Load and preprocess the dataset
data = load_iris()
X = data.data
y = data.target
X, y

(array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
        [5

In [3]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
# Directory to save models
import os
model_dir = "models"
os.makedirs(model_dir, exist_ok=True)

In [6]:
# Step 2: Train models with different hyperparameters
hyperparams = [
    {"C": 0.1, "max_iter": 100},
    {"C": 1.0, "max_iter": 200},
    {"C": 10.0, "max_iter": 300},
]

results = []

In [7]:
import pickle
for idx, params in enumerate(hyperparams):
    # Train the model
    model = LogisticRegression(C=params["C"], max_iter=params["max_iter"], random_state=42)
    model.fit(X_train, y_train)

    # Save the model
    model_path = os.path.join(model_dir, f"model_v{idx+1}.pkl")
    with open(model_path, "wb") as f:
        pickle.dump(model, f)

    # Evaluate the model
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Log the results
    results.append({"version": idx+1, "params": params, "accuracy": accuracy})

    # Step 3: Version the model using Git tags
    os.system(f"git add {model_path}")
    os.system(f"git commit -m 'Add model version {idx+1} with params {params}'")
    os.system(f"git tag -a v{idx+1} -m 'Version {idx+1} with accuracy {accuracy}'")
    os.system(f"git push origin main --tags")

[main 3d6955d] Add model version 1 with params {C: 0.1, max_iter: 100}
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 models/model_v1.pkl


fatal: could not read Username for 'https://github.com': No such device or address


[main 8582fd9] Add model version 2 with params {C: 1.0, max_iter: 200}
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 models/model_v2.pkl


fatal: could not read Username for 'https://github.com': No such device or address


[main 9722500] Add model version 3 with params {C: 10.0, max_iter: 300}
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 models/model_v3.pkl


fatal: could not read Username for 'https://github.com': No such device or address


In [8]:
# Display results
results_df = pd.DataFrame(results)
print(results_df)

   version                        params  accuracy
0        1   {'C': 0.1, 'max_iter': 100}  0.966667
1        2   {'C': 1.0, 'max_iter': 200}  1.000000
2        3  {'C': 10.0, 'max_iter': 300}  1.000000


In [9]:
# Save results to a CSV file
results_df.to_csv("model_results.csv", index=False)