In [None]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import KFold

# Synthetic single-feature regression problem; the fixed seed keeps the data reproducible.
X, y = make_regression(n_samples=100, n_features=1, random_state=42, noise=10)

model = LinearRegression()

# Shuffled k-fold splitter; the fixed seed keeps fold membership reproducible.
k = 5
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# One entry per fold, filled in by the loop below.
mse_scores = []
r2_scores = []

# Manual cross-validation: fit on the training indices, score on the held-out indices.
for fold, (train_index, test_index) in enumerate(kf.split(X), 1):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    mse_scores.append(mse)
    r2_scores.append(r2)

    print(f"Fold {fold}: MSE = {mse:.4f}, R^2 = {r2:.4f}")

# Aggregate once, after every fold has been scored (previously recomputed
# inside the loop on each iteration).
avg_mse = np.mean(mse_scores)
avg_r2 = np.mean(r2_scores)

print(f"\nAverage MSE: {avg_mse:.4f}")
print(f"Average R^2: {avg_r2:.4f}")

Fold 1: MSE = 104.2022, R^2 = 0.9374
Fold 2: MSE = 66.5207, R^2 = 0.9701
Fold 3: MSE = 63.1745, R^2 = 0.9707
Fold 4: MSE = 69.4737, R^2 = 0.9326
Fold 5: MSE = 105.1747, R^2 = 0.9094

Average MSE: 81.7092
Average R^2: 0.9440


In [None]:
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Imported here so this cell runs on its own: the cell's import list above
# provides neither name (make_regression previously leaked in from an earlier cell).
from sklearn.base import clone
from sklearn.datasets import make_regression

# Same synthetic data as before. With n_features=1 make_regression already
# returns X with shape (n_samples, 1), so no reshape is needed.
X, y = make_regression(n_samples=100, n_features=1, random_state=42, noise=10)

# Pipeline: polynomial feature expansion followed by ordinary least squares.
# The degree given here is only a starting value; the grid below overrides it.
model = make_pipeline(PolynomialFeatures(degree=2), LinearRegression())

# Keys follow make_pipeline's "<lowercased class name>__<parameter>" naming.
param_grid = {'polynomialfeatures__degree': [1,2,3],
              'linearregression__fit_intercept': [True, False]}

k = 5
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Exhaustive search over param_grid, scored by negated MSE (higher is better).
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=kf, scoring='neg_mean_squared_error',return_train_score=True)

grid_search.fit(X, y)

print("Best Parameters: ", grid_search.best_params_)
print("Best cross-validation score (neg MSE): ", grid_search.best_score_)

best_model = grid_search.best_estimator_

mse_scores = []
r2_scores = []

# Re-evaluate the winning configuration fold by fold to get per-fold MSE and R^2.
for fold, (train_index, test_index) in enumerate(kf.split(X),1):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Fit an unfitted copy with the same hyperparameters. Fitting best_model
    # directly would overwrite grid_search.best_estimator_ with a model trained
    # on only this fold's training split.
    fold_model = clone(best_model)
    fold_model.fit(X_train, y_train)
    y_pred = fold_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    mse_scores.append(mse)
    r2_scores.append(r2)

    print(f"Fold {fold}: MSE = {mse:.4f}, R^2 = {r2:.4f}")

avg_mse = np.mean(mse_scores)
avg_r2 = np.mean(r2_scores)
print(f"\nAverage MSE across {k} folds: {avg_mse:.4f}")
print(f"Average R^2 across {k} folds: {avg_r2:.4f}")

Best Parameters:  {'linearregression__fit_intercept': True, 'polynomialfeatures__degree': 1}
Best cross-validation score (neg MSE):  -81.70917788826905
Fold 1: MSE = 104.2022, R^2 = 0.9374
Fold 2: MSE = 66.5207, R^2 = 0.9701
Fold 3: MSE = 63.1745, R^2 = 0.9707
Fold 4: MSE = 69.4737, R^2 = 0.9326
Fold 5: MSE = 105.1747, R^2 = 0.9094

Average MSE across 5 folds: 81.7092
Average R^2 across 5 folds: 0.9440


In [None]:
import wandb
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
import joblib

# 1. Initialize the W&B project.
# The run is used as a context manager so it is always finished, even when a
# later step raises; otherwise the run stays open until the next wandb.init().
with wandb.init(project="basic-intro2", config={
    "test_size": 0.2,
    "random_state": 42,
    "max_iter": 100,  # scikit-learn's default; raise it if lbfgs warns about convergence
    "solver": "lbfgs"
}) as run:

    config = run.config

    # Load a simple dataset (iris).
    iris = load_iris()
    x = iris.data
    y = iris.target
    class_names = iris.target_names

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=config.test_size, random_state=config.random_state
    )

    # Train logistic regression. multi_class is not passed: "auto" is the
    # default and the parameter is deprecated in recent scikit-learn releases.
    model = LogisticRegression(max_iter=config.max_iter, solver=config.solver)
    model.fit(x_train, y_train)

    # Predictions on the held-out split.
    y_pred = model.predict(x_test)

    # Headline metric.
    acc = accuracy_score(y_test, y_pred)
    run.log({"accuracy": acc})
    print("Accuracy:", acc)
    print(classification_report(y_test, y_pred, target_names=class_names))

    # Confusion matrix plot, drawn on an explicit figure so that exactly this
    # figure is uploaded and then closed.
    cm = confusion_matrix(y_test, y_pred)
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=class_names, yticklabels=class_names, ax=ax)
    ax.set_xlabel("Predicted label")
    ax.set_ylabel("True label")
    ax.set_title("Confusion Matrix")
    run.log({"confusion_matrix": wandb.Image(fig)})
    plt.close(fig)

    # Log per-class precision/recall/F1 as scalar metrics, gathered into one
    # log call so they all land on the same step.
    report = classification_report(y_test, y_pred, target_names=class_names, output_dict=True)
    class_metrics = {}
    for cls, metrics in report.items():
        # Skip non-dict entries of the report (e.g. the scalar "accuracy").
        if isinstance(metrics, dict):
            class_metrics[f"{cls}_precision"] = metrics["precision"]
            class_metrics[f"{cls}_recall"] = metrics["recall"]
            class_metrics[f"{cls}_f1-score"] = metrics["f1-score"]
    run.log(class_metrics)

    # Save the model locally.
    joblib.dump(model, "logistic_Reg.pkl")

    # Log the model file as an artifact.
    artifact = wandb.Artifact("lr_model", type="model")
    artifact.add_file("logistic_Reg.pkl")
    run.log_artifact(artifact)

0,1
accuracy,▁
macro avg_f1-score,▁
macro avg_precision,▁
macro avg_recall,▁
setosa_f1-score,▁
setosa_precision,▁
setosa_recall,▁
versicolor_f1-score,▁
versicolor_precision,▁
versicolor_recall,▁

0,1
accuracy,0.66667
macro avg_f1-score,0.60317
macro avg_precision,0.82456
macro avg_recall,0.69697
setosa_f1-score,1
setosa_precision,1
setosa_recall,1
versicolor_f1-score,0.64286
versicolor_precision,0.47368
versicolor_recall,1




Accuracy: 1.0
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00         9
   virginica       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



0,1
accuracy,▁
macro avg_f1-score,▁
macro avg_precision,▁
macro avg_recall,▁
setosa_f1-score,▁
setosa_precision,▁
setosa_recall,▁
versicolor_f1-score,▁
versicolor_precision,▁
versicolor_recall,▁

0,1
accuracy,1
macro avg_f1-score,1
macro avg_precision,1
macro avg_recall,1
setosa_f1-score,1
setosa_precision,1
setosa_recall,1
versicolor_f1-score,1
versicolor_precision,1
versicolor_recall,1
