Run:  python -m ipykernel install --user --name cicd-ml --display-name "Python (cicd-ml)"
You should see something like: Installed kernelspec cicd-ml in /home/lara/.local/share/jupyter/kernels/cicd-ml (the directory will reflect your own home folder)

In [2]:
import pandas as pd
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC

# 1) Load data
df = pd.read_csv("data/reviews_fail.csv")  # must have columns: text, sentiment

# Fail fast with a readable message instead of a KeyError further down the cell.
missing_cols = {"text", "sentiment"} - set(df.columns)
if missing_cols:
    raise ValueError(
        f"data/reviews_fail.csv is missing required column(s): {sorted(missing_cols)}"
    )

# 2) If no 'month' column, simulate one (70% months 1-2, 30% month 3)
#    Rows are assigned by POSITION: the first 35% -> month 1, the next 35% ->
#    month 2, the remainder -> month 3. Building the column from a list avoids
#    label-based `.loc` slicing, whose end label is inclusive and which only
#    lines up with row positions when the index is the default 0..n-1 range.
if "month" not in df.columns:
    n = len(df)
    cut1 = int(n * 0.35)    # first row position of month 2
    cut2 = int(n * 0.70)    # first row position of month 3
    df["month"] = [1] * cut1 + [2] * (cut2 - cut1) + [3] * (n - cut2)

# 3) Out-of-time split: fit on months 1 and 2, hold out month 3 for evaluation
in_train_window = df["month"].isin([1, 2])
train = df.loc[in_train_window]
test = df.loc[df["month"] == 3].copy()  # copy: prediction columns are added to it below

# 4) Simple text classifier: TF-IDF features (unigrams + bigrams) feeding a linear SVM
C = 1.0  # try 1.0 (reasonable), or 0.001 (too rigid), or 5.0 (more flexible)
vectorizer = TfidfVectorizer(ngram_range=(1, 2), lowercase=True, min_df=1)
classifier = LinearSVC(C=C, random_state=42)
pipe = make_pipeline(vectorizer, classifier)

# 5) Fit on the training rows only, then score the held-out month.
#    The vectorizer lives inside the pipeline, so it is fitted on train text only.
pipe.fit(train["text"], train["sentiment"])
test["pred"] = pipe.predict(test["text"])
# 1 where the prediction matches the label, 0 otherwise
test["correct"] = test["pred"].eq(test["sentiment"]).astype(int)

# 6) Summarise month-3 performance, preview the predictions, and write them to disk
cols = ["text", "sentiment", "pred", "correct"]
report = test[cols]

# Accuracy is the mean of the 0/1 'correct' flags.
acc = test["correct"].mean()
print(f"Month 3 accuracy: {acc:.3f}  (C={C})")

display(report.head(20))      # rendered as a table when run inside Jupyter
report.to_csv("data/predictions_month3.csv", index=False)
print("Saved: data/predictions_month3.csv")


Month 3 accuracy: 0.333  (C=1.0)


Unnamed: 0,text,sentiment,pred,correct
5,"the tablet performs solid for daily use, very ...",1,1,1
6,"the blender looks unreliable, quality is poor",0,1,0
7,the watch feels unreliable,0,1,0


Saved: data/predictions_month3.csv
