In [11]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


In [12]:
# Load the retail regression dataset (relative path) into `df`, then show the
# first five rows as the cell's displayed value.
df = pd.read_csv(filepath_or_buffer="retail_regression.csv")
df.head(n=5)


Unnamed: 0,Sales,Discount,Quantity,Month,Profit
0,200,0.1,2,1,30
1,450,0.2,5,2,80
2,300,0.05,3,3,60
3,500,0.15,6,4,90
4,150,0.0,1,5,20


In [13]:
# Hold-out evaluation of a linear regression:
# "Profit" is the target; every other column of `df` is used as a predictor.
y = df["Profit"]
X = df.drop(columns="Profit")

# Reserve 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() hands back the fitted estimator, so construction and fitting are chained.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Test-set metrics: RMSE is the square root of the MSE, alongside R^2.
rmse_tt = np.sqrt(mean_squared_error(y_test, y_pred))
r2_tt = r2_score(y_test, y_pred)

print("Train-Test RMSE:", rmse_tt)
print("Train-Test R2:", r2_tt)


Train-Test RMSE: 1.7352439125932082
Train-Test R2: 0.9924723214095206


In [14]:
# 5-fold cross-validation of `model` on the full X / y, with shuffled folds and
# a fixed seed.
kf5 = KFold(n_splits=5, shuffle=True, random_state=42)

# The "neg_mean_squared_error" scorer yields negated MSE values (one per fold),
# so the sign is flipped back before taking the square root.
cv_mse_5 = np.negative(
    cross_val_score(
        estimator=model,
        X=X,
        y=y,
        cv=kf5,
        scoring="neg_mean_squared_error",
    )
)
cv_rmse_5 = np.sqrt(cv_mse_5)

# Report the per-fold RMSE values and their average.
print("K=5 RMSE Scores:", cv_rmse_5)
print("Mean RMSE (K=5):", np.mean(cv_rmse_5))


K=5 RMSE Scores: [ 1.73524391  7.53347073 11.47715485  6.15549571  6.61462658]
Mean RMSE (K=5): 6.703198356419653


In [15]:
# Same cross-validation as the K=5 cell, but with ten shuffled folds (seed 42).
kf10 = KFold(n_splits=10, shuffle=True, random_state=42)

# cross_val_score returns negated MSE per fold for this scorer; undo the
# negation, then convert each fold's MSE to RMSE.
cv_mse_10 = np.negative(
    cross_val_score(
        estimator=model,
        X=X,
        y=y,
        cv=kf10,
        scoring="neg_mean_squared_error",
    )
)
cv_rmse_10 = np.sqrt(cv_mse_10)

# Per-fold RMSE values followed by their mean.
print("K=10 RMSE Scores:", cv_rmse_10)
print("Mean RMSE (K=10):", np.mean(cv_rmse_10))


K=10 RMSE Scores: [ 1.15323604  2.50502008  9.17886012 10.56803452 19.80577271 12.38800063
  5.5920157   4.82631474  1.1272262   8.08237855]
Mean RMSE (K=10): 7.52268592859779


In [16]:
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the student-performance dataset (relative path) for the classification
# part of the notebook.
df_cls = pd.read_csv(filepath_or_buffer="student_performance.csv")

# "Pass" is the label; all remaining columns form the feature matrix.
y_cls = df_cls["Pass"]
X_cls = df_cls.drop(columns="Pass")


In [17]:
# Hold-out evaluation of a logistic regression on the classification data.
# The 80/20 split is stratified on the label and seeded for repeatability.
# Note: this rebinds X_train / X_test / y_train / y_test / y_pred, which the
# regression cell above also assigns.
X_train, X_test, y_train, y_test = train_test_split(
    X_cls,
    y_cls,
    stratify=y_cls,
    test_size=0.2,
    random_state=42,
)

# fit() hands back the fitted estimator, so construction and fitting are chained.
clf = LogisticRegression().fit(X_train, y_train)
y_pred = clf.predict(X_test)

print("Train-Test Accuracy:", accuracy_score(y_test, y_pred))


Train-Test Accuracy: 1.0


In [18]:
# Stratified 5-fold cross-validation of a logistic regression on X_cls / y_cls.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)


def _fold_accuracies(features, target, splitter):
    """Fit a fresh LogisticRegression on each fold and return the accuracies.

    The per-fold slices and classifier live only inside this function, so the
    notebook-level hold-out names (X_train, X_test, y_train, y_test, clf) set
    by the previous cell are not overwritten with the last fold's data.

    Parameters
    ----------
    features : DataFrame indexed positionally via ``.iloc``.
    target : Series aligned row-for-row with ``features``.
    splitter : object whose ``split(features, target)`` yields
        ``(train_idx, test_idx)`` pairs of positional indices.

    Returns
    -------
    list
        One accuracy score per fold, in the order the splitter yields them.
    """
    scores = []
    for train_idx, test_idx in splitter.split(features, target):
        # A new estimator per fold: nothing learned on one fold carries over.
        fold_clf = LogisticRegression()
        fold_clf.fit(features.iloc[train_idx], target.iloc[train_idx])

        fold_preds = fold_clf.predict(features.iloc[test_idx])
        scores.append(accuracy_score(target.iloc[test_idx], fold_preds))
    return scores


cv_acc = _fold_accuracies(X_cls, y_cls, skf)

print("Stratified K-Fold Accuracies:", cv_acc)
print("Mean CV Accuracy:", np.mean(cv_acc))


Stratified K-Fold Accuracies: [1.0, 1.0, 1.0, 1.0, 1.0]
Mean CV Accuracy: 1.0
