In [1]:
import pandas as pd
from sklearn.model_selection import KFold, cross_val_score
from sklearn.linear_model import LogisticRegression

# Read the dataset from the CSV file in the working directory
df = pd.read_csv("dummy_dataset.csv")

# Split the frame into the model inputs (X) and the target column (y)
feature_columns = ["Feature1", "Feature2"]
label_column = "Label"
X = df[feature_columns]
y = df[label_column]

print("✅ Dataset loaded successfully")


✅ Dataset loaded successfully


In [2]:
# Baseline classifier; max_iter is raised to 1000 iterations for the solver
model = LogisticRegression(max_iter=1000)

# Shuffled 5-fold splitter with a fixed seed so the folds are reproducible
kf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)


In [3]:
# Score the baseline model on every fold produced by `kf`
scores = cross_val_score(estimator=model, X=X, y=y, scoring='accuracy', cv=kf)

# Summary statistics over the per-fold accuracies
baseline_mean = scores.mean()
baseline_std = scores.std()

print("✅ K-Fold Cross Validation Completed")
print("Accuracies for each fold:", scores)
print("Mean Accuracy:", baseline_mean)
print("Standard Deviation:", baseline_std)


✅ K-Fold Cross Validation Completed
Accuracies for each fold: [1.    1.    0.998 0.998 1.   ]
Mean Accuracy: 0.9992000000000001
Standard Deviation: 0.0009797958971132722


In [5]:
# Search over the regularization parameter C using the same splitter (`kf`)
# as the baseline run, keeping the C with the highest mean CV accuracy.
# On ties the earliest value in C_values wins (strict `>` comparison).
C_values = [0.01, 0.1, 1, 10, 100]
best_C = None
# Start below any reachable accuracy so a valid mean of 0.0 can still be
# selected; a `0` sentinel would leave best_C as None in that case.
best_score = float("-inf")

for c in C_values:
    temp_model = LogisticRegression(C=c, max_iter=1000)
    # Dedicated name: the baseline `scores` array from the earlier
    # cross-validation cell must not be overwritten by the last candidate.
    fold_scores = cross_val_score(temp_model, X, y, cv=kf, scoring='accuracy')
    mean_score = fold_scores.mean()
    print(f"C={c} → Mean CV Accuracy: {mean_score:.4f}")

    # A NaN mean compares False here, so such a candidate is never selected.
    if mean_score > best_score:
        best_score = mean_score
        best_C = c

# Fail here with a clear message rather than passing C=None to a later cell.
if best_C is None:
    raise RuntimeError("No C value produced a valid cross-validation score")

print("\n✅ Best C value:", best_C, "→ Mean CV Accuracy:", best_score)


C=0.01 → Mean CV Accuracy: 0.9976
C=0.1 → Mean CV Accuracy: 0.9992
C=1 → Mean CV Accuracy: 0.9992
C=10 → Mean CV Accuracy: 0.9990
C=100 → Mean CV Accuracy: 0.9990

✅ Best C value: 0.1 → Mean CV Accuracy: 0.9992000000000001


In [6]:
# Refit on every row of the dataset using the C selected by the search above;
# fit() returns the estimator itself, so construction and training are chained.
final_model = LogisticRegression(C=best_C, max_iter=1000).fit(X, y)

print("✅ Final model trained on full dataset")


✅ Final model trained on full dataset


In [8]:
# Predict labels for the same X that final_model was fitted on in the previous
# cell, so agreement in this table is in-sample rather than held-out.
y_pred = final_model.predict(X)

# Side-by-side table: both features, the true label and the predicted label
results = X[["Feature1", "Feature2"]].assign(Actual=y, Predicted=y_pred)

# Preview the first 10 rows
results.head(10)



Unnamed: 0,Feature1,Feature2,Actual,Predicted
0,54.967142,27.881202,1,1
1,48.617357,27.732929,0,0
2,56.476885,21.021784,0,0
3,65.230299,28.349549,1,1
4,47.658466,33.664145,1,1
5,47.65863,23.628839,0,0
6,65.792128,35.242413,1,1
7,57.674347,32.438874,1,1
8,45.305256,26.328833,0,0
9,55.4256,29.292352,1,1


In [9]:
# Write the comparison table to disk without the row index column
results.to_csv(path_or_buf="predicted_vs_actual.csv", index=False)
print("✅ Predictions saved to predicted_vs_actual.csv")


✅ Predictions saved to predicted_vs_actual.csv
