In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Read the raw table; the path is relative to the kernel's working directory.
df = pd.read_csv("dummy_dataset.csv")

# Separate the two predictor columns (kept as a DataFrame) from the target
# column (kept as a Series).
X = df.loc[:, ["Feature1", "Feature2"]]
y = df.loc[:, "Label"]


In [2]:
# First cut: hold out 20% of the rows as the final test set.
# Stratifying on y keeps the class proportions the same on both sides of the
# split, and the fixed random_state makes the split reproducible.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [3]:

# Second cut: carve the validation set out of the remaining 80%.
# test_size=0.25 of that remainder is 20% of the full dataset, so the overall
# partition is 60/20/20 train/validation/test.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=0.25,
    stratify=y_train_val,
    random_state=42,
)

# Sanity check: show the (rows, columns) shape of each feature partition.
for split_label, split_X in (
    ("Train size:", X_train),
    ("Validation size:", X_val),
    ("Test size:", X_test),
):
    print(split_label, split_X.shape)

Train size: (3000, 2)
Validation size: (1000, 2)
Test size: (1000, 2)


In [4]:
# Build an (as yet unfitted) logistic-regression classifier; max_iter=1000
# caps the number of solver iterations.
model = LogisticRegression(
    max_iter=1000,
)

In [5]:
# Fit on the training partition alone, so the validation and test rows stay
# unseen until they are used for evaluation.
model.fit(X=X_train, y=y_train)

print("✅ Model training complete on the training set!")

✅ Model training complete on the training set!


In [6]:
from sklearn.metrics import accuracy_score, classification_report

In [7]:
# Predicted class label for every row of the validation partition.
y_val_pred = model.predict(X=X_val)


In [8]:
# Score the validation predictions: overall accuracy first, then the
# per-class precision / recall / F1 table.
print("✅ Validation Complete")

val_accuracy = accuracy_score(y_val, y_val_pred)
print("Validation Accuracy:", val_accuracy)

val_report = classification_report(y_val, y_val_pred)
print("\nClassification Report:\n", val_report)

✅ Validation Complete
Validation Accuracy: 0.999

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       502
           1       1.00      1.00      1.00       498

    accuracy                           1.00      1000
   macro avg       1.00      1.00      1.00      1000
weighted avg       1.00      1.00      1.00      1000



In [9]:
# Optional spot check: line up each validation row's features with its true
# label and the model's prediction, keeping the original row index.
val_results = X_val[["Feature1", "Feature2"]].assign(
    Actual=y_val.values,
    Predicted=y_val_pred,
)
val_results.head(10)

Unnamed: 0,Feature1,Feature2,Actual,Predicted
1180,51.798942,34.820779,1,1
2840,52.956245,29.685531,1,1
4718,57.228269,32.974272,1,1
3213,65.232386,25.108672,1,1
3389,51.390604,23.732991,0,0
3469,47.813467,27.170314,0,0
4429,45.171724,40.134086,1,1
304,49.790984,28.7135,0,0
1444,53.99688,33.920732,1,1
317,46.897332,28.897309,0,0


In [10]:
# Predicted class label for every row of the held-out test partition.
y_test_pred = model.predict(X=X_test)

In [11]:

# Score the test predictions with the same metrics used for validation:
# overall accuracy, then the per-class precision / recall / F1 table.
# accuracy_score and classification_report are already imported in the
# earlier metrics-import cell, so they are not imported a second time here.
print("✅ Testing Complete")
print("Test Accuracy:", accuracy_score(y_test, y_test_pred))
print("\nClassification Report:\n", classification_report(y_test, y_test_pred))

✅ Testing Complete
Test Accuracy: 0.999

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       502
           1       1.00      1.00      1.00       498

    accuracy                           1.00      1000
   macro avg       1.00      1.00      1.00      1000
weighted avg       1.00      1.00      1.00      1000



In [12]:
# Optional spot check: line up each test row's features with its true label
# and the model's prediction, keeping the original row index.
test_results = X_test[["Feature1", "Feature2"]].assign(
    Actual=y_test.values,
    Predicted=y_test_pred,
)
test_results.head(10)

Unnamed: 0,Feature1,Feature2,Actual,Predicted
3592,73.036388,39.679305,1,1
2675,30.373743,26.287566,0,0
4163,31.636402,35.339989,0,0
3921,52.242347,31.963842,1,1
3588,62.39584,26.501996,1,1
2789,51.814266,35.63894,1,1
1331,62.187619,23.209125,1,1
479,61.195749,35.494917,1,1
4831,38.136636,25.926572,0,0
1054,44.496948,31.706079,0,0


In [13]:
# Optional: inspect the last 15 rows of the test comparison table.
# `test_results` was built in the previous cell from the same X_test, y_test
# and y_test_pred, so it is reused here instead of being reconstructed.
test_results.tail(15)

Unnamed: 0,Feature1,Feature2,Actual,Predicted
1266,48.887739,30.366684,0,0
4722,50.262048,30.515391,1,1
1073,37.170078,32.263782,0,0
3783,38.277662,24.176683,0,0
1495,70.060929,31.907128,1,1
889,61.087036,28.246109,1,1
3031,43.417516,37.509765,1,1
1094,57.326401,30.360766,1,1
4610,50.678385,30.639636,1,1
3895,60.016318,40.228273,1,1
