In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Step 1: Generate Synthetic Dataset
np.random.seed(42)

n_samples = 1000
gre_scores = np.random.randint(290, 340, n_samples)  # GRE score range
toefl_scores = np.random.randint(90, 120, n_samples)  # TOEFL score range
lor = np.random.randint(1, 6, n_samples)  # 1 to 5
papers = np.random.randint(0, 6, n_samples)  # 0 to 5
sop_scores = np.round(np.random.uniform(1, 5, n_samples), 1)  # SOP score range: 1.0 to 5.0

# Admission logic (synthetic for demonstration)
admitted = (
    (gre_scores > 310) & 
    (toefl_scores > 100) & 
    (lor >= 3) & 
    ((papers > 1) | (sop_scores >= 4.0))
).astype(int)

# Create a DataFrame
data = pd.DataFrame({
    "GRE Score": gre_scores,
    "TOEFL Score": toefl_scores,
    "LORs": lor,
    "Papers Published": papers,
    "SOP Score": sop_scores,
    "Admitted": admitted
})

# Step 2: Split the Dataset
X = data[["GRE Score", "TOEFL Score", "LORs", "Papers Published", "SOP Score"]]
y = data["Admitted"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Train Logistic Regression Model
model = LogisticRegression()
model.fit(X_train, y_train)

# Step 4: Make Predictions
y_pred = model.predict(X_test)

# Step 5: Evaluate Model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Step 6: Predict Admission for New Data
new_data = pd.DataFrame({
    "GRE Score": [320, 300],
    "TOEFL Score": [110, 95],
    "LORs": [4, 2],
    "Papers Published": [2, 1],
    "SOP Score": [4.5, 3.0]
})

predictions = model.predict(new_data)
print("\nPredictions for New Data:")
print(predictions)  # 1: Admitted, 0: Not Admitted


Accuracy: 0.9

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.95      0.94       174
           1       0.62      0.58      0.60        26

    accuracy                           0.90       200
   macro avg       0.78      0.76      0.77       200
weighted avg       0.90      0.90      0.90       200


Confusion Matrix:
[[165   9]
 [ 11  15]]

Predictions for New Data:
[0 0]
