In [1]:
import pandas as pd
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
data = {
    'GRE': [330, 310, 320, 300, 340, 360, 280, 300, 340, 310],
    'GPA': [3.5, 3.0, 3.2, 2.8, 3.8, 4.0, 2.5, 3.0, 3.9, 3.2],
    'Prestige': [1, 2, 1, 3, 1, 2, 4, 3, 1, 2],
    'Admit': [1, 0, 1, 0, 1, 1, 0, 0, 1, 0]
}

In [3]:
df = pd.DataFrame(data)

In [5]:
X = df[['GRE', 'GPA', 'Prestige']]
y = df['Admit']

In [6]:
X = sm.add_constant(X)

In [8]:
print(df.groupby(['Admit', 'GRE', 'GPA', 'Prestige']).size())

Admit  GRE  GPA  Prestige
0      280  2.5  4           1
       300  2.8  3           1
            3.0  3           1
       310  3.0  2           1
            3.2  2           1
1      320  3.2  1           1
       330  3.5  1           1
       340  3.8  1           1
            3.9  1           1
       360  4.0  2           1
dtype: int64


In [9]:
print(df.corr())


               GRE       GPA  Prestige     Admit
GRE       1.000000  0.975442 -0.753512  0.842160
GPA       0.975442  1.000000 -0.762513  0.826056
Prestige -0.753512 -0.762513  1.000000 -0.800000
Admit     0.842160  0.826056 -0.800000  1.000000


In [10]:
logit_model = sm.Logit(y, sm.add_constant(df[['GRE']]))
result = logit_model.fit()
print(result.summary())

         Current function value: 0.000004
         Iterations: 35
                           Logit Regression Results                           
Dep. Variable:                  Admit   No. Observations:                   10
Model:                          Logit   Df Residuals:                        8
Method:                           MLE   Df Model:                            1
Date:                Wed, 16 Jul 2025   Pseudo R-squ.:                   1.000
Time:                        06:17:36   Log-Likelihood:            -3.7633e-05
converged:                      False   LL-Null:                       -6.9315
Covariance Type:            nonrobust   LLR p-value:                 0.0001966
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const       -707.6621   1.03e+04     -0.069      0.945   -2.08e+04    1.94e+04
GRE            2.2455     32.609      0.069      0.945     -61.66



In [18]:
from sklearn.linear_model import LogisticRegression


In [14]:
import statsmodels.api as sm
X_reduced = sm.add_constant(df[['GRE', 'GPA']])
logit_model = sm.Logit(y, X_reduced)
result = logit_model.fit()
print(result.summary())


         Current function value: 0.000000
         Iterations: 35
                           Logit Regression Results                           
Dep. Variable:                  Admit   No. Observations:                   10
Model:                          Logit   Df Residuals:                        7
Method:                           MLE   Df Model:                            2
Date:                Wed, 16 Jul 2025   Pseudo R-squ.:                   1.000
Time:                        06:19:26   Log-Likelihood:            -3.1396e-09
converged:                      False   LL-Null:                       -6.9315
Covariance Type:            nonrobust   LLR p-value:                 0.0009766
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const      -1388.3969    1.3e+07     -0.000      1.000   -2.55e+07    2.54e+07
GRE            4.0546   3570.649      0.001      0.999   -6994.28



In [15]:
X = df[['GRE', 'GPA', 'Prestige']]

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [17]:
clf = LogisticRegression()
clf.fit(X_train, y_train)

In [19]:
y_pred = clf.predict(X_test)

In [20]:
print("\nAccuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 1.0
Confusion Matrix:
 [[1 0]
 [0 2]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         2

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3

