In [12]:
import numpy as np
import pandas as pd

Part One

In [27]:
from statsmodels.sandbox.regression.gmm import GMM

# Read the Part One dataset (CSV) into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path — the notebook only runs
# where this exact directory exists.
part_one_csv = "/MBAN- Schulich/MBAN- Sem 2/Predictive Modeling/Mid term Project/midterm_partone.csv"
data = pd.read_csv(part_one_csv)

In [None]:
# Split the frame into the three arrays handed to the GMM estimator:
# response (y), regressors (X) and instruments (Z).
regressor_cols = ["Inventory Turnover", "Operating Profit", "Interaction Effect"]
instrument_cols = ["Current Ratio", "Quick Ratio", "Debt Asset Ratio"]

y = data["Stock Change"].values
X = data[regressor_cols].values
Z = data[instrument_cols].values

In [15]:
# Defining the GMM model with updated moment conditions
class CustomGMM(GMM):
    """GMM estimator for a linear model in three regressors plus an offset parameter.

    The parameter vector is ``(p0, p1, p2, p3, delta)``: an intercept, one slope
    for each of the three ``exog`` columns, and ``delta``, which is subtracted
    from every residual-times-instrument moment.
    """

    def momcond(self, params):
        """Return the per-observation moment conditions, one column per moment.

        Columns, in order: the residual itself, ``residual * instrument - delta``
        for each instrument column, and ``residual * exog`` for each regressor
        column.
        """
        p0, p1, p2, p3, delta = params
        regressors = self.exog
        resid = self.endog - p0 - p1 * regressors[:, 0] - p2 * regressors[:, 1] - p3 * regressors[:, 2]
        resid_col = resid[:, None]  # column vector so it broadcasts across the columns

        instrument_moments = resid_col * self.instrument - delta
        regressor_moments = resid_col * regressors
        return np.column_stack((resid, instrument_moments, regressor_moments))

In [16]:
# Initializing parameter estimates: the same 0.1 starting value for all five
# parameters (p0, p1, p2, p3, delta)
n_params = 5
initial_params = np.full(shape=n_params, fill_value=0.1)

In [17]:
# Build the estimator and fit it from the initial parameter vector.
# k_moms=7 matches the columns momcond stacks (1 residual + 3 instrument + 3 regressor
# moments); k_params=5 matches (p0, p1, p2, p3, delta).
model = CustomGMM(
    endog=y,
    exog=X,
    instrument=Z,
    k_moms=7,
    k_params=5,
)
results = model.fit(start_params=initial_params)

Optimization terminated successfully.
         Current function value: 0.000095
         Iterations: 12
         Function evaluations: 16
         Gradient evaluations: 16
Optimization terminated successfully.
         Current function value: 0.001753
         Iterations: 8
         Function evaluations: 11
         Gradient evaluations: 11
Optimization terminated successfully.
         Current function value: 0.001735
         Iterations: 7
         Function evaluations: 12
         Gradient evaluations: 12
Optimization terminated successfully.
         Current function value: 0.001735
         Iterations: 5
         Function evaluations: 9
         Gradient evaluations: 9
Optimization terminated successfully.
         Current function value: 0.001735
         Iterations: 0
         Function evaluations: 1
         Gradient evaluations: 1


In [None]:
# Print the estimation summary table of the fitted GMM model
gmm_summary = results.summary()
print(gmm_summary)

                              CustomGMM Results                               
Dep. Variable:                      y   Hansen J:                        2.942
Model:                      CustomGMM   Prob (Hansen J):                 0.230
Method:                           GMM                                         
Date:                Fri, 08 Nov 2024                                         
Time:                        11:06:16                                         
No. Observations:                1696                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
p 0           -0.0011      0.017     -0.063      0.949      -0.034       0.032
p 1            0.0004      0.000      1.001      0.317      -0.000       0.001
p 2           -0.1191      0.031     -3.860      0.000      -0.180      -0.059
p 3            0.0014      0.000      3.640      0.0

Part Two

In [19]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

In [28]:
# Read the Part Two dataset (CSV) into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path — the notebook only runs
# where this exact directory exists.
part_two_csv = "/MBAN- Schulich/MBAN- Sem 2/Predictive Modeling/Mid term Project/midterm_parttwo.csv"
data2 = pd.read_csv(part_two_csv)

In [29]:
# Show the column names, non-null counts and dtypes of the Part Two data
data2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8081 entries, 0 to 8080
Data columns (total 7 columns):
 #   Column                                Non-Null Count  Dtype 
---  ------                                --------------  ----- 
 0   Years of Education after High School  8081 non-null   int64 
 1   Requested Credit Amount               8081 non-null   object
 2   Number of Dependents                  8081 non-null   object
 3   Monthly Income                        8081 non-null   object
 4   Monthly Expense                       8081 non-null   object
 5   Marital Status                        8081 non-null   object
 6   Credit Rating                         8081 non-null   object
dtypes: int64(1), object(6)
memory usage: 442.1+ KB


Part Two: Q1

In [30]:
# Preprocessing data
# One-hot encode the listed columns: each is replaced by one indicator column per
# distinct value it contains.
# NOTE(review): data2 is overwritten, so re-running this cell without reloading the
# CSV fails because the listed columns no longer exist.
one_hot_cols = [
    'Requested Credit Amount',
    'Marital Status',
    'Number of Dependents',
    'Monthly Income',
    'Monthly Expense',
]
data2 = pd.get_dummies(data2, columns=one_hot_cols)

In [31]:
# Converting 'Credit Rating' to binary: 1 for 'Positive', 0 for every other value.
# The dtype guard keeps this cell safe to re-run: once the column is numeric,
# comparing it to 'Positive' again would silently turn every label into 0.
if not pd.api.types.is_numeric_dtype(data2['Credit Rating']):
    # Vectorized comparison instead of a row-by-row apply(lambda ...)
    data2['Credit Rating'] = data2['Credit Rating'].eq('Positive').astype(int)

In [32]:
# Separate the features from the target, then hold out half of the rows as a test set.
# random_state pins the shuffle so the split is reproducible.
# NOTE(review): X and y reuse the names of the Part One arrays and replace them.
target_col = 'Credit Rating'
X = data2.drop(columns=[target_col])
y = data2[target_col]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=42,
)

In [33]:
# Training logistic regression with scikit-learn's default settings.
# NOTE(review): no class_weight argument is passed, so class weights are NOT balanced
# here; pass class_weight='balanced' (and re-run the cells below) if balancing is intended.
# NOTE(review): `model` reuses the name of the Part One GMM model and replaces it.
model = LogisticRegression()
model.fit(X_train, y_train)

In [34]:
# Score the test set.
# Column 1 of predict_proba is kept as the score (presumably the probability of
# label 1 given the 0/1 target encoding — confirm against model.classes_).
y_pred_proba = model.predict_proba(X_test)[:, 1]
# Hard class labels from predict(), i.e. the default decision rule
y_pred = model.predict(X_test)

In [35]:
# Evaluate the default-threshold predictions against the held-out labels
conf_matrix = confusion_matrix(y_test, y_pred)
precision, recall, f1 = (
    metric(y_test, y_pred) for metric in (precision_score, recall_score, f1_score)
)

print("Confusion Matrix:\n", conf_matrix)
print(f"Precision: {precision:.2f}, Recall: {recall:.2f}, F1 Score: {f1:.2f}")

Confusion Matrix:
 [[   0  577]
 [   0 3464]]
Precision: 0.86, Recall: 1.00, F1 Score: 0.92


Part Two: Q2

In [36]:
# Adjusting threshold for 15% approval rate
# The cutoff is the 85th percentile of the predicted probabilities; rows at or above
# it are labelled 1, all others 0. Rows tied with the cutoff are all labelled 1, so
# the share of 1s can slightly exceed 15%.
approval_percentile = 85  # 85th percentile corresponds to top 15%
threshold = np.percentile(y_pred_proba, approval_percentile)
meets_cutoff = y_pred_proba >= threshold
y_pred_adjusted = meets_cutoff.astype(int)

In [38]:
# Evaluate the adjusted-threshold predictions against the held-out labels
conf_matrix_adj = confusion_matrix(y_test, y_pred_adjusted)
precision_adj, recall_adj, f1_adj = (
    metric(y_test, y_pred_adjusted) for metric in (precision_score, recall_score, f1_score)
)

print("Adjusted Confusion Matrix:\n", conf_matrix_adj)
print(f"Adjusted Precision: {precision_adj:.2f}, Adjusted Recall: {recall_adj:.2f}, Adjusted F1 Score: {f1_adj:.2f}")

Adjusted Confusion Matrix:
 [[ 495   82]
 [2937  527]]
Adjusted Precision: 0.87, Adjusted Recall: 0.15, Adjusted F1 Score: 0.26
