In [77]:
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import (
    confusion_matrix, 
    classification_report, 
    f1_score, 
    accuracy_score, 
    precision_score, 
    recall_score
)
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [78]:
# Load the held-out test split; features and labels are stored in separate CSV files.
X_test_raw = pd.read_csv('../Data/X_test.csv')
# squeeze() turns the label frame into a Series when the CSV holds a single column.
y_test = pd.read_csv('../Data/y_test.csv').squeeze()

# Keep the company names for reporting, but exclude them from the model inputs.
company_names = X_test_raw['company']
X_test = X_test_raw.drop(columns='company')

print("Data Loaded")
print(f"Test set shape: {X_test.shape}")
print(f"Features: {list(X_test.columns)}")
print("\nTest data preview (first 10 rows):")
display(X_test.head(10))

Data Loaded
Test set shape: (242, 9)
Features: ['industry', 'stage', 'funds_raised', 'region', 'recency', 'events_deviation', 'recency_deviation', 'layoff_events_category', 'funds_raised_binned']

Test data preview (first 10 rows):


Unnamed: 0,industry,stage,funds_raised,region,recency,events_deviation,recency_deviation,layoff_events_category,funds_raised_binned
0,Transportation,Unknown,45.0,North America,5,-0.458333,2.055556,once,tiny
1,Media,Post-IPO,77.0,North America,5,-0.435897,2.333333,once,small
2,Healthcare,Post-IPO,172.0,North America,2,-0.260417,-0.59375,once,medium
3,Finance,Series E,321.0,Europe,3,0.709877,-0.006173,twice,medium
4,Education,Series D,483.0,North America,3,-0.527778,0.138889,once,large
5,Transportation,Unknown,86.0,North America,1,-0.458333,-1.944444,once,small
6,Food,Series C,127.0,North America,0,-0.378788,-2.909091,once,small
7,Sales,Post-IPO,65.0,North America,5,3.583333,2.583333,four_plus,small
8,Food,Series D,711.0,North America,2,-0.378788,-0.909091,once,large
9,Finance,Series C,142.0,North America,3,-0.290123,-0.006173,once,small


In [79]:
# Integer-encode the categorical columns on a copy, leaving X_test untouched.
# NOTE(review): every encoder is fitted on the test split itself, so the integer
# codes agree with the ones the model was trained on only if each column holds the
# same category set as at training time -- confirm, or load the training-time encoders.
categorical_cols = ['industry', 'stage', 'region', 'layoff_events_category', 'funds_raised_binned']

X_test_encoded = X_test.copy()
for name in categorical_cols:
    X_test_encoded[name] = LabelEncoder().fit_transform(X_test[name])

print("Encoding complete.")
print(f"Encoded test shape: {X_test_encoded.shape}")

Encoding complete.
Encoded test shape: (242, 9)


In [80]:
# Load best model (XGBoost)
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code --
# only load model artifacts from a trusted source.
MODEL_PATH = '../Model/xgboost_model.pkl'
model = joblib.load(MODEL_PATH)

# Hard class predictions, plus the probability column at index 1 for every row.
y_pred = model.predict(X_test_encoded)
class_probabilities = model.predict_proba(X_test_encoded)
y_pred_proba = class_probabilities[:, 1]
print("Prediction complete.")

Prediction complete.


In [81]:
# One row per test company: ground truth, predicted label, predicted probability,
# and a boolean flag telling whether the prediction matched the ground truth.
predictions_df = pd.DataFrame({
    'Company': company_names.values,
    'Actual': y_test.values,
    'Predicted': y_pred,
    'Probability': y_pred_proba,
}).assign(Correct=lambda frame: frame['Actual'].eq(frame['Predicted']))

In [82]:
# Draw 20 distinct rows for demonstration; the fixed seed keeps the sample reproducible.
np.random.seed(42)
random_indices = np.random.choice(len(predictions_df), size=20, replace=False)
# take() selects by position; sort_index() then restores the original row order.
sample_predictions = predictions_df.take(random_indices).sort_index()

print("Random Predictions Sample (20 samples):")

def style_predictions(df):
    styled = df.style
    
    styled = styled.applymap(
        lambda x: 'background-color: skyblue' if x == True else 'background-color: lightcoral',
        subset=['Correct']
    )
    
    def prob_color(val):
        if val > 0.7:
            return 'background-color: magenta; color: white'
        elif 0.5 <= val <= 0.7:
            return 'background-color: yellow'
        elif val < 0.5:
            return 'background-color: lightgreen'
        else:
            return ''
    
    styled = styled.applymap(prob_color, subset=['Probability'])
    
    return styled.set_properties(**{'text-align': 'center'})

# Render the colour-coded sample in the notebook output.
styled_sample = style_predictions(sample_predictions)
display(styled_sample)

Random Predictions Sample (20 samples):


Unnamed: 0,Company,Actual,Predicted,Probability,Correct
6,Restaurant365,0,0,0.312574,True
9,Embroker,0,0,0.25904,True
15,Autograph,1,0,0.283528,False
24,Guild,1,1,0.581372,True
45,Intuit,1,1,0.84977,True
66,Glisser,1,1,0.580257,True
82,Atlas,0,0,0.223981,True
113,Deepwatch,0,0,0.173749,True
127,Shogun,0,0,0.259571,True
150,Proofpoint,0,0,0.19131,True


In [83]:
# Headline counts derived from the boolean 'Correct' column.
is_correct = predictions_df['Correct']
summary_lines = [
    "Predictions Summary:",
    f"Total predictions: {len(predictions_df)}",
    f"Correct: {is_correct.sum()}",
    f"Incorrect: {(~is_correct).sum()}",
    f"Accuracy: {is_correct.mean():.2%}",
]
print("\n".join(summary_lines))

Predictions Summary:
Total predictions: 242
Correct: 190
Incorrect: 52
Accuracy: 78.51%


In [84]:
# Flatten the confusion matrix into its four cells, unpacked in the order tn, fp, fn, tp.
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()

print("Confusion Matrix:")
cell_descriptions = [
    ("TP (True Positive)", tp, "correctly identified high-risk companies"),
    ("FP (False Positive)", fp, "predicted high-risk but actually safe"),
    ("FN (False Negative)", fn, "predicted safe but actually high-risk"),
    ("TN (True Negative)", tn, "correctly identified safe companies"),
]
for label, count, meaning in cell_descriptions:
    # Labels are left-padded to a common width so the '=' signs line up.
    print(f"{label:<21}= {count}  → {meaning}")

print("\nDetailed Classification Report:")
report = classification_report(y_test, y_pred, target_names=['Low Risk', 'High Risk'])
print(report)

# Scalar metrics are kept under their own names for reuse after this cell.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("Key Metrics:")
for metric_name, metric_value in [
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1-Score", f1),
]:
    print(f"{metric_name}: {metric_value:.4f}")

Confusion Matrix:
TP (True Positive)   = 64  → correctly identified high-risk companies
FP (False Positive)  = 31  → predicted high-risk but actually safe
FN (False Negative)  = 21  → predicted safe but actually high-risk
TN (True Negative)   = 126  → correctly identified safe companies

Detailed Classification Report:
              precision    recall  f1-score   support

    Low Risk       0.86      0.80      0.83       157
   High Risk       0.67      0.75      0.71        85

    accuracy                           0.79       242
   macro avg       0.77      0.78      0.77       242
weighted avg       0.79      0.79      0.79       242

Key Metrics:
Accuracy: 0.7851
Precision: 0.6737
Recall: 0.7529
F1-Score: 0.7111


In [85]:
# FP high means model is over-sensitive and flags safe companies as risky.
# FN high means model misses truly high-risk companies.

In [86]:
# Accuracy 0.7851: Model correctly classifies 78.5% of all companies
# Precision 0.6737: Of companies predicted as high-risk, 67% are actually high-risk (33% false alarms)
# Recall 0.7529: Model catches 75.3% of actual high-risk companies (misses 24.7%)
# F1-Score 0.7111: Balanced measure of model performance
# 
# Business Insight: The model reliably identifies ~3/4 of high-risk companies with moderate false positives,
# making it useful for investors to flag potentially unstable companies for further investigation.