In [1]:
# Standard library: used in the final cell to serialize the fitted model.
import pickle

# Third-party numerical / tabular stack.
import numpy as np
import pandas as pd

# scikit-learn pieces: report printing, the classifier, and the train/test split.
from sklearn.metrics import classification_report
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

print("Libraries imported successfully!")

Libraries imported successfully!


In [2]:
# Seed NumPy's legacy global RNG so the synthetic data set is reproducible.
np.random.seed(42)
num_samples = 1000

# One sampler per feature column. The list order is significant: every draw
# advances the shared global RNG, so reordering entries changes the data.
feature_samplers = [
    ('debtToEquity', lambda n: np.random.uniform(0, 300, n)),
    ('returnOnAssets', lambda n: np.random.uniform(-0.1, 0.3, n)),
    ('grossMargin', lambda n: np.random.uniform(0.1, 0.9, n)),
    ('operatingMargin', lambda n: np.random.uniform(-0.2, 0.5, n)),
    ('marketCap', lambda n: np.random.lognormal(25, 2, n)),
    ('GDP_latest', lambda n: np.random.uniform(20000, 35000, n)),
    ('FED_funds_latest', lambda n: np.random.uniform(3.0, 5.5, n)),
    # randint's upper bound is exclusive, so the integers fall in -10..9.
    ('news_sentiment', lambda n: np.random.randint(-10, 10, n)),
]

# Dict comprehensions evaluate in iteration order, so columns are drawn
# in exactly the order listed above.
data = {column: draw(num_samples) for column, draw in feature_samplers}
df = pd.DataFrame(data)

print("Synthetic DataFrame created with shape:", df.shape)
# Last expression of the cell: rendered by Jupyter as a preview table.
df.head()

Synthetic DataFrame created with shape: (1000, 8)


Unnamed: 0,debtToEquity,returnOnAssets,grossMargin,operatingMargin,marketCap,GDP_latest,FED_funds_latest,news_sentiment
0,112.362036,-0.025947,0.309365,0.270892,1026608000000.0,28365.330368,3.391532,-5
1,285.214292,0.11676,0.297583,0.357677,134707100000.0,20819.154548,5.4369,2
2,219.598183,0.249178,0.825004,-0.024672,21407200000.0,34577.67952,5.106346,-6
3,179.597545,0.19289,0.299637,0.237412,179207200000.0,27689.228662,4.252192,4
4,46.805592,0.222624,0.31756,0.200222,28747580000.0,24363.152813,5.453554,7


In [3]:
# Define what makes a 'good' company in our synthetic world:
# a row is labelled 1 when it matches at least one of the two profiles below.

# Profile A: gross margin above 0.4, debt-to-equity below 100,
# return on assets above 0.05, and non-negative news sentiment.
margin_low_debt_good_news = (
    (df['grossMargin'] > 0.4)
    & (df['debtToEquity'] < 100)
    & (df['returnOnAssets'] > 0.05)
    & (df['news_sentiment'] >= 0)
)

# Profile B: gross margin above 0.6, debt-to-equity below 150,
# return on assets above 0.1; news sentiment is not considered.
high_margin_high_return = (
    (df['grossMargin'] > 0.6)
    & (df['debtToEquity'] < 150)
    & (df['returnOnAssets'] > 0.1)
)

conditions = [margin_low_debt_good_news, high_margin_high_return]

# Element-wise OR of the two boolean masks, stored as 1/0 integers.
df['credit_worthiness'] = (margin_low_debt_good_news | high_margin_high_return).astype(int)

print("Target label 'credit_worthiness' created.")
print("Distribution of labels:")
print(df['credit_worthiness'].value_counts())

Target label 'credit_worthiness' created.
Distribution of labels:
credit_worthiness
0    867
1    133
Name: count, dtype: int64


In [4]:
# Split the frame into the label vector and the feature matrix
# (every column except the label).
y = df['credit_worthiness']
X = df.drop(columns='credit_worthiness')

# Hold out 20% of the rows for evaluation; stratifying on y keeps the
# class ratio the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Hyper-parameters for the gradient-boosted tree ensemble.
gb_params = {
    'n_estimators': 100,
    'learning_rate': 0.1,
    'max_depth': 3,
    'random_state': 42,
}
model = GradientBoostingClassifier(**gb_params)
model.fit(X_train, y_train)

# Score the held-out rows and print per-class precision / recall / F1.
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99       173
           1       0.96      0.96      0.96        27

    accuracy                           0.99       200
   macro avg       0.98      0.98      0.98       200
weighted avg       0.99      0.99      0.99       200



In [5]:
# Serialize the fitted classifier to model.pkl in the current working directory.
# The context manager closes the file handle even if pickling raises.
with open('model.pkl', mode='wb') as model_file:
    pickle.dump(model, model_file)

print("Model saved successfully as model.pkl")

Model saved successfully as model.pkl
