In [1]:
import pandas as pd
import numpy as np

# Build a synthetic healthcare + stock dataset.
# Every column below is an independent random draw, so none of the healthcare
# features has any built-in relationship with stock_price_change.
np.random.seed(42)
n = 1000

# The draws are made one at a time in a fixed order: with a seeded global
# generator, the order of the calls decides which values land in which column.
simulated = {}
simulated['hospital_visits'] = np.random.randint(100, 10000, n)
simulated['insurance_claims'] = np.random.randint(50, 9000, n)
simulated['disease_spike_index'] = np.random.uniform(0, 1, n)
simulated['healthcare_sentiment'] = np.random.uniform(-1, 1, n)
simulated['stock_price_change'] = np.random.normal(0, 1, n)
data = pd.DataFrame(simulated)

# Binary label: 1 (good investment) when the simulated price change exceeds 0.2,
# otherwise 0 (bad investment).
data = data.assign(invest_good=data['stock_price_change'].gt(0.2).astype(int))
data.head()


Unnamed: 0,hospital_visits,insurance_claims,disease_spike_index,healthcare_sentiment,stock_price_change,invest_good
0,7370,3227,0.011031,-0.718394,0.635446,1
1,960,8845,0.416154,0.244827,-1.117418,0
2,5490,6074,0.481344,0.562644,-1.707384,0
3,5291,5159,0.019192,0.156597,-1.553584,0
4,5834,4196,0.259813,-0.706075,-0.27016,0


Preprocessing

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Split the frame into the label and the predictor columns.
# stock_price_change is removed together with the label because invest_good is
# computed directly from it; leaving it in would leak the answer to the model.
target_column = 'invest_good'
y = data[target_column]
X = data.drop(columns=['stock_price_change', target_column])

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features. The scaler is fitted on the training rows only and
# the same fitted transformation is then applied to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


Model Building (Logistic Regression)

In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# Fit a logistic-regression classifier (library defaults) on the standardized
# training features.
model = LogisticRegression()
model.fit(X_train_scaled, y_train)

# Predict labels for the held-out rows.
y_pred = model.predict(X_test_scaled)

# Evaluate: confusion matrix (rows = true class, columns = predicted class)
# followed by per-class precision / recall / F1.
print(confusion_matrix(y_test, y_pred))
# zero_division=0: when a class is never predicted its precision is 0/0.
# The default ("warn") reports 0 as well but emits a warning per averaged
# metric; passing 0 explicitly keeps the same numbers without the warning noise.
print(classification_report(y_test, y_pred, zero_division=0))


[[105   0]
 [ 95   0]]
              precision    recall  f1-score   support

           0       0.53      1.00      0.69       105
           1       0.00      0.00      0.00        95

    accuracy                           0.53       200
   macro avg       0.26      0.50      0.34       200
weighted avg       0.28      0.53      0.36       200



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Prediction

In [7]:
# New scenario: a single hand-written healthcare snapshot to score.
# NOTE(review): hospital_visits=500000 and insurance_claims=10 are far outside
# the ranges the simulated training data was drawn from (100-10000 and 50-9000),
# so the model is extrapolating here and the resulting probability should not be
# read as a trustworthy estimate.
new_data = pd.DataFrame([{
    'hospital_visits': 500000,
    'insurance_claims': 10,
    'disease_spike_index': 0.4,
    'healthcare_sentiment': 0.3
}])

# Preprocess with the scaler that was fitted on the training features.
new_data_scaled = scaler.transform(new_data)

# Predict the class and the probability of class 1 ("good investment").
prediction = model.predict(new_data_scaled)[0]
probability = model.predict_proba(new_data_scaled)[0][1]

# Confidence must describe the decision that is actually printed: for a "bad"
# decision that is P(class 0) = 1 - P(class 1), not P(class 1).
confidence = probability if prediction else 1 - probability

print("Investment Decision:", "✅ Good Investment" if prediction else "❌ Bad Investment")
print(f"Confidence: {confidence:.2f}")


Investment Decision: ✅ Good Investment
Confidence: 1.00
