In [None]:
# 1. IMPORT LIBRARIES

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score


# Load the segmented product table that every later cell works from.
# NOTE(review): absolute, machine-specific path — the notebook only runs as-is
# on a machine that has this exact file location.
df = pd.read_csv(
    "C:/Users/cw/Downloads/Pharmaceutical/indian_pharmaceutical_products_segmented.csv"
)


In [None]:
# 2. FEATURE SELECTION FOR RISK MODEL

# Predictor columns, each tagged with the business driver it stands in for.
features = [
    'price_inr',               # proxy for margin pressure
    'company_count',           # proxy for competition intensity
    'num_active_ingredients',  # proxy for formulation complexity
    'regulatory_complexity',   # proxy for compliance burden
]

# Design matrix: missing predictor values are replaced with 0 before modelling.
X = df.loc[:, features].fillna(0)

# Target: the discontinuation flag, taken as-is from the dataset.
y = df.loc[:, 'is_discontinued']

In [None]:
# 3. TRAIN–TEST SPLIT

# Hold out 30% of the rows for evaluation. Stratifying on y keeps the
# discontinued / not-discontinued ratio the same in both partitions, and the
# fixed random_state makes the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, stratify=y, random_state=42
)

In [None]:
# 4. FEATURE SCALING

# Learn the per-feature mean and standard deviation from the training rows
# only, then apply that same transformation to both partitions so no
# information from the test rows influences the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# 5. LOGISTIC REGRESSION (DISCONTINUATION RISK MODEL)

# class_weight='balanced' re-weights samples inversely to class frequency;
# max_iter=1000 raises the solver's iteration cap.
model = LogisticRegression(
    class_weight='balanced',
    max_iter=1000,
)
model.fit(X_train_scaled, y_train)

In [None]:
# 6. MODEL EVALUATION

# Column 1 of predict_proba holds the probability of the positive class.
y_prob = model.predict_proba(X_test_scaled)[:, 1]

# A product is flagged as discontinued when its probability reaches 0.3.
# NOTE(review): 0.3 is a hand-picked cut-off — confirm it is intended given
# that the model is trained with class_weight='balanced'.
flagged = y_prob >= 0.3
y_pred = flagged.astype(int)

# Threshold-dependent metrics first, then the threshold-free ROC AUC.
print(classification_report(y_test, y_pred))
print("ROC AUC:", roc_auc_score(y_test, y_prob))


              precision    recall  f1-score   support

       False       0.99      0.13      0.23     73818
        True       0.03      0.95      0.07      2371

    accuracy                           0.16     76189
   macro avg       0.51      0.54      0.15     76189
weighted avg       0.96      0.16      0.22     76189

ROC AUC: 0.6516046955980134


In [None]:
# 7. MODEL INTERPRETATION (ODDS RATIOS)

# exp(coefficient) converts each log-odds weight into an odds ratio. The model
# was fit on standardized features, so each ratio describes the change in odds
# for a one-standard-deviation increase in that feature, not a one-unit change.
odds_ratios = (
    pd.DataFrame({'feature': features, 'odds_ratio': np.exp(model.coef_[0])})
    .sort_values('odds_ratio', ascending=False)
)

print(odds_ratios)

                  feature  odds_ratio
0               price_inr    1.058743
2  num_active_ingredients    0.965084
3   regulatory_complexity    0.897984
1           company_count    0.573007


In [None]:
# 8. PREDICT DISCONTINUATION RISK FOR ALL PRODUCTS

# Score the full feature matrix (training and test rows alike) using the
# scaler that was fitted on the training rows.
X_all_scaled = scaler.transform(X)

# Keep only the positive-class column: the probability of discontinuation.
df['predicted_discontinuation_risk'] = model.predict_proba(X_all_scaled)[:, 1]

df['predicted_discontinuation_risk']

0         0.255908
1         0.337003
2         0.500853
3         0.558816
4         0.609386
            ...   
253957    0.417904
253958    0.603230
253959    0.575937
253960    0.272910
253961    0.610302
Name: predicted_discontinuation_risk, Length: 253962, dtype: float64

In [19]:
# 12. RISK BUCKETING (BUSINESS FRIENDLY)

# Map each predicted probability onto one of three business-facing tiers.
# Intervals are right-closed: (0.30, 0.60] is Medium and (0.60, 1.00] is High.
df['risk_bucket'] = pd.cut(
    df['predicted_discontinuation_risk'],
    bins=[0, 0.30, 0.60, 1.00],
    labels=['Low Risk', 'Medium Risk', 'High Risk'],
    # Close the first interval on the left as well, so a probability of exactly
    # 0.0 lands in 'Low Risk' instead of becoming NaN and silently dropping out
    # of every bucket.
    include_lowest=True
)

# Preview the first few products with their score and assigned tier.
df[['product_id', 'predicted_discontinuation_risk', 'risk_bucket']].head(10)

Unnamed: 0,product_id,predicted_discontinuation_risk,risk_bucket
0,1,0.255908,Low Risk
1,2,0.337003,Medium Risk
2,3,0.500853,Medium Risk
3,4,0.558816,Medium Risk
4,5,0.609386,High Risk
5,6,0.475218,Medium Risk
6,7,0.255907,Low Risk
7,8,0.337003,Medium Risk
8,9,0.576073,Medium Risk
9,10,0.498053,Medium Risk


In [20]:
# 13. BUSINESS SUMMARY BY RISK LEVEL

# Per-tier profile: average price, competition and regulatory complexity, the
# observed discontinuation rate, and how many products fall in the tier.
risk_summary = (
    # 'risk_bucket' is categorical. Passing observed=False explicitly keeps the
    # current behaviour (every tier gets a row, even an empty one) and avoids
    # the pandas FutureWarning about the default of `observed` changing.
    df.groupby('risk_bucket', observed=False)
    .agg(
        avg_price=('price_inr', 'mean'),
        avg_competition=('company_count', 'mean'),
        avg_regulatory_complexity=('regulatory_complexity', 'mean'),
        # Mean of the discontinuation flag = share of discontinued products.
        discontinued_rate=('is_discontinued', 'mean'),
        # 'count' tallies non-null price_inr values within each tier.
        product_count=('price_inr', 'count')
    )
)

risk_summary

  df.groupby('risk_bucket')


Unnamed: 0_level_0,avg_price,avg_competition,avg_regulatory_complexity,discontinued_rate,product_count
risk_bucket,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Low Risk,102.265549,2557.75413,1.195818,0.013726,32566
Medium Risk,174.309114,624.900485,1.527387,0.029644,197172
High Risk,1279.137834,29.302154,1.032736,0.066546,24224


In [21]:
# Persist the table, now carrying the predicted risk and risk bucket columns.
# NOTE(review): this is the same path the notebook reads its input from, so the
# source CSV is overwritten in place — confirm that is intended.
df.to_csv(
    "C:/Users/cw/Downloads/Pharmaceutical/indian_pharmaceutical_products_segmented.csv",
    index=False,
)