# STEP 1: Set Up the Environment on Google Colab

In [1]:
# Install required libraries
!pip install pandas scikit-learn matplotlib seaborn joblib ibm-watson-machine-learning



# STEP 2: Upload & Load Dataset

In [2]:
import io

import pandas as pd
from google.colab import files

uploaded = files.upload()  # Upload `nsapallschemes.csv`

# Read the dataset from the bytes returned by the upload widget instead of a
# hard-coded path. When a file with the same name already exists, Colab stores
# the new upload under a different name (e.g. "nsapallschemes (1).csv"), so
# reading "nsapallschemes.csv" from disk would silently load the stale copy;
# an upload with any other name would raise FileNotFoundError.
if uploaded:
    # Prefer the expected file name; otherwise take the first uploaded file.
    if "nsapallschemes.csv" in uploaded:
        dataset_name = "nsapallschemes.csv"
    else:
        dataset_name = next(iter(uploaded))
    df = pd.read_csv(io.BytesIO(uploaded[dataset_name]))
else:
    # Nothing was uploaded (e.g. the dialog was cancelled): fall back to a
    # copy that is already present in the working directory.
    df = pd.read_csv("nsapallschemes.csv")
df.head()

Saving nsapallschemes.csv to nsapallschemes.csv


Unnamed: 0,finyear,lgdstatecode,statename,lgddistrictcode,districtname,schemecode,totalbeneficiaries,totalmale,totalfemale,totaltransgender,totalsc,totalst,totalgen,totalobc,totalaadhaar,totalmobilenumber
0,2025-2026,1,JAMMU AND KASHMIR,1,ANANTNAG,IGNDPS,108,72,36,0,0,3,104,1,108,69
1,2025-2026,1,JAMMU AND KASHMIR,1,ANANTNAG,IGNOAPS,8438,5059,3379,0,37,235,8083,83,8371,7190
2,2025-2026,1,JAMMU AND KASHMIR,1,ANANTNAG,IGNWPS,202,0,202,0,1,15,180,6,200,159
3,2025-2026,1,JAMMU AND KASHMIR,10,POONCH,IGNDPS,310,211,99,0,0,77,200,33,234,110
4,2025-2026,1,JAMMU AND KASHMIR,10,POONCH,IGNOAPS,5958,3958,2000,0,2,1347,4367,242,3875,2287


# STEP 3: Preprocess Dataset

In [3]:
# Optional mapping for clarity: scheme code -> human-readable scheme name.
# It is kept as a lookup table only. It is deliberately not written into `df`:
# the column selection below would drop it again straight away.
scheme_mapping = {
    'IGNOAPS': 'Old Age Pension',
    'IGNWPS': 'Widow Pension',
    'IGNDPS': 'Disability Pension'
}

# Select features and target
FEATURE_COLUMNS = [
    'totalbeneficiaries', 'totalmale', 'totalfemale', 'totaltransgender',
    'totalsc', 'totalst', 'totalgen', 'totalobc',
    'totalaadhaar', 'totalmobilenumber',
]
TARGET_COLUMN = 'schemecode'

# .copy() makes the result an independent frame, so later column assignments
# on `df` do not trigger SettingWithCopyWarning.
df = df[FEATURE_COLUMNS + [TARGET_COLUMN]].copy()


# STEP 4: Train the Model

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib

# One seed shared by the split and the forest so that a re-run reproduces
# the same report and the same saved model.
RANDOM_STATE = 42

# Features are every column except the scheme code; the scheme code is the label.
X = df.drop('schemecode', axis=1)
y = df['schemecode']

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Without random_state the forest is built with fresh randomness on every
# run, so the metrics below would change from one execution to the next.
model = RandomForestClassifier(random_state=RANDOM_STATE)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

# Save model
joblib.dump(model, "nsap_model.pkl")


              precision    recall  f1-score   support

      IGNDPS       0.96      0.96      0.96       139
     IGNOAPS       0.97      0.96      0.97       151
      IGNWPS       0.99      1.00      0.99       142

    accuracy                           0.97       432
   macro avg       0.97      0.97      0.97       432
weighted avg       0.97      0.97      0.97       432



['nsap_model.pkl']