In [1]:
{
 "cells": [
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "!pip install xgboost scikit-learn pandas numpy"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import classification_report\n",
    "import xgboost as xgb\n",
    "import pickle\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Create synthetic dataset\n",
    "np.random.seed(42)\n",
    "df = pd.DataFrame({\n",
    "    'amount': np.random.uniform(10, 5000, 1000),\n",
    "    'location': np.random.choice(['Chennai', 'Mumbai', 'Delhi'], 1000),\n",
    "    'merchant': np.random.choice(['Amazon', 'Flipkart', 'Zomato'], 1000),\n",
    "    'is_fraud': np.random.choice([0, 1], 1000, p=[0.95, 0.05])\n",
    "})\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Encode categorical variables\n",
    "df_encoded = pd.get_dummies(df, columns=['location', 'merchant'])\n",
    "X = df_encoded.drop('is_fraud', axis=1)\n",
    "y = df_encoded['is_fraud']\n",
    "\n",
    "print(\"Feature columns:\", X.columns.tolist())"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Train/test split and train XGBoost\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.2, random_state=42)\n",
    "\n",
    "model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss')\n",
    "model.fit(X_train, y_train)\n",
    "\n",
    "# Evaluate\n",
    "y_pred = model.predict(X_test)\n",
    "print(classification_report(y_test, y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# Save model to backend/models/xgboost_model.pkl\n",
    "output_path = \"../backend/models\"\n",
    "os.makedirs(output_path, exist_ok=True)\n",
    "\n",
    "model_filename = os.path.join(output_path, \"xgboost_model.pkl\")\n",
    "with open(model_filename, \"wb\") as f:\n",
    "    pickle.dump(model, f)\n",
    "\n",
    "print(f\"✅ Model saved to: {model_filename}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


{'cells': [{'cell_type': 'code',
   'metadata': {},
   'source': ['!pip install xgboost scikit-learn pandas numpy']},
  {'cell_type': 'code',
   'metadata': {},
   'source': ['import pandas as pd\n',
    'import numpy as np\n',
    'from sklearn.model_selection import train_test_split\n',
    'from sklearn.metrics import classification_report\n',
    'import xgboost as xgb\n',
    'import pickle\n',
    'import os']},
  {'cell_type': 'code',
   'metadata': {},
   'source': ['# Create synthetic dataset\n',
    'np.random.seed(42)\n',
    'df = pd.DataFrame({\n',
    "    'amount': np.random.uniform(10, 5000, 1000),\n",
    "    'location': np.random.choice(['Chennai', 'Mumbai', 'Delhi'], 1000),\n",
    "    'merchant': np.random.choice(['Amazon', 'Flipkart', 'Zomato'], 1000),\n",
    "    'is_fraud': np.random.choice([0, 1], 1000, p=[0.95, 0.05])\n",
    '})\n',
    'df.head()']},
  {'cell_type': 'code',
   'metadata': {},
   'source': ['# Encode categorical variables\n',
    "df_encode

In [6]:
# Save model to backend/models/xgboost_model.pkl
# The destination is a relative path, so it is resolved against the kernel's
# current working directory when this cell runs.
output_path = "../backend/models"
model_filename = os.path.join(output_path, "xgboost_model.pkl")

# Create the target directory (and any missing parents); a no-op if it already exists.
os.makedirs(output_path, exist_ok=True)

# Serialise the fitted classifier with pickle, overwriting any previous file.
with open(model_filename, "wb") as model_file:
    pickle.dump(model, model_file)

print(f"✅ Model saved to: {model_filename}")


✅ Model saved to: ../backend/models\xgboost_model.pkl


In [5]:
# Train/test split and train XGBoost
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# `use_label_encoder` is deliberately not passed: the installed XGBoost ignores
# it and only emits a 'Parameters: { "use_label_encoder" } are not used.' warning.
model = xgb.XGBClassifier(eval_metric='logloss')
model.fit(X_train, y_train)

# Evaluate on the held-out split (per-class precision / recall / F1 / support).
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.95      0.99      0.97       191
           1       0.00      0.00      0.00         9

    accuracy                           0.94       200
   macro avg       0.48      0.49      0.49       200
weighted avg       0.91      0.94      0.93       200



Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [4]:
# Encode categorical variables
# One-hot encode 'location' and 'merchant'; the remaining columns are carried
# over as-is.
df_encoded = pd.get_dummies(df, columns=['location', 'merchant'])

# Target is the fraud label; features are every other column of the encoded frame.
y = df_encoded['is_fraud']
X = df_encoded.drop(columns='is_fraud')

print("Feature columns:", list(X.columns))


Feature columns: ['amount', 'location_Chennai', 'location_Delhi', 'location_Mumbai', 'merchant_Amazon', 'merchant_Flipkart', 'merchant_Zomato']


In [3]:
# Create synthetic dataset
# Seed the legacy global NumPy generator so the table is reproducible. The
# columns are drawn in the order amount -> location -> merchant -> is_fraud,
# which fixes how the random stream is consumed.
np.random.seed(42)

n_rows = 1000
amount = np.random.uniform(10, 5000, n_rows)
location = np.random.choice(['Chennai', 'Mumbai', 'Delhi'], n_rows)
merchant = np.random.choice(['Amazon', 'Flipkart', 'Zomato'], n_rows)
# Labels are sampled independently of the other columns: P(0) = 0.95, P(1) = 0.05.
is_fraud = np.random.choice([0, 1], n_rows, p=[0.95, 0.05])

df = pd.DataFrame({
    'amount': amount,
    'location': location,
    'merchant': merchant,
    'is_fraud': is_fraud,
})
df.head()


Unnamed: 0,amount,location,merchant,is_fraud
0,1878.955193,Delhi,Flipkart,0
1,4754.064389,Mumbai,Amazon,0
2,3662.64977,Delhi,Flipkart,0
3,2997.305836,Delhi,Zomato,0
4,788.533016,Chennai,Amazon,0


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import xgboost as xgb
import pickle
import os


In [1]:
# Install required libraries
!pip install xgboost scikit-learn pandas numpy


Collecting xgboost
  Using cached xgboost-3.0.3-py3-none-win_amd64.whl.metadata (2.1 kB)
Using cached xgboost-3.0.3-py3-none-win_amd64.whl (149.9 MB)
Installing collected packages: xgboost
Successfully installed xgboost-3.0.3
