# Batch Prediction from credit_default.xls

Convert the Excel dataset to CSV and submit it to the batch inference endpoint.

In [1]:
from pathlib import Path

# Base directory for all notebook inputs and outputs: the current working
# directory (assumed to be the notebook's folder -- confirm if the kernel is
# started elsewhere). Resolved once so every derived path is reported in the
# same canonical form instead of mixing resolved and unresolved spellings.
NOTEBOOK_DIR = Path.cwd().resolve()

# Excel workbook that is read as the source dataset.
DATA_FILE = (NOTEBOOK_DIR / "credit_default.xls").resolve()

# CSV file that the workbook is converted to before being uploaded.
BATCH_CSV = NOTEBOOK_DIR / "credit_default_batch.csv"

print(f"Excel source: {DATA_FILE}")
print(f"Batch CSV will be written to: {BATCH_CSV}")

Excel source: C:\Users\admor\projects\default-credit-card-clients\notebooks\credit_default.xls
Batch CSV will be written to: c:\Users\admor\projects\default-credit-card-clients\notebooks\credit_default_batch.csv


In [3]:
import pandas as pd

# header=1 takes the column names from the second spreadsheet row
# (the first row of the workbook is skipped).
df = pd.read_excel(DATA_FILE, header=1)

# Write the frame out as CSV without the positional index column.
df.to_csv(BATCH_CSV, index=False)

row_count = len(df)
print(f"Wrote {row_count} rows to {BATCH_CSV}")

# Preview the first rows as the cell's rich output.
df.head()

Wrote 30000 rows to c:\Users\admor\projects\default-credit-card-clients\notebooks\credit_default_batch.csv


Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default payment next month
0,1,20000,2,2,1,24,2,2,-1,-1,...,0,0,0,0,689,0,0,0,0,1
1,2,120000,2,2,2,26,-1,2,0,0,...,3272,3455,3261,0,1000,1000,1000,0,2000,1
2,3,90000,2,2,2,34,0,0,0,0,...,14331,14948,15549,1518,1500,1000,1000,1000,5000,0
3,4,50000,2,2,1,37,0,0,0,0,...,28314,28959,29547,2000,2019,1200,1100,1069,1000,0
4,5,50000,1,2,1,57,-1,0,-1,0,...,20940,19146,19131,2000,36681,10000,9000,689,679,0


In [4]:
import httpx

# Batch prediction endpoint of the locally running API.
API_URL = "http://127.0.0.1:8000/api/v1/predictions/batch"

# Open the HTTP client (60 second timeout) and the CSV together; the CSV is
# sent as a multipart upload under the form field "file".
with httpx.Client(timeout=60.0) as client, BATCH_CSV.open("rb") as csv_stream:
    response = client.post(
        API_URL,
        files={"file": (BATCH_CSV.name, csv_stream, "text/csv")},
    )

# Fail loudly on an HTTP error status before looking at the body.
response.raise_for_status()

print("Status:", response.status_code)
# Only the first 1000 characters are shown to keep the cell output short.
print(response.text[:1000])

Status: 200
id,probability,is_default
1,0.6594153246410956,True
2,0.35148843079188496,True
3,0.10778358034878417,False
4,0.16686534493380714,False
5,0.11500516629335067,False
6,0.15431100549680093,False
7,0.0780680148326967,False
8,0.11730429268531198,False
9,0.251131035766207,False
10,0.12732036826491006,False
11,0.1909052408691257,False
12,0.14069809770981953,False
13,0.08301127596918792,False
14,0.5536550495569397,True
15,0.03762506554962749,False
16,0.36495635114586733,True
17,0.45906275063404356,True
18,0.047137720120196826,False
19,0.469610676606483,True
20,0.3734234300368021,True
21,0.08301127596918792,False
22,0.20200867837557265,False
23,0.7169246603765835,True
24,0.09388648229944527,False
25,0.10999070787410561,False
26,0.1903562299600493,False
27,0.2815855671208902,False
28,0.14311294190839213,False
29,0.12502124187294872,False
30,0.13540874992157578,False
31,0.043570227489406,False
32,0.6922925222144369,True
33,0.10171923815685276,False
34,0.0361467103


In [5]:
import io

# Wrap the response body (CSV text) in an in-memory text buffer so pandas
# can parse it like a file.
response_buffer = io.StringIO(response.text)
batch_results = pd.read_csv(response_buffer)

# Preview the first parsed predictions as the cell's rich output.
batch_results.head()

Unnamed: 0,id,probability,is_default
0,1,0.659415,True
1,2,0.351488,True
2,3,0.107784,False
3,4,0.166865,False
4,5,0.115005,False


In [6]:
# Save the predictions next to the notebook. index=False keeps the positional
# index out of the file (matching how the batch CSV was written), so reading
# results.csv back does not produce a spurious "Unnamed: 0" column.
batch_results.to_csv("results.csv", index=False)