In [5]:
from fastapi import FastAPI
from pydantic import BaseModel
from typing import Optional
import pandas as pd
from pyspark.ml import PipelineModel
from pyspark.sql import SparkSession
from pyspark.ml import PipelineModel
from pyspark.sql.types import *

spark = SparkSession.builder.appName("TitanicClassifierAPI").getOrCreate()

schema = StructType([
    StructField("PassengerId", IntegerType(), True),
    StructField("Pclass", IntegerType(), True),
    StructField("Name", StringType(), True),
    StructField("Sex", StringType(), True),
    StructField("Age", FloatType(), True),
    StructField("SibSp", IntegerType(), True),
    StructField("Parch", IntegerType(), True),
    StructField("Ticket", StringType(), True),
    StructField("Fare", FloatType(), True),
    StructField("Cabin", StringType(), True),
    StructField("Embarked", StringType(), True),
])

# -------------------------------
# Load model at startup
# -------------------------------
MODEL_PATH = "deployment/model/sparkml"  # wherever you saved it
model = PipelineModel.load(MODEL_PATH)
# Define FastAPI app

# -------------------------------
app = FastAPI(title="TitanicClassifier Inference API")

# -------------------------------
# Define input schema
# -------------------------------
# Example: replace feature1, feature2 with your actual features
class PredictionRequest(BaseModel):
    PassengerId: Optional[int] = None
    Pclass: int
    Name: Optional[str] = None
    Sex: str
    Age: Optional[float] = None
    SibSp: Optional[int] = None
    Parch: int
    Ticket: Optional[str] = None
    Fare: Optional[float] = None
    Cabin: Optional[str] = None
    Embarked: Optional[str] = None

# -------------------------------
# /predict endpoint
# -------------------------------
@app.post("/predict")
def predict(request: PredictionRequest):

    input_data = [request.dict()]
    input_df = spark.createDataFrame(input_data, schema=schema)
    preds = model.transform(input_df)
    print(preds)
    result_df = preds.select("prediction", "probability").toPandas()
    # Return predictions as JSON
    return {"predictions": result_df.to_dict('records')}


In [4]:
print(model.stages)

[PipelineModel_3cc8edbcf60a, RandomForestClassificationModel: uid=RandomForestClassifier_f0ca91842f89, numTrees=20, numClasses=2, numFeatures=6]


In [17]:
import requests
import pandas as pd
import numpy as np
from training.spark_session import spark_session_creator
url = "http://localhost:8000/predict"

test_df=pd.read_csv(r"/root/AILabProject/data/test.csv")
test_df = test_df.where(pd.notnull(test_df), None)
test_df.replace([np.inf, -np.inf], None, inplace=True)
test_df = test_df.where(pd.notnull(test_df), None)

predictions = []

for i, row in test_df.iterrows():
    print(i)
    payload = row.to_dict()
    print(payload)
    response = requests.post(url, json=payload)
    print(response)
    predictions.append(response.json()["predictions"][0])

0
{'PassengerId': 892, 'Pclass': 3, 'Name': 'Kelly, Mr. James', 'Sex': 'male', 'Age': 34.5, 'SibSp': 0, 'Parch': 0, 'Ticket': '330911', 'Fare': 7.8292, 'Cabin': None, 'Embarked': 'Q'}
<Response [200]>
1
{'PassengerId': 893, 'Pclass': 3, 'Name': 'Wilkes, Mrs. James (Ellen Needs)', 'Sex': 'female', 'Age': 47.0, 'SibSp': 1, 'Parch': 0, 'Ticket': '363272', 'Fare': 7.0, 'Cabin': None, 'Embarked': 'S'}
<Response [200]>
2
{'PassengerId': 894, 'Pclass': 2, 'Name': 'Myles, Mr. Thomas Francis', 'Sex': 'male', 'Age': 62.0, 'SibSp': 0, 'Parch': 0, 'Ticket': '240276', 'Fare': 9.6875, 'Cabin': None, 'Embarked': 'Q'}
<Response [200]>
3
{'PassengerId': 895, 'Pclass': 3, 'Name': 'Wirz, Mr. Albert', 'Sex': 'male', 'Age': 27.0, 'SibSp': 0, 'Parch': 0, 'Ticket': '315154', 'Fare': 8.6625, 'Cabin': None, 'Embarked': 'S'}
<Response [200]>
4
{'PassengerId': 896, 'Pclass': 3, 'Name': 'Hirvonen, Mrs. Alexander (Helga E Lindqvist)', 'Sex': 'female', 'Age': 22.0, 'SibSp': 1, 'Parch': 1, 'Ticket': '3101298', 'Fare

InvalidJSONError: Out of range float values are not JSON compliant

In [18]:
import requests
import pandas as pd
import numpy as np

url = "http://localhost:8000/predict"

# 1. Load the data
test_df=pd.read_csv(r"/root/AILabProject/data/test.csv")

# 2. Clean the entire DataFrame first (your method is correct)
test_df.replace([np.inf, -np.inf], None, inplace=True)
test_df = test_df.where(pd.notnull(test_df), None)

# 3. Convert the cleaned DataFrame into a list of dictionaries
# This is the key step that correctly handles the 'None' values.
payload_list = test_df.to_dict('records')

predictions = []

# 4. Now, loop through the clean list of payloads
for i, payload in enumerate(payload_list):
    print(i)
    # This payload will now have {'Age': None} for row 10
    print(payload)
    
    try:
        response = requests.post(url, json=payload)
        # Check if the request was successful
        response.raise_for_status() 
        print(response)
        predictions.append(response.json()["predictions"][0])
    
    except requests.exceptions.RequestException as e:
        print(f"Error on row {i}: {e}")
        # If the server gives an error, print the details
        if e.response is not None:
            print(f"Server response: {e.response.text}")
        break # Stop the loop on the first error

0
{'PassengerId': 892, 'Pclass': 3, 'Name': 'Kelly, Mr. James', 'Sex': 'male', 'Age': 34.5, 'SibSp': 0, 'Parch': 0, 'Ticket': '330911', 'Fare': 7.8292, 'Cabin': None, 'Embarked': 'Q'}
<Response [200]>
1
{'PassengerId': 893, 'Pclass': 3, 'Name': 'Wilkes, Mrs. James (Ellen Needs)', 'Sex': 'female', 'Age': 47.0, 'SibSp': 1, 'Parch': 0, 'Ticket': '363272', 'Fare': 7.0, 'Cabin': None, 'Embarked': 'S'}
<Response [200]>
2
{'PassengerId': 894, 'Pclass': 2, 'Name': 'Myles, Mr. Thomas Francis', 'Sex': 'male', 'Age': 62.0, 'SibSp': 0, 'Parch': 0, 'Ticket': '240276', 'Fare': 9.6875, 'Cabin': None, 'Embarked': 'Q'}
<Response [200]>
3
{'PassengerId': 895, 'Pclass': 3, 'Name': 'Wirz, Mr. Albert', 'Sex': 'male', 'Age': 27.0, 'SibSp': 0, 'Parch': 0, 'Ticket': '315154', 'Fare': 8.6625, 'Cabin': None, 'Embarked': 'S'}
<Response [200]>
4
{'PassengerId': 896, 'Pclass': 3, 'Name': 'Hirvonen, Mrs. Alexander (Helga E Lindqvist)', 'Sex': 'female', 'Age': 22.0, 'SibSp': 1, 'Parch': 1, 'Ticket': '3101298', 'Fare

In [31]:
import requests
import pandas as pd
import numpy as np

url = "http://localhost:8000/predict"

try:
    # 1. Load the data from your CSV
    test_df = pd.read_csv(r"/root/AILabProject/data/test.csv",na_values=None)

    # 2. Clean the entire DataFrame first
    # Replace special float values (inf, -inf) with None
    test_df.replace([np.inf, -np.inf], None, inplace=True)
    # Replace NaN values with None. This is a robust way to handle it.
    test_df = test_df.where(pd.notnull(test_df), None)

    # 3. Convert the cleaned DataFrame into a list of dictionaries
    # This is the key step that correctly handles the 'None' values for 'Age'.
    payload_list = test_df.to_dict('records')

    predictions = []
    print("--- Starting API Requests ---")

    # 4. Now, loop through the clean list of payloads
    for i, payload in enumerate(payload_list):
        # This payload will now correctly have {'Age': None} when age is missing
        response = requests.post(url, json=payload)
        
        # Check if the request was successful
        response.raise_for_status() 
        
        predictions.append(response.json()["predictions"][0])
        print(f"Row {i}: Success (Status Code {response.status_code})")

    print("\n--- All predictions completed successfully! ---")

except requests.exceptions.RequestException as e:
    print(f"\n--- ERROR on row {i} ---")
    print(f"Payload sent: {payload}")
    print(f"Error details: {e}")
    if e.response is not None:
        print(f"Server response ({e.response.status_code}): {e.response.text}")

except Exception as e:
    print(f"An unexpected error occurred: {e}")

--- Starting API Requests ---
Row 0: Success (Status Code 200)
Row 1: Success (Status Code 200)
Row 2: Success (Status Code 200)
Row 3: Success (Status Code 200)
Row 4: Success (Status Code 200)
Row 5: Success (Status Code 200)
Row 6: Success (Status Code 200)
Row 7: Success (Status Code 200)
Row 8: Success (Status Code 200)
Row 9: Success (Status Code 200)

--- ERROR on row 10 ---
Payload sent: {'PassengerId': 902, 'Pclass': 3, 'Name': 'Ilieff, Mr. Ylio', 'Sex': 'male', 'Age': nan, 'SibSp': 0, 'Parch': 0, 'Ticket': '349220', 'Fare': 7.8958, 'Cabin': None, 'Embarked': 'S'}
Error details: Out of range float values are not JSON compliant


In [32]:
test_df[4:13]

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S
5,897,3,"Svensson, Mr. Johan Cervin",male,14.0,0,0,7538,9.225,,S
6,898,3,"Connolly, Miss. Kate",female,30.0,0,0,330972,7.6292,,Q
7,899,2,"Caldwell, Mr. Albert Francis",male,26.0,1,1,248738,29.0,,S
8,900,3,"Abrahim, Mrs. Joseph (Sophie Halaut Easu)",female,18.0,0,0,2657,7.2292,,C
9,901,3,"Davies, Mr. John Samuel",male,21.0,2,0,A/4 48871,24.15,,S
10,902,3,"Ilieff, Mr. Ylio",male,,0,0,349220,7.8958,,S
11,903,1,"Jones, Mr. Charles Cresson",male,46.0,0,0,694,26.0,,S
12,904,1,"Snyder, Mrs. John Pillsbury (Nelle Stevenson)",female,23.0,1,0,21228,82.2667,B45,S


In [1]:
import requests
import pandas as pd
import numpy as np
import math # Needed for the robust nan check

url = "http://localhost:8000/predict"

# A helper function to clean a dictionary of non-JSON compliant values
def clean_dict_for_json(d):
    cleaned_dict = {}
    for k, v in d.items():
        # Check if the value is a float and is NaN
        if isinstance(v, float) and math.isnan(v):
            cleaned_dict[k] = None
        # Also check for Pandas' Not a Time for date columns, just in case
        elif v is pd.NaT:
            cleaned_dict[k] = None
        else:
            cleaned_dict[k] = v
    return cleaned_dict

try:
    # 1. Load the data
    test_df = pd.read_csv(r"/root/AILabProject/data/test.csv")

    # 2. Do the initial broad cleaning
    test_df.replace([np.inf, -np.inf], None, inplace=True)
    
    # 3. Convert to a list of dictionaries
    payload_list = test_df.to_dict('records')

    predictions = []
    print("--- Starting API Requests ---")

    # 4. Loop through the payloads
    for i, payload in enumerate(payload_list):
        
        # --- FINAL, BULLETPROOF CLEANING STEP ---
        # Clean each dictionary individually right before sending
        final_payload = clean_dict_for_json(payload)
        
        response = requests.post(url, json=final_payload)
        response.raise_for_status()
        
        predictions.append(response.json()["predictions"][0])
        print(f"Row {i}: Success (Status Code {response.status_code})")

    print("\n--- All predictions completed successfully! ---")

except requests.exceptions.RequestException as e:
    print(f"\n--- ERROR on row {i} ---")
    print(f"Payload that failed: {final_payload}")
    print(f"Error details: {e}")
    if e.response is not None:
        print(f"Server response ({e.response.status_code}): {e.response.text}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

--- Starting API Requests ---
Row 0: Success (Status Code 200)
Row 1: Success (Status Code 200)
Row 2: Success (Status Code 200)
Row 3: Success (Status Code 200)
Row 4: Success (Status Code 200)
Row 5: Success (Status Code 200)
Row 6: Success (Status Code 200)
Row 7: Success (Status Code 200)
Row 8: Success (Status Code 200)
Row 9: Success (Status Code 200)
Row 10: Success (Status Code 200)
Row 11: Success (Status Code 200)
Row 12: Success (Status Code 200)
Row 13: Success (Status Code 200)
Row 14: Success (Status Code 200)
Row 15: Success (Status Code 200)
Row 16: Success (Status Code 200)
Row 17: Success (Status Code 200)
Row 18: Success (Status Code 200)
Row 19: Success (Status Code 200)
Row 20: Success (Status Code 200)
Row 21: Success (Status Code 200)
Row 22: Success (Status Code 200)
Row 23: Success (Status Code 200)
Row 24: Success (Status Code 200)
Row 25: Success (Status Code 200)
Row 26: Success (Status Code 200)
Row 27: Success (Status Code 200)
Row 28: Success (Status Code

Row 237: Success (Status Code 200)
Row 238: Success (Status Code 200)
Row 239: Success (Status Code 200)
Row 240: Success (Status Code 200)
Row 241: Success (Status Code 200)
Row 242: Success (Status Code 200)
Row 243: Success (Status Code 200)
Row 244: Success (Status Code 200)
Row 245: Success (Status Code 200)
Row 246: Success (Status Code 200)
Row 247: Success (Status Code 200)
Row 248: Success (Status Code 200)
Row 249: Success (Status Code 200)
Row 250: Success (Status Code 200)
Row 251: Success (Status Code 200)
Row 252: Success (Status Code 200)
Row 253: Success (Status Code 200)
Row 254: Success (Status Code 200)
Row 255: Success (Status Code 200)
Row 256: Success (Status Code 200)
Row 257: Success (Status Code 200)
Row 258: Success (Status Code 200)
Row 259: Success (Status Code 200)
Row 260: Success (Status Code 200)
Row 261: Success (Status Code 200)
Row 262: Success (Status Code 200)
Row 263: Success (Status Code 200)
Row 264: Success (Status Code 200)
Row 265: Success (St

In [33]:
test_df = pd.read_csv(r"/root/AILabProject/data/test.csv",na_values=None)

In [37]:
test_df.where(pd.notnull(test_df), None)

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0000,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S
...,...,...,...,...,...,...,...,...,...,...,...
413,1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,,S
414,1306,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C105,C
415,1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,S
416,1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,,S


In [46]:
spark = SparkSession.builder.appName("TitanicClassifierAPI").getOrCreate()
df=spark.read.csv(r"/root/AILabProject/data/test.csv",header=True,inferSchema=True)

In [47]:
df.show()

+-----------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+
|PassengerId|Pclass|                Name|   Sex| Age|SibSp|Parch|          Ticket|   Fare|Cabin|Embarked|
+-----------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+
|        892|     3|    Kelly, Mr. James|  male|34.5|    0|    0|          330911| 7.8292| null|       Q|
|        893|     3|Wilkes, Mrs. Jame...|female|47.0|    1|    0|          363272|    7.0| null|       S|
|        894|     2|Myles, Mr. Thomas...|  male|62.0|    0|    0|          240276| 9.6875| null|       Q|
|        895|     3|    Wirz, Mr. Albert|  male|27.0|    0|    0|          315154| 8.6625| null|       S|
|        896|     3|Hirvonen, Mrs. Al...|female|22.0|    1|    1|         3101298|12.2875| null|       S|
|        897|     3|Svensson, Mr. Joh...|  male|14.0|    0|    0|            7538|  9.225| null|       S|
|        898|     3|Connolly, Miss. Kate|femal

In [58]:
spark.createDataFrame(pd.DataFrame(payload_list))

AttributeError: 'DataFrame' object has no attribute 'iteritems'