In [24]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
import joblib



In [25]:
# Load data
# Reads "dataset.csv" (path relative to the working directory) into a DataFrame;
# later cells use its "externalStatus" and "internalStatus" columns.
data = pd.read_csv("dataset.csv")

In [26]:
# Preview the loaded frame (the rendered output elides the middle rows)
data

Unnamed: 0,externalStatus,internalStatus
0,PORT OUT,Port Out
1,TERMINAL IN,Inbound Terminal
2,PORT IN,Port In
3,Vessel departure from first POL (Vessel name :...,Departure
4,Vessel arrival at final POD (Vessel name : TIA...,Arrival
...,...,...
1217,Import Loaded on Rail,Loaded on Vessel
1218,Full Transshipment Loaded,Loaded on Vessel
1219,Full Transshipment Loaded,Loaded on Vessel
1220,Export Loaded on Vessel,Loaded on Vessel


In [27]:
# Count the null entries in every column and report them
missing_per_column = data.isnull().sum()
print("Missing values in the dataset:")
print(missing_per_column)

Missing values in the dataset:
externalStatus    0
internalStatus    0
dtype: int64


In [28]:
# Features are the external status strings; the target is the internal status label
X, y = data["externalStatus"], data["internalStatus"]

In [29]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
# NOTE(review): the data preview shows repeated externalStatus/internalStatus rows,
# so identical samples may land in both splits and inflate test accuracy - confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [30]:
# Define preprocessing and modeling pipeline
# Raw status text -> TF-IDF features -> random-forest classifier.
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),  # Convert text to numerical features using TF-IDF
    # Seed the forest (same seed as the train/test split) so that re-running the
    # notebook reproduces the same fitted model and the same reported scores.
    ('clf', RandomForestClassifier(random_state=42))  # Random Forest Classifier
])


In [20]:
# Append classifier to preprocessing pipeline
# `preprocessor` is not defined in any visible cell of this notebook, so this cell
# raised NameError on a fresh kernel. A TF-IDF vectorizer (the same text
# preprocessing the `pipeline` cell uses) stands in for it; step names are unchanged.
# NOTE(review): confirm this matches the originally intended preprocessor.
clf = Pipeline(steps=[('preprocessor', TfidfVectorizer()),
                      ('classifier', RandomForestClassifier(random_state=42))])

In [31]:
# Fit the model
# Trains every step of `pipeline` on the training split only.
pipeline.fit(X_train, y_train)

In [32]:
# Score the fitted pipeline on the split it was trained on...
train_score = pipeline.score(X_train, y_train)
print(f"Train Accuracy: {train_score:.2f}")

# ...and on the held-out split
test_score = pipeline.score(X_test, y_test)
print(f"Test Accuracy: {test_score:.2f}")

Train Accuracy: 1.00
Test Accuracy: 1.00


In [38]:
from fastapi import FastAPI
from pydantic import BaseModel
import joblib

In [42]:
# Define FastAPI app
# Application instance on which the /predict route below is registered.
app = FastAPI()

# Define request and response models
# Request body for /predict: the external status text to classify.
class PredictionRequest(BaseModel):
    externalStatus: str

# Response body for /predict: the internal status label predicted by the model.
class PredictionResponse(BaseModel):
    predicted_internalStatus: str

# POST /predict: classify a single external status string
@app.post("/predict", response_model=PredictionResponse)
def predict_status(request: PredictionRequest):
    # The pipeline works on a batch, so wrap the single text in a one-item list
    predicted_labels = pipeline.predict([request.externalStatus])

    # Only one sample was submitted; its label is the first (and only) entry
    return {"predicted_internalStatus": predicted_labels[0]}