In [None]:
import functools
import logging

import joblib
import nest_asyncio
import nltk
import pandas as pd
from fastapi import FastAPI
from pydantic import BaseModel
from uvicorn import Config, Server

# Fetch the NLTK resources this service was written against. The download call
# reports "already up-to-date" when a package is present locally.
# NOTE(review): 'punkt' is not referenced by the code visible here; presumably
# kept for parity with the training notebook -- confirm before removing.
for _resource in ('stopwords', 'punkt', 'wordnet'):
    nltk.download(_resource)

# Trained artifacts, loaded once at start-up and shared by every request.
# NOTE(review): joblib files are pickle-based; only load artifacts that come
# from a trusted source.
rf_classifier = joblib.load('best_random_forest_classifier.joblib')
vectorizer = joblib.load('tfidf_vectorizer.joblib')

# ASGI application object; routes are registered on it below.
app = FastAPI()

class JobDescription(BaseModel):
    # Request body for POST /predict.
    # Documented with comments rather than a docstring so the model definition
    # (and anything generated from it) stays exactly as it was.

    # Free-text job posting; predict() passes it to clean_text() before
    # vectorizing and classifying it.
    description: str

@functools.lru_cache(maxsize=None)
def _text_resources():
    """Return the (stop-word set, lemmatizer) pair, built once and reused."""
    # A frozenset gives O(1) membership tests; the raw NLTK value is a list.
    stop_words = frozenset(nltk.corpus.stopwords.words('english'))
    return stop_words, nltk.stem.WordNetLemmatizer()


def clean_text(text):
    """Normalize raw text into the space-separated token string fed to the vectorizer.

    Steps: lowercase, drop every character that is neither alphanumeric nor
    whitespace, split on whitespace, remove English stop words, lemmatize
    each remaining word, and re-join with single spaces.

    Args:
        text: Raw input string (e.g. a job description).

    Returns:
        The cleaned string; empty if no tokens survive.
    """
    stop_words, lemmatizer = _text_resources()

    # Any whitespace (tab, newline, ...) becomes a plain space so that words on
    # different lines stay separate tokens. Previously only the literal ' ' was
    # kept, so "python\ndjango" collapsed into "pythondjango".
    # NOTE(review): if the training data was cleaned with the old behaviour,
    # multi-line training samples contained such merged tokens -- confirm the
    # training pipeline uses the same normalization.
    normalized = "".join(
        " " if char.isspace() else char
        for char in text.lower()
        if char.isalnum() or char.isspace()
    )

    return " ".join(
        lemmatizer.lemmatize(word)
        for word in normalized.split()
        if word not in stop_words
    )

@app.post("/predict")
def predict(job: JobDescription):
    """Classify a job description and return its predicted domain.

    The description is normalized with clean_text(), transformed by the
    module-level ``vectorizer``, and classified by ``rf_classifier``. Each
    intermediate result is echoed to stdout for debugging.

    Args:
        job: Request body carrying the raw ``description`` text.

    Returns:
        A dict with the single key ``"predicted_domain"`` holding the first
        (and only) label returned by the classifier.
    """
    normalized = clean_text(job.description)
    print(f"Cleaned Text: {normalized}")

    # The vectorizer expects an iterable of documents, hence the 1-element list.
    features = vectorizer.transform([normalized])
    print(f"Vectorized Text Shape: {features.shape}")

    # One input row in, one label out.
    label = rf_classifier.predict(features)[0]
    print(f"Prediction: {label}")

    return {"predicted_domain": label}

# Patch asyncio before starting uvicorn.
# NOTE(review): presumably required because this runs inside a notebook kernel
# that already owns a running event loop -- confirm if moved to a plain script.
nest_asyncio.apply()

# Serve on the loopback interface only.
config = Config(
    app=app,
    host="127.0.0.1",
    port=8000,
    log_level="info",
)
server = Server(config=config)

# Runs the server in the foreground until it is stopped (CTRL+C).
server.run()


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\hsahn\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\hsahn\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\hsahn\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
INFO:     Started server process [13516]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


Cleaned Text: looking software developer experience python django
Vectorized Text Shape: (1, 1000)
Prediction: Software Development
INFO:     127.0.0.1:65028 - "POST /predict HTTP/1.1" 200 OK
Cleaned Text: looking full stack developer experience react nodejs build scalable web application
Vectorized Text Shape: (1, 1000)
Prediction: Software Development
INFO:     127.0.0.1:65032 - "POST /predict HTTP/1.1" 200 OK
Cleaned Text: hiring data analyst expertise sql excel manage analyze company datasets
Vectorized Text Shape: (1, 1000)
Prediction: Other
INFO:     127.0.0.1:65045 - "POST /predict HTTP/1.1" 200 OK
Cleaned Text: looking marketing coordinator assist campaign management content creation social medium strategy
Vectorized Text Shape: (1, 1000)
Prediction: Data Science
INFO:     127.0.0.1:65048 - "POST /predict HTTP/1.1" 200 OK
Cleaned Text: seeking talent acquisition specialist lead recruitment effort improve onboarding process
Vectorized Text Shape: (1, 1000)
Prediction: Other
INFO