In [1]:
!pip install fastapi uvicorn
!pip install pyngrok
!pip install fastapi nest-asyncio pyngrok uvicorn
!pip install --upgrade fastapi
!pip install typing_extensions
!pip install --upgrade typing_extensions




In [2]:
# Import the libraries
import os
import re
import string
import pickle
import numpy as np
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import uvicorn
import nest_asyncio
from pyngrok import ngrok
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [4]:

# specify a port
port = 8000
ngrok_tunnel = ngrok.connect(port)

nest_asyncio.apply()

# where we can visit our fastAPI app
print('Public URL:', ngrok_tunnel.public_url)

app = FastAPI()

Public URL: https://9d6c-34-41-134-65.ngrok-free.app


In [5]:
# Enable CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["GET", "POST", "OPTIONS"],
    allow_headers=["Content-Type"],
)

In [6]:
class PredictRequest(BaseModel):
    text: str

# Load the trained model, TF-IDF vectorizer, and label encoder
model_path = "finalized_model.pkl"
tfidf_path = "tfidf_vectorizer.pkl"
label_encoder_path = "label_encoder.pkl"

if os.path.exists(model_path) and os.path.exists(tfidf_path) and os.path.exists(label_encoder_path):
    with open(model_path, "rb") as file:
        model = pickle.load(file)
    with open(tfidf_path, "rb") as file:
        tfidf = pickle.load(file)
    with open(label_encoder_path, "rb") as file:
        label_encoder = pickle.load(file)
    print("All necessary files are loaded successfully.")
else:
    print("One or more necessary files do not exist. Make sure 'finalized_model.pkl', 'tfidf_vectorizer.pkl', and 'label_encoder.pkl' are present.")

# Define the language map
language_map = {
    'ar': 'Arabic', 'bg': 'Bulgarian', 'de': 'German', 'el': 'Modern Greek', 'en': 'English',
    'es': 'Spanish', 'fr': 'French', 'hi': 'Hindi', 'it': 'Italian', 'ja': 'Japanese',
    'nl': 'Dutch', 'pl': 'Polish', 'pt': 'Portuguese', 'ru': 'Russian', 'sw': 'Swahili',
    'th': 'Thai', 'tr': 'Turkish', 'ur': 'Urdu', 'vi': 'Vietnamese', 'zh': 'Chinese'
}


All necessary files are loaded successfully.


In [7]:
# Preprocess text
def preprocess_text(text):
    # Lowercase the text
    text = text.lower()
    # Remove HTML tags
    text = re.sub(r'<.*?>', '', text)
    # Remove URLs
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    # Remove punctuation
    text = ''.join([ch for ch in text if ch not in set(string.punctuation)])
    # Tokenize text
    text = word_tokenize(text)
    # Convert list of tokens back to string
    text = ' '.join(text)
    return text

In [8]:
@app.post("/predict")
async def predict_language(request: PredictRequest):
    try:
        # Preprocess the input text
        preprocessed_text = preprocess_text(request.text)

        # Transform the input text using the loaded TF-IDF vectorizer
        text_transformed = tfidf.transform([preprocessed_text])

        # Make prediction using the trained model
        prediction = model.predict(text_transformed)[0]

        # Convert the numerical prediction back to the original language label
        language_label = label_encoder.inverse_transform([prediction])[0]

        # Map the language label to the language name
        language_name = language_map[language_label]

        # Return the predicted language name as a response
        return {"Identified Language": language_name}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

# Setup ngrok
port = 8000
public_url = ngrok.connect(port)
print(f"Public URL: {public_url}")

# Start the server
if __name__ == "__main__":
    nest_asyncio.apply()
    uvicorn.run(app, host="0.0.0.0", port=port)

INFO:     Started server process [3576]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


Public URL: NgrokTunnel: "https://775a-34-41-134-65.ngrok-free.app" -> "http://localhost:8000"
INFO:     49.37.114.188:0 - "POST /predict HTTP/1.1" 200 OK
INFO:     49.37.114.188:0 - "POST /predict HTTP/1.1" 200 OK
INFO:     49.37.114.188:0 - "POST /predict HTTP/1.1" 200 OK
INFO:     49.37.114.188:0 - "POST /predict HTTP/1.1" 200 OK
INFO:     49.37.114.188:0 - "POST /predict HTTP/1.1" 200 OK
INFO:     49.37.114.188:0 - "POST /predict HTTP/1.1" 200 OK
INFO:     49.37.114.188:0 - "POST /predict HTTP/1.1" 200 OK
INFO:     49.37.114.188:0 - "POST /predict HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [3576]


KeyboardInterrupt: 