In [1]:
import requests
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List, Dict, Any
from datetime import datetime

In [2]:
import mlflow
import numpy as np
import joblib
import re
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from mlflow.tracking import MlflowClient
import matplotlib.dates as mdates

# Stop words that are deliberately NOT removed from comments.
_KEPT_STOP_WORDS = frozenset({"not", "but", "however", "no", "yet"})

# Filled on first successful use so the NLTK stop-word corpus is read once
# instead of once per comment.
_TEXT_RESOURCES = {}


def _get_text_resources():
    """Return ``(stop_words, lemmatizer)``, building and caching them on first use."""
    if not _TEXT_RESOURCES:
        stop_words = set(stopwords.words("english")) - _KEPT_STOP_WORDS
        lemmatizer = WordNetLemmatizer()
        # Store only after both exist so a failure leaves the cache empty
        # and the next call retries.
        _TEXT_RESOURCES["stop_words"] = stop_words
        _TEXT_RESOURCES["lemmatizer"] = lemmatizer
    return _TEXT_RESOURCES["stop_words"], _TEXT_RESOURCES["lemmatizer"]


def preprocess_comment(comment: str) -> str:
    """Normalise one raw comment before it is vectorised.

    Steps, in order: lowercase, strip surrounding whitespace, turn newlines
    into spaces, drop every character that is not a letter, digit,
    whitespace or one of ``! ? . ,``, remove English stop words (except the
    ones in ``_KEPT_STOP_WORDS``) and lemmatize the remaining tokens.

    Args:
        comment: Raw comment text.

    Returns:
        The cleaned comment. This is best-effort: if any step raises, the
        error is printed and the comment is returned in whatever state it
        had reached (unchanged if the very first step failed).
    """
    try:
        comment = comment.lower().strip()
        comment = re.sub(r"\n", " ", comment)
        comment = re.sub(r"[^A-Za-z0-9\s!?.,]", "", comment)

        stop_words, lemmatizer = _get_text_resources()

        # Kept as two separate passes so that, if lemmatization fails, the
        # fallback still returns the stop-word-filtered text.
        comment = " ".join(
            token for token in comment.split() if token not in stop_words
        )
        comment = " ".join(
            lemmatizer.lemmatize(token) for token in comment.split()
        )

        return comment

    except Exception as e:
        print(f"Error in preprocessing comment: {e}")
        return comment

# -------------------------
# MLflow loader
# -------------------------
def load_model_and_vectorizer(
    model_name,
    model_version,
    vectorizer_path,
    tracking_uri="https://dagshub.com/Pranay5519/yt-comment-sentiment-analysis-2.mlflow",
):
    """Load a registered LightGBM model from MLflow and a fitted vectorizer from disk.

    Args:
        model_name: Name of the model in the MLflow model registry.
        model_version: Registry version to load (used in the
            ``models:/<name>/<version>`` URI).
        vectorizer_path: Path of the joblib-serialised vectorizer.
        tracking_uri: MLflow tracking server to read from. Defaults to the
            project's DagsHub server, so existing three-argument calls
            behave as before.

    Returns:
        A ``(model, vectorizer)`` tuple.
    """
    mlflow.set_tracking_uri(tracking_uri)

    model_uri = f"models:/{model_name}/{model_version}"
    model = mlflow.lightgbm.load_model(model_uri)

    # NOTE: joblib.load unpickles the file, which can execute arbitrary
    # code; only point this at vectorizer files you trust.
    vectorizer = joblib.load(vectorizer_path)

    return model, vectorizer

# Load the registered model and the pickled vectorizer once for the notebook.
# NOTE(review): the vectorizer path is an absolute path on one machine.
model, vectorizer = load_model_and_vectorizer(
    model_name="ligbm_model_v1",
    model_version="1",
    vectorizer_path=r"D:\yt-comment-sentiment-analysis2\tfidf_vectorizer.pkl",
)


  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 8/8 [00:03<00:00,  2.13it/s]


In [3]:
# Echo both loaded objects so their configuration shows in the cell output.
(model, vectorizer)

(LGBMClassifier(class_weight='balanced', is_unbalance=True, learning_rate=0.09,
                max_depth=20, metric='multi_logloss', n_estimators=367,
                num_class=3, objective='multiclass', reg_alpha=0.1,
                reg_lambda=0.1),
 TfidfVectorizer(max_features=10000, ngram_range=(1, 3)))

In [11]:
class Comment(BaseModel):
    """One comment inside a prediction request."""
    # Raw comment text; `predict` runs it through `preprocess_comment`.
    text: str
    # Time attached to the comment; the sample payloads in this notebook
    # supply it as ISO-8601 strings such as '2026-01-30T14:37:22Z'.
    timestamp: datetime
    # Author identifier string; `predict` does not read it.
    authorId: str

class PredictRequest(BaseModel):
    """Request body accepted by `predict`: a batch of comments to classify."""
    # Comments to score; `predict` returns one result per entry, in order.
    comments: List[Comment]
    
class PredictWithTimestampRequest(BaseModel):
    """Batch of comments; declares the same single field as `PredictRequest`.

    NOTE(review): nothing in the visible cells references this model —
    confirm which endpoint is meant to consume it.
    """
    # Comments carried by the request.
    comments : List[Comment]

In [37]:

def _comment_text(comment) -> str:
    """Return the raw text of one comment given as a ``Comment`` model or a plain dict."""
    return comment["text"] if isinstance(comment, dict) else comment.text


def predict(comments: PredictRequest):
    """Classify the sentiment of every comment in a request.

    Args:
        comments: A ``PredictRequest``. For use from notebook cells, the
            equivalent ``{"comments": [...]}`` dict or a bare list of
            comments (``Comment`` models or dicts with a ``"text"`` key)
            is accepted as well.

    Returns:
        A list with one ``{"comment": <input item>, "sentiment": <str>}``
        dict per input comment, in input order. ``sentiment`` is the
        model's predicted label converted with ``str``.

    Raises:
        HTTPException: status 400 when no comments were supplied; status
            500 when preprocessing, vectorizing or prediction fails.
    """
    # Normalise the supported input shapes to a plain list of comments.
    if isinstance(comments, dict):
        items = comments.get("comments")
    elif isinstance(comments, (list, tuple)):
        items = comments
    else:
        items = getattr(comments, "comments", None)
    items = list(items) if items else []

    # A request model instance is truthy even when its list is empty, so
    # the emptiness check has to look at the comments themselves.
    if not items:
        raise HTTPException(status_code=400, detail="No comments provided")

    try:
        # Preprocess comments
        preprocessed_comments = [
            preprocess_comment(_comment_text(c)) for c in items
        ]
        print("preprocessing Done")

        # Vectorize comments (sparse matrix)
        transformed_comments = vectorizer.transform(preprocessed_comments)
        print("Transformation Done")

        # Convert the sparse matrix to a dense DataFrame whose columns are
        # the vectorizer's feature names, in the vectorizer's own order.
        # (Re-aligning columns to an MLflow input schema was tried earlier
        # and is intentionally left out here.)
        feature_names = vectorizer.get_feature_names_out()
        df = pd.DataFrame(
            transformed_comments.toarray(),
            columns=feature_names,
        )
        print("Data Frame Generated")

        # Make predictions and convert each label to a string
        predictions = [str(pred) for pred in model.predict(df).tolist()]
        print("predictions : ", predictions)

    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Prediction failed: {str(e)}"
        )

    # Return response (same structure as Flask)
    return [
        {"comment": comment, "sentiment": sentiment}
        for comment, sentiment in zip(items, predictions)
    ]

In [7]:
# Sample payload: three comments as plain dicts (text, timestamp, author id).
comments = [
    {
        "text": "Come get a real Land Cruiser in South Africa, here you can actually do shit like you can in america, as long as you arent endangering anyone you can do whatever you want and theres no filming restrictions either. Id argue we're even more free than the US in these regards. And we also have the new Hilux's as well the land cruisers",
        "timestamp": "2026-01-30T14:37:22Z",
        "authorId": "UCWUoKxg8-WVQZzERTnW8IBg",
    },
    {
        "text": "we were promised a Bugatti durability test and instead got a campout in a pos Toyota in sand.",
        "timestamp": "2026-01-30T14:37:20Z",
        "authorId": "UC6lZYcaMIhNAbbZ6xkn6FDg",
    },
    {
        "text": "Buy property in Alaska!!!",
        "timestamp": "2026-01-30T14:37:18Z",
        "authorId": "UCoZE-D5YtR8qH-fLhNGiwqg",
    },
]

In [10]:
# Run the predictor on the sample comments defined in the previous cell.
predict(comments=comments)

[{'comment': {'text': "Come get a real Land Cruiser in South Africa, here you can actually do shit like you can in america, as long as you arent endangering anyone you can do whatever you want and theres no filming restrictions either. Id argue we're even more free than the US in these regards. And we also have the new Hilux's as well the land cruisers",
   'timestamp': '2026-01-30T14:37:22Z',
   'authorId': 'UCWUoKxg8-WVQZzERTnW8IBg'},
  'sentiment': '1'},
 {'comment': {'text': 'we were promised a Bugatti durability test and instead got a campout in a pos Toyota in sand.',
   'timestamp': '2026-01-30T14:37:20Z',
   'authorId': 'UC6lZYcaMIhNAbbZ6xkn6FDg'},
  'sentiment': '0'},
 {'comment': {'text': 'Buy property in Alaska!!!',
   'timestamp': '2026-01-30T14:37:18Z',
   'authorId': 'UCoZE-D5YtR8qH-fLhNGiwqg'},
  'sentiment': '0'}]

In [27]:
# Same three sample comments, wrapped under a "comments" key
# (the field layout declared by PredictRequest).
comments = {
    "comments": [
        {
            "text": "Come get a real Land Cruiser in South Africa, here you can actually do shit like you can in america, as long as you arent endangering anyone you can do whatever you want and theres no filming restrictions either. Id argue we're even more free than the US in these regards. And we also have the new Hilux's as well the land cruisers",
            "timestamp": "2026-01-30T14:37:22Z",
            "authorId": "UCWUoKxg8-WVQZzERTnW8IBg",
        },
        {
            "text": "we were promised a Bugatti durability test and instead got a campout in a pos Toyota in sand.",
            "timestamp": "2026-01-30T14:37:20Z",
            "authorId": "UC6lZYcaMIhNAbbZ6xkn6FDg",
        },
        {
            "text": "Buy property in Alaska!!!",
            "timestamp": "2026-01-30T14:37:18Z",
            "authorId": "UCoZE-D5YtR8qH-fLhNGiwqg",
        },
    ],
}


In [36]:
# Print only the text of each sample comment, one per line.
for text in (entry["text"] for entry in comments["comments"]):
    print(text)

Come get a real Land Cruiser in South Africa, here you can actually do shit like you can in america, as long as you arent endangering anyone you can do whatever you want and theres no filming restrictions either. Id argue we're even more free than the US in these regards. And we also have the new Hilux's as well the land cruisers
we were promised a Bugatti durability test and instead got a campout in a pos Toyota in sand.
Buy property in Alaska!!!
