In [None]:
nr_input_review = globals().get('nr_input_review', "")  # Default to empty string if not provided
nr_input_rating = globals().get('nr_input_rating', 0.0)  # Default to 0.0 if not provided

nr_input_rating = int(nr_input_rating)  
nr_input_review = str(nr_input_review) 

# Make sure the nrs are properly received
print(f"Received Review: {nr_input_review}")
print(f"Received Rating: {nr_input_rating}")

Received Review: 
Received Rating: 0


In [None]:
import papermill as pm
import sys

# Read input parameters
try:
    nr_input_review = nr_input_review  # This comes from papermill
    nr_input_rating = nr_input_rating
except NameError:
    nr_input_review = "Default review"
    nr_input_rating = 5 # Default values for testing

print(f"Processing Review: {nr_input_review}")
print(f"Processing Rating: {nr_input_rating}")

Processing Review: 
Processing Rating: 0


In [None]:
nr_temp_review = nr_input_review
nr_temp_rating = nr_input_rating 

In [5]:
print("Starting Review Predictions")

Starting Review Predictions


In [None]:
# Load Saved Models

import joblib
import fasttext
from gensim.models import Word2Vec
from scipy.sparse import hstack

try:
    print("Loading models...")
    svm_model = joblib.load("roman_svm_model.pkl")
    tfidf_vectorizer = joblib.load("roman_tfidf_vectorizer.pkl")
    word2vec_model = joblib.load("roman_word2vec_model.pkl")
    ft_model = fasttext.load_model("roman_fasttext_model.bin")
    rf_model = joblib.load("roman_rf_model.pkl")
    lr_model = joblib.load("roman_lr_model.pkl")
    
    
    print("Models loaded successfully!")
except Exception as e:
    print(f"Error loading models: {e}")
    exit()

Loading models...
Models loaded successfully!


In [7]:
import pandas as pd
import numpy as np
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\achyu\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\achyu\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\achyu\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [8]:
nr_input_review = nr_input_review.lower()

In [9]:
nr_input_review

''

In [10]:
import re

def normalize_text(text):
    
    text = re.sub(r'\bu\b', 'timi', text)  
    text = re.sub(r'\bm\b', 'ma', text)
    text = re.sub(r'\beka\b', 'ek', text)

    # Remove extra spaces and normalize spacing
    text = re.sub(r'\s+', ' ', text).strip()
    return text

nr_input_review = normalize_text(nr_input_review)

In [11]:
nr_input_review 

''

In [12]:
def remove_noise(text):
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    text = re.sub(r'nga', '', text)
     
    return text

nr_input_review = remove_noise(nr_input_review)

In [13]:
nr_input_review 

''

In [14]:
import emoji

def convert_emojis_to_text(text):
    return emoji.demojize(text, delimiters=(" ", " "))


nr_input_review = convert_emojis_to_text(nr_input_review)

In [15]:
nr_input_review 

''

In [None]:
def handle_slang(text):
    slang_dict = {'thikkk': 'thik', 
        'ghamta': 'samta', 
        'farkera': 'pachhi',
        'xa': 'cha',
        'hoina': 'haina',
        'k': 'ke',
        'khoi': 'kahaan',
        'kati': 'kati',
        'k garne': 'ke garne',
        'thaxa': 'thaha',
        'thaxaina': 'thaha chaina',
        'kya': 'kya ho',
        'la': 'la',
        'hait': 'hait',
        'dherai': 'dherai',
        'ali': 'ali',
        'kasto': 'kasto',
        'k cha': 'ke cha',
        'kura': 'kura',
        'khate': 'khate',
        'dai': 'dai',
        'didi': 'didi',
        'bhai': 'bhai',
        'bahini': 'bahini',
        'muji': 'muji',
        'kukur': 'kukur',
        'jasto': 'jasto',
        'testo': 'testo',
        'yesto': 'yesto',
        'kina': 'kina',
        'huncha': 'huncha',
        'hunna': 'hunna',
        'pugyo': 'pugyo',
        'pugena': 'pugena',
        'khaana': 'khaana',
        'khayo': 'khayo',
        'khana': 'khana',
        'bas': 'bas',
        'chhito': 'chhito',
        'bholi': 'bholi',
        'aaja': 'aaja',
        'parla': 'parla',
        'pardaina': 'pardaina',
        'thik': 'thik',
        'thikai': 'thikai',
        'ramro': 'ramro',
        'naramro': 'naramro',
        'khatra': 'khatra',
        'halka': 'halka',
        'maile': 'maile',
        'timi': 'timi',
        'huss': 'huss',
        'guff': 'guff',
        'jhyau': 'jhyau',
        'khuro': 'khuro',
        'lado': 'lado',
        'thulo': 'thulo',
        'sano': 'sano',
        'khaire': 'khaire',
        'jholey': 'jholey',
        'fuchhey': 'fuchhey',
        'khatey': 'khatey',
        'boka': 'boka',
        'bokey': 'bokey',
        'bokeycha': 'bokeycha',
        'bokeko': 'bokeko',
        'bokera': 'bokera',
        'bokne': 'bokne',
        'boknu': 'boknu',
        'boknus': 'boknus',
        'boknuparne': 'boknuparne',
        'boknuparyo': 'boknuparyo',
        'boknuparcha': 'boknuparcha',
        'xa' : 'cha',
        'khai': 'malai tha xaina',
        'gr8': 'great',
        'bro': 'bhai',
        'thik xa': 'thik cha',
        'k xa': 'ke cha',
        'momo': 'dumpling',
    }
    for slang, standard in slang_dict.items():
        text = text.replace(slang, standard)
    return text

nr_input_review = handle_slang(nr_input_review)

In [17]:
nr_input_review 

''

In [None]:
stopwords = ['ra', 'ko', 'le', 'lai', 'bata', 'xa', 'yo', 'tiyo', 'mero', 'maile', 'ma', 'lagi', 'mana', 'malai', 'ho', 'tara', 'pani',  'chan', 'garna', 'hunxa', 'of', 'a', 'an', 'the', 'is', 'and', 'but' ]
def remove_stopwords(text):
    text = ' '.join([word for word in text.split() if word not in stopwords])
    return text

nr_input_review = remove_stopwords(nr_input_review)

In [19]:
nr_input_review 

''

In [20]:
def tokenize(text):
    return text.split()

nr_input_review = tokenize(nr_input_review)

In [21]:
nr_input_review 

[]

In [22]:
import re


lemmatizer_dict = {
    'gardaichha': 'garnu',
    'garchha': 'garnu',
    'garera': 'garnu',
    'garne': 'garnu',
    'bhayeko': 'bhayeko',
    'jane': 'jan',
    'huncha': 'hunu',
    'hune': 'hunu',
    'pugne': 'pugnu',
    'garne': 'garnu',
    'chha': 'cha',
    'aune': 'aunu',
    'jane': 'jan',
    'dekhe': 'dekhnus',
    'garaune': 'garnu',
    'jaane': 'jan'
}


def lemmatize(text):
    
    if isinstance(text, list):
        text = ' '.join(text)  


    words = text.split()
    lemmatized_words = []

    for word in words:
        
        if word in lemmatizer_dict:
            lemmatized_words.append(lemmatizer_dict[word])
        else:
            lemmatized_words.append(word)

    
    return ' '.join(lemmatized_words)


In [23]:
nr_input_review 

[]

In [24]:
def average_word2vec(text):
    words = text.split()
    known_words = [word for word in words if word in word2vec_model.wv]
    if not known_words:
        return np.zeros(100)
    return np.mean([word2vec_model.wv[word] for word in known_words], axis=0)


In [25]:
# Function to Convert Text to FastText Features
def average_fasttext(text):
    words = text.split()
    vectors = [ft_model.get_word_vector(word) for word in words if word in ft_model.words]
    if len(vectors) == 0:
        return np.zeros(300)  # Zero vector if no words match
    return np.mean(vectors, axis=0)

In [None]:
# Function to Predict Review Authenticity
def predict_review(nr_input_review):
    try:
        print(f"\nOriginal Input: {nr_temp_review}")
        print(f"Review: {nr_temp_rating}\n")

        # Step 1: Preprocess Text
        processed_text = lemmatize(nr_input_review)
        

        # Step 2: Extract Features
        tfidf_features = tfidf_vectorizer.transform([processed_text])
        

        word2vec_features = np.array([average_word2vec(processed_text)])
        

        fasttext_features = np.array([average_fasttext(processed_text)])
        

        # Step 3: Combine Features
        try:
            combined_features = hstack([tfidf_features, word2vec_features, fasttext_features])
        except Exception as e:
            print(f"Error in feature stacking: {e}")
            return

        # Step 4: Make Prediction
        try:
            prediction = int(svm_model.predict(combined_features)[0])
        except Exception as e:
            print(f"Error in prediction: {e}")
            return

        # Step 5: Get Probability Score (if available)
        try:
            probability = svm_model.predict_proba(combined_features)[0][prediction]
            print(f"Predicted Review Authenticity: {'REAL' if prediction == 1 else 'FAKE'} (Confidence: {probability:.2f})")
        except Exception as e:
            print(f"Error in probability calculation: {e}")
            probability = None  # Some models don’t support predict_proba()

        return "REAL" if prediction == 1 else "FAKE"

    except Exception as e:
        print(f"Unexpected error: {str(e)}")
        return None


In [None]:

result = predict_review(' '.join(nr_input_review))  # Ensure input is a string
print(f"Final Prediction: {result}")


Original Input: 
Rating: 0

Error in probability calculation: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices
Final Prediction: REAL
