In [2]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
import pickle

# Make sure the NLTK corpora used by transform_text are available locally
# (nltk reports the package as up-to-date when it is already installed).
nltk.download('wordnet')
nltk.download('stopwords')

# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
# Only load pickles that were produced by a trusted training run.
#
# Forward slashes are used so the relative paths resolve on both Windows and
# POSIX systems (a literal backslash is not a path separator on POSIX).
with open('pickles/tfidf.pickle', 'rb') as data:
    tfidf = pickle.load(data)

with open('models/svm.pickle', 'rb') as data:
    svm = pickle.load(data)

#transforms the text into features.

def transform_text(txt):
    """Clean a raw text string and convert it to a TF-IDF feature array.

    Steps, in order:
      1. replace carriage returns / newlines with spaces, shrink runs of
         three and then two spaces, and drop double quotes;
      2. lowercase;
      3. strip the punctuation characters ``?!,.:;`` and possessive ``'s``;
      4. lemmatize every space-separated token as a verb;
      5. remove English stop words (whole-word match) and collapse
         whitespace runs to a single space;
      6. vectorize with the module-level ``tfidf`` object.

    Args:
        txt: the raw document text (a single string).

    Returns:
        The result of ``tfidf.transform(...).toarray()`` for the single
        cleaned document.
    """
    parsed = pd.Series([txt])

    # ``regex`` is passed explicitly on every call: the pandas default for
    # ``Series.str.replace`` changed between major versions, and relying on
    # it would silently turn the stop-word / whitespace patterns below into
    # literal (never-matching) strings.
    literal_replacements = (
        ('\r', ' '),
        ('\n', ' '),
        ('   ', ' '),
        ('  ', ' '),
        ('"', ''),
    )
    for old, new in literal_replacements:
        parsed = parsed.str.replace(old, new, regex=False)

    parsed = parsed.str.lower()

    # Punctuation must be matched literally ('.' and '?' are regex
    # metacharacters).
    for sign in "?!,.:;":
        parsed = parsed.str.replace(sign, '', regex=False)
    parsed = parsed.str.replace("'s", "", regex=False)

    # Lemmatize token by token, treating each token as a verb.
    lemmatizer = WordNetLemmatizer()
    lemmatized_text = " ".join(
        lemmatizer.lemmatize(word, pos='v') for word in parsed[0].split(" ")
    )
    parsed = pd.Series([lemmatized_text])

    # Stop words are removed one at a time, in list order, as whole words.
    for stop_word in stopwords.words('english'):
        regex_stop = r"\b" + stop_word + r"\b"
        parsed = parsed.str.replace(regex_stop, '', regex=True)

    # Removing words leaves gaps; squeeze them back to single spaces.
    parsed = parsed.str.replace(r'\s+', ' ', regex=True)

    return tfidf.transform(parsed).toarray()

#get the category name from predicted category code

def get_category(code):
    """Return the category label for a predicted integer category code.

    Codes 0-4 map, in order, to 'business', 'entertainment', 'politics',
    'sport' and 'tech'. Any other code raises ``KeyError``.
    """
    labels = ('business', 'entertainment', 'politics', 'sport', 'tech')
    # A dict (rather than tuple indexing) keeps unknown or negative codes
    # failing with KeyError.
    code_to_label = dict(enumerate(labels))
    return code_to_label[code]

#predicting the category

def predict_cat(txt, min_prob=65):
    """Predict the news category of ``txt`` and print it with its confidence.

    The text is vectorized with ``transform_text`` and classified with the
    module-level ``svm`` model. The confidence is the largest class
    probability from ``svm.predict_proba``, expressed as a percentage. When
    it is below ``min_prob`` the label is reported as 'other'.

    Args:
        txt: the raw document text to classify.
        min_prob: minimum confidence (in percent) required to report the
            predicted category; defaults to 65.

    Returns:
        None. The label and the confidence are printed to stdout.
    """
    features = transform_text(txt)
    result = get_category(svm.predict(features)[0])
    # NOTE(review): the label comes from predict() while the confidence comes
    # from predict_proba(); for some estimators these are not guaranteed to
    # point at the same class - confirm for the pickled model.
    prob = svm.predict_proba(features)[0].max() * 100
    if prob < min_prob:
        result = 'other'
    print('The result is :')
    print(result)
    print('With probability:')
    print(prob)
        

if __name__=='__main__':
    # Demo run: classify a sample news snippet about precious-metal prices
    # and print the predicted category with its confidence.
    txt = """Following yesterday's trend, global spot prices continued its growth today as well. It was clocked at $1875.7 per Troy ounce in the latest close which noted an increase of 0.33% over yesterday. This price level is 7.25% higher than average gold price observed in the past 30 days ($1739.7). Among other precious metals, silver prices edged higher today. Silver gained 0.03% to $27.8 per Troy ounce.

Further, platinum price has shown an uptick. The precious metal platinum rose 0.5% to $1198.5 per Troy ounce. Meanwhile in India, gold was priced at ₹48525 per 10 gram on MCX, with a change of ₹135.9. Also, the price of 24k gold in the Indian spot market was quoted at ₹47000 . """
    predict_cat(txt)

The result is :
business
With probability:
99.35764803673919


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\cheri\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\cheri\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
