In [4]:
import pandas as pd
import csv
from imblearn.over_sampling import SMOTE
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import HashingVectorizer
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report

In [5]:
from joblib import load

# NOTE(review): joblib.load unpickles its input, so only point it at
# artifacts you produced or otherwise trust.

# Artifacts consumed by predict_bookmark: the classifier and the text
# vectorizer whose output it is fed.
logistic_vanilla, tfidf_vectorizer = (
    load('logistic_vanilla.joblib'),
    load('tfidf_vectorizer.joblib'),
)

# Artifacts consumed by predict_bookmark_source_map: same roles, but for the
# variant that also receives the encoded 'Source' column.
logistic_vanilla_source_map, tfidf_vectorizer_source_map = (
    load('logistic_vanilla_source_map.joblib'),
    load('tfidf_vectorizer_source_map.joblib'),
)


In [6]:
def read_data(csv_file):
    """Load a CSV file into a DataFrame of strings.

    The first row of the file is used as the header, and whatever name the
    first column carries is replaced with 'Bookmark'. No type conversion is
    performed: every cell keeps the string value produced by ``csv.reader``.

    Parameters
    ----------
    csv_file : str or path-like
        Path of a UTF-8 encoded CSV file.

    Returns
    -------
    pandas.DataFrame
        One row per record after the header. A completely empty file (no
        header row at all) yields an empty DataFrame instead of raising.
    """
    # newline='' hands line-ending handling to the csv module; without it,
    # line breaks embedded in quoted fields are rewritten by the text layer.
    with open(csv_file, 'r', encoding='utf-8', newline='') as file:
        reader = csv.reader(file)
        header = next(reader, None)
        if header is None:
            # Nothing to parse: the file has no header row.
            return pd.DataFrame()
        rows = list(reader)

    # Build the frame from the data rows only, then normalise the name of
    # the first column without mutating in place.
    df = pd.DataFrame(rows, columns=header)
    return df.rename(columns={header[0]: 'Bookmark'})

In [7]:
def predict_bookmark(test_data, vectorizer=None, model=None):
    """Predict a bookmark label for every row using text plus sentiment.

    The 'Summary', 'Headline' and 'Source' columns are joined with single
    spaces and passed through ``vectorizer.transform``; the resulting
    features are extended with the numeric 'Summary Sentiment' column and
    handed to ``model.predict``.

    Parameters
    ----------
    test_data : pandas.DataFrame
        Must contain 'Summary', 'Headline', 'Source' and
        'Summary Sentiment'. It is not modified.
    vectorizer : object with ``transform``, optional
        Defaults to the module-level ``tfidf_vectorizer``.
    model : object with ``predict``, optional
        Defaults to the module-level ``logistic_vanilla``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``test_data`` with an added 'Predicted_bookmark' column.

    Raises
    ------
    ValueError
        If 'Summary Sentiment' holds values that cannot be parsed as numbers
        (raised by ``pandas.to_numeric``).
    """
    if vectorizer is None:
        vectorizer = tfidf_vectorizer
    if model is None:
        model = logistic_vanilla

    # Work on a copy so the caller's frame (and any other name bound to it)
    # is left untouched and the call can be repeated safely.
    result = test_data.copy()

    text = (
        result[['Summary', 'Headline', 'Source']]
        .astype(str)
        .apply(' '.join, axis=1)
    )
    text_features = vectorizer.transform(text)

    # Reuse the caller's index so rows stay aligned even when the frame does
    # not carry a default 0..n-1 index.
    features = pd.DataFrame(text_features.toarray(), index=result.index)
    # The column may arrive as strings (e.g. straight from csv.reader), so
    # convert explicitly; assign positionally to avoid index re-alignment.
    features['Summary Sentiment'] = pd.to_numeric(
        result['Summary Sentiment']
    ).to_numpy()

    # Pass a plain array: a frame with mixed int/str column labels is
    # rejected by recent scikit-learn releases. Column order is unchanged.
    result['Predicted_bookmark'] = model.predict(features.to_numpy())
    return result

In [8]:
def predict_bookmark_source_map(test_data, vectorizer=None, model=None):
    """Predict a bookmark label using text, sentiment and an encoded source.

    'Summary' and 'Headline' are joined with a single space and passed
    through ``vectorizer.transform``. 'Source' is converted to an integer
    code ('Zerodha' -> 0, 'Money Control' -> 1, 'Economic Times' -> 2) and,
    together with the numeric 'Summary Sentiment', appended to the text
    features before calling ``model.predict``.

    Parameters
    ----------
    test_data : pandas.DataFrame
        Must contain 'Summary', 'Headline', 'Source' and
        'Summary Sentiment'. It is not modified.
    vectorizer : object with ``transform``, optional
        Defaults to the module-level ``tfidf_vectorizer_source_map``.
    model : object with ``predict``, optional
        Defaults to the module-level ``logistic_vanilla_source_map``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``test_data`` in which 'Source' holds the integer codes
        and a 'Predicted_bookmark' column has been added.

    Raises
    ------
    ValueError
        If 'Source' contains a value without a code, or if
        'Summary Sentiment' cannot be parsed as numbers.
    """
    if vectorizer is None:
        vectorizer = tfidf_vectorizer_source_map
    if model is None:
        model = logistic_vanilla_source_map

    source_codes = {'Zerodha': 0, 'Money Control': 1, 'Economic Times': 2}

    # Work on a copy: encoding 'Source' in the caller's frame would make a
    # second call map the already-encoded integers to NaN.
    result = test_data.copy()

    text = (
        result[['Summary', 'Headline']]
        .astype(str)
        .apply(' '.join, axis=1)
    )
    text_features = vectorizer.transform(text)

    encoded_source = result['Source'].map(source_codes)
    unmapped = encoded_source.isna()
    if unmapped.any():
        # Fail with the offending values instead of feeding NaN to the model.
        unknown = sorted(set(result.loc[unmapped, 'Source'].astype(str)))
        raise ValueError(f"Unrecognised 'Source' value(s): {unknown}")
    result['Source'] = encoded_source.astype('int64')

    # Reuse the caller's index so rows stay aligned for non-default indexes;
    # extra columns are assigned positionally to avoid re-alignment.
    features = pd.DataFrame(text_features.toarray(), index=result.index)
    features['Summary Sentiment'] = pd.to_numeric(
        result['Summary Sentiment']
    ).to_numpy()
    features['Source'] = result['Source'].to_numpy()

    # Pass a plain array: a frame with mixed int/str column labels is
    # rejected by recent scikit-learn releases. Column order is unchanged.
    result['Predicted_bookmark'] = model.predict(features.to_numpy())
    return result

In [9]:
# NOTE(review): absolute, machine-specific Windows path; the notebook will
# only run elsewhere after this is pointed at a local copy of the export.
csv_file = "C:\\Users\\taswi\\Python Project\\export news\\news_2024-02-18_00-03-46.csv"
# Parse the export into a DataFrame (all values are strings at this point).
test_data = read_data(csv_file)
# Preview the first rows as the cell output.
test_data.head()

Unnamed: 0,Bookmark,Datetime,Summary Sentiment,Headline,Summary,Source
0,No,57:39.6,0.99,Kamal Haasan unveils Sivakarthikeyan-Sai Palla...,"This week, the teaser of Sivakarthikeyan's Tam...",Zerodha
1,No,57:39.6,-0.98,India sends relief material to cholera-affecte...,The aid weighing approx 3.5 tons comprises wat...,Zerodha
2,No,57:39.6,0.97,Not a surveillance satellite: ISRO on collabor...,The Bengaluru-headquartered ISRO is involved w...,Zerodha
3,No,57:39.6,-0.99,Farmers' protest: Haryana extends mobile inter...,Protesting farmers have stayed put at Shambhu ...,Zerodha
4,No,57:39.6,0.98,"Hold LS, assembly polls together but not with ...","In a memorandum submitted to the committee, JD...",Zerodha


In [10]:
# Example usage: score the loaded rows with the text + sentiment model.
# A copy is passed so that `predicted_data` is never the same object as
# `test_data`; later cells that touch `test_data` then cannot alter the
# result shown for this model, and this cell can be re-run safely.
predicted_data = predict_bookmark(test_data.copy())



In [11]:
# Render the scored frame; 'Predicted_bookmark' is the column added by
# predict_bookmark.
display(predicted_data)

Unnamed: 0,Bookmark,Datetime,Summary Sentiment,Headline,Summary,Source,Predicted_bookmark
0,No,57:39.6,0.99,Kamal Haasan unveils Sivakarthikeyan-Sai Palla...,"This week, the teaser of Sivakarthikeyan's Tam...",Zerodha,0
1,No,57:39.6,-0.98,India sends relief material to cholera-affecte...,The aid weighing approx 3.5 tons comprises wat...,Zerodha,0
2,No,57:39.6,0.97,Not a surveillance satellite: ISRO on collabor...,The Bengaluru-headquartered ISRO is involved w...,Zerodha,0
3,No,57:39.6,-0.99,Farmers' protest: Haryana extends mobile inter...,Protesting farmers have stayed put at Shambhu ...,Zerodha,0
4,No,57:39.6,0.98,"Hold LS, assembly polls together but not with ...","In a memorandum submitted to the committee, JD...",Zerodha,0
...,...,...,...,...,...,...,...
240,No,16/02/2024 10:19,-0.96,"Sensex, Nifty rise on positive global cues; an...",Experts expect markets to behave in a range-bo...,Money Control,0
241,No,16/02/2024 10:18,-0.99,Entero Healthcare Solutions off to a weak star...,Entero Healthcare Solutions IPO: The shares op...,Money Control,0
242,No,16/02/2024 10:18,-0.99,Gujarat Gas falls 6% as brokerages turn bearis...,Volumes were impacted by a further market shar...,Money Control,0
243,Yes,16/02/2024 10:18,0.99,HDFC net profit surges over 220% YoY in Q3,"EBITDA surged 30.9% to Rs 645 Crores, with EBI...",Money Control,1


In [12]:
# Count how many rows received each predicted label.
predicted_data.Predicted_bookmark.value_counts()

0    243
1      2
Name: Predicted_bookmark, dtype: int64

In [13]:
# Score the same rows with the model that also uses the encoded 'Source'.
# A copy is passed so `test_data` keeps its original 'Source' strings: the
# cell stays re-runnable and results from earlier cells are not overwritten.
predict_bookmark_source_data = predict_bookmark_source_map(test_data.copy())



In [14]:
# Render the scored frame; here 'Source' holds the integer codes assigned
# by predict_bookmark_source_map.
display(predict_bookmark_source_data)

Unnamed: 0,Bookmark,Datetime,Summary Sentiment,Headline,Summary,Source,Predicted_bookmark
0,No,57:39.6,0.99,Kamal Haasan unveils Sivakarthikeyan-Sai Palla...,"This week, the teaser of Sivakarthikeyan's Tam...",0,0
1,No,57:39.6,-0.98,India sends relief material to cholera-affecte...,The aid weighing approx 3.5 tons comprises wat...,0,0
2,No,57:39.6,0.97,Not a surveillance satellite: ISRO on collabor...,The Bengaluru-headquartered ISRO is involved w...,0,0
3,No,57:39.6,-0.99,Farmers' protest: Haryana extends mobile inter...,Protesting farmers have stayed put at Shambhu ...,0,0
4,No,57:39.6,0.98,"Hold LS, assembly polls together but not with ...","In a memorandum submitted to the committee, JD...",0,0
...,...,...,...,...,...,...,...
240,No,16/02/2024 10:19,-0.96,"Sensex, Nifty rise on positive global cues; an...",Experts expect markets to behave in a range-bo...,1,0
241,No,16/02/2024 10:18,-0.99,Entero Healthcare Solutions off to a weak star...,Entero Healthcare Solutions IPO: The shares op...,1,0
242,No,16/02/2024 10:18,-0.99,Gujarat Gas falls 6% as brokerages turn bearis...,Volumes were impacted by a further market shar...,1,0
243,Yes,16/02/2024 10:18,0.99,HDFC net profit surges over 220% YoY in Q3,"EBITDA surged 30.9% to Rs 645 Crores, with EBI...",1,1


In [15]:
# Count how many rows received each predicted label from the source-aware model.
predict_bookmark_source_data.Predicted_bookmark.value_counts()

0    243
1      2
Name: Predicted_bookmark, dtype: int64