In [7]:
import pandas as pd
import numpy as np
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.feature_selection import SelectKBest, f_classif
from scipy.sparse import hstack
import warnings
warnings.filterwarnings("ignore")

# 💾 Load the trained model
def _unpickle(path):
    """Return the object stored in the pickle file at ``path``.

    NOTE: unpickling can execute arbitrary code, so these artifact files
    must come from a trusted source.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


model = _unpickle("rf_model.pkl")

print("‚úÖ Model loaded successfully!")

# 💾 Load preprocessors (one pickle file per object, read in this order)
tfidf = _unpickle('tfidf_vectorizer.pkl')
encoder = _unpickle('onehot_encoder.pkl')
scaler = _unpickle('standard_scaler.pkl')
le = _unpickle('label_encoder.pkl')
selector = _unpickle('feature_selector.pkl')

print("‚úÖ All preprocessors loaded successfully!")

# ✅ Preprocessing Function for New Data
def preprocess_test_point(test_df, tfidf, encoder, scaler, selector):
    """Convert raw statement rows into the matrix returned by ``selector``.

    The function works on a copy of ``test_df``; the caller's frame is left
    untouched.

    Steps:
      1. Fill missing values: ``speaker_job_title`` with "Not_available",
         the other text columns with "Unknown", the count columns with 0.0.
      2. ``tfidf.transform`` on the ``statement`` column (missing statements
         are NOT filled here; they are passed through as-is).
      3. ``encoder.transform`` on the categorical columns.
      4. ``scaler.transform`` on the count columns.
      5. Horizontally stack the three results and densify them.
      6. ``selector.transform`` on the dense matrix.

    Parameters
    ----------
    test_df : pandas.DataFrame
        Must contain ``statement``, ``subject``, ``speaker``,
        ``speaker_job_title``, ``state_info``, ``party_affiliation``,
        ``context`` and the five ``*_counts`` columns.
    tfidf, encoder, scaler, selector
        Objects exposing ``transform``; presumably the fitted scikit-learn
        preprocessors saved at training time (verify against the training
        notebook).

    Returns
    -------
    Whatever ``selector.transform`` returns for the dense combined matrix.

    Raises
    ------
    KeyError
        If any of the required columns is missing from ``test_df``.
    """
    categorical_features = ['speaker_job_title', 'state_info', 'party_affiliation']
    numerical_features = ['barely_true_counts', 'false_counts', 'half_true_counts',
                          'mostly_true_counts', 'pants_on_fire_counts']

    # 1️⃣ Handle missing values
    fill_values = {
        'speaker_job_title': "Not_available",
        'state_info': 'Unknown',
        'context': 'Unknown',
        'party_affiliation': 'Unknown',
        'subject': 'Unknown',
        'speaker': 'Unknown',
    }
    fill_values.update(dict.fromkeys(numerical_features, 0.0))

    test_df = test_df.copy()
    for column, default in fill_values.items():
        # Assign the result back instead of calling
        # `test_df[column].fillna(..., inplace=True)`: that chained in-place
        # form operates on a temporary Series and, with pandas
        # Copy-on-Write, never updates `test_df`.
        test_df[column] = test_df[column].fillna(default)

    # 2️⃣ Text Feature Engineering (TF-IDF)
    X_text = tfidf.transform(test_df['statement'])

    # 3️⃣ Categorical Feature Engineering
    X_cat = encoder.transform(test_df[categorical_features])

    # 4️⃣ Numerical Feature Engineering
    X_num = scaler.transform(test_df[numerical_features])

    # 5️⃣ Combine All Features (column order: text, categorical, numerical)
    X_combined = hstack([X_text, X_cat, X_num])
    X_dense = X_combined.toarray()

    # 6️⃣ Feature Selection
    X_selected = selector.transform(X_dense)

    return X_selected

# 🎯 GET SINGLE STATEMENT FROM USER
def get_single_statement():
    """Interactively collect one statement and its metadata from stdin.

    Prompts appear in the order: statement, subject, speaker, job title,
    state, party, the five count fields, context. An empty reply to a text
    prompt falls back to 'Unknown' ('Not_available' for the job title); the
    statement itself has no fallback. An empty reply to a count prompt
    yields 0, and a reply that is not an integer is rejected and asked
    again instead of aborting the whole session with ``ValueError``.

    Returns
    -------
    dict
        Column name -> single-element list, ready for ``pd.DataFrame``.
        ``ID`` is fixed to 1 and ``label`` to '' (nothing to predict from).
    """
    def ask_count(prompt, default=0):
        # Keep asking until the reply parses as an int; blank means default.
        while True:
            reply = input(prompt).strip()
            if not reply:
                return default
            try:
                return int(reply)
            except ValueError:
                print(f"  '{reply}' is not a whole number; please try again.")

    print("\n" + "="*50)
    print("ü§ñ FAKE NEWS DETECTION SYSTEM")
    print("="*50)
    print("Please enter the statement details:\n")

    # Dict values are evaluated top to bottom, which fixes the prompt order.
    sample_data = {
        'ID': [1],
        'label': [''],  # Empty for prediction
        'statement': [input("Enter the statement: ")],
        'subject': [input("Enter subject/topic: ") or 'Unknown'],
        'speaker': [input("Enter speaker name: ") or 'Unknown'],
        'speaker_job_title': [input("Enter speaker job title: ") or 'Not_available'],
        'state_info': [input("Enter state/location: ") or 'Unknown'],
        'party_affiliation': [input("Enter party affiliation: ") or 'Unknown'],
        'barely_true_counts': [ask_count("Enter barely true counts (default 0): ")],
        'false_counts': [ask_count("Enter false counts (default 0): ")],
        'half_true_counts': [ask_count("Enter half true counts (default 0): ")],
        'mostly_true_counts': [ask_count("Enter mostly true counts (default 0): ")],
        'pants_on_fire_counts': [ask_count("Enter pants on fire counts (default 0): ")],
        'context': [input("Enter context: ") or 'Unknown']
    }

    return sample_data

# 🎯 MAIN FUNCTION
def main():
    """Prompt for one statement, run the pipeline and print the prediction.

    Uses the module-level ``model``, ``tfidf``, ``encoder``, ``scaler``,
    ``selector`` and ``le`` objects. Prints the predicted label, the highest
    class probability as "confidence", and the full probability distribution.
    """
    # Get user input
    sample_data = get_single_statement()
    test_df = pd.DataFrame(sample_data)

    print("\n" + "="*50)
    print("üîÑ Processing your statement...")
    print("="*50)

    # Preprocess the test point
    final_test = preprocess_test_point(test_df, tfidf, encoder, scaler, selector)

    # Predict using the trained model
    prediction = model.predict(final_test)
    prediction_proba = model.predict_proba(final_test)

    # Convert back to original label
    predicted_label = le.inverse_transform(prediction)[0]
    confidence = np.max(prediction_proba, axis=1)[0]

    print("\nüéØ PREDICTION RESULTS:")
    print(f"Statement: '{test_df['statement'].iloc[0]}'")
    print(f"üè∑Ô∏è Predicted Truthfulness: {predicted_label}")
    print(f"üìä Confidence: {confidence:.2%}")

    # For scikit-learn classifiers the predict_proba columns are ordered by
    # model.classes_, which is not guaranteed to match le.classes_ one-to-one
    # (e.g. a class absent from the training split). Label each column from
    # the model's own class list; fall back to the encoder's list only when
    # the model does not expose classes_.
    model_classes = getattr(model, "classes_", None)
    if model_classes is not None:
        class_names = le.inverse_transform(model_classes)
    else:
        class_names = le.classes_

    print("\nüìà Probability Distribution:")
    for class_name, prob in zip(class_names, prediction_proba[0]):
        print(f"  {class_name}: {prob:.2%}")

# Run the interactive flow when this cell/script is executed directly
if __name__ == "__main__":
    main()
    

‚úÖ Model loaded successfully!
‚úÖ All preprocessors loaded successfully!

ü§ñ FAKE NEWS DETECTION SYSTEM
Please enter the statement details:


üîÑ Processing your statement...

üéØ PREDICTION RESULTS:
Statement: 'mostly in india abortions rate is around 65%'
üè∑Ô∏è Predicted Truthfulness: false
üìä Confidence: 77.94%

üìà Probability Distribution:
  barely-true: 5.76%
  false: 77.94%
  half-true: 4.80%
  mostly-true: 7.00%
  pants-fire: 0.50%
  true: 4.00%
