In [5]:
import pandas as pd
import numpy as np
import sys
import os

# Make the project's `src` directory importable so `models.*` resolves.
# The path is normalised and added only once, so re-running this cell does not
# keep appending duplicate '<cwd>/../src' entries to sys.path.
SRC_DIR = os.path.abspath(os.path.join(os.getcwd(), '..', 'src'))
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

from models.baseline_models_0149 import BaselineModels
import pickle

# Banner marking the start of the quick smoke-test run.
print("🚀 QUICK TEST - Member 0149")
print("=" * 40)

🚀 QUICK TEST - Member 0149


In [8]:
# Test 1: Data Loading
# Smoke-check that the processed training CSV can be read and exposes a
# `label` column. Failures are reported instead of raised so the remaining
# cells can still run; the quick-training cell checks for `train_df` first.
print("\n Test 1: Data Loading")  # "\n" (not "\\n") so a real blank line is printed
try:
    train_df = pd.read_csv('../data/processed/train_processed.csv')
    print(f" Training data loaded: {train_df.shape}")
    # Show the five most frequent labels as a dict so the summary stays on one
    # line instead of dumping the Series repr (index name, dtype footer, ...).
    label_counts = train_df['label'].value_counts().head().to_dict()
    print(f"   Sample labels: {label_counts}")
except Exception as e:
    # Deliberately broad: this is a best-effort check that should never abort
    # the notebook; the error text is surfaced to the reader.
    print(f" Data loading failed: {e}")


\n Test 1: Data Loading
 Training data loaded: (10240, 14)
   Sample labels: label
half-true      2114
false          1995
mostly-true    1962
true           1676
barely-true    1654
Name: count, dtype: int64


In [15]:
# Test 2: Text Preprocessing
# Run the preprocessor's clean_text on a few representative inputs (mixed case
# with digits and punctuation, a plain sentence, and an empty string) and print
# each input next to its cleaned form for visual inspection.
print("\n🔧 Test 2: Text Preprocessing")  # "\n" (not "\\n") so a real blank line is printed
baseline = BaselineModels()
test_texts = [
    "This is a TEST statement with NUMBERS 123!",
    "The president said something important today.",
    ""  # edge case: empty input
]

for text in test_texts:
    clean_text = baseline.preprocessor.clean_text(text)
    print(f"Original: '{text}'")
    print(f"Cleaned:  '{clean_text}'")
    print()

\n🔧 Test 2: Text Preprocessing
Original: 'This is a TEST statement with NUMBERS 123!'
Cleaned:  'this is a test statement with numbers'

Original: 'The president said something important today.'
Cleaned:  'the president said something important today'

Original: ''
Cleaned:  ''



In [17]:
 #Test 3: Quick Model Training (small sample)
# Build a small, reproducible sample of the training data, preprocess its
# statements and encode its labels, then report the resulting sizes.
print("\n Test 3: Quick Model Training")  # "\n" (not "\\n") so a real blank line is printed
if 'train_df' in locals():
    # Use small sample for quick test; cap at the frame size so a dataset with
    # fewer than 1000 rows does not make DataFrame.sample raise.
    sample_df = train_df.sample(n=min(1000, len(train_df)), random_state=42)

    X_sample = baseline.preprocessor.preprocess_texts(sample_df['statement'])

    # Labels missing from the mapping are encoded as class 0 below; list them
    # so the fallback is visible instead of silently inflating class 0.
    # Assumes label_mapping is dict-like (it already supports .get).
    unmapped_labels = {
        label for label in sample_df['label'].unique()
        if label not in baseline.label_mapping
    }
    y_sample = [baseline.label_mapping.get(label, 0) for label in sample_df['label']]

    if unmapped_labels:
        print(f" Labels not in label_mapping (encoded as 0): {unmapped_labels}")
    print(f"Sample size: {len(X_sample)}")
    print(f"Label distribution: {pd.Series(y_sample).value_counts().to_dict()}")
else:
    # Make the skip explicit rather than producing no output at all.
    print(" Skipped: train_df is not defined (run the data loading cell first)")

\n🏋️ Test 3: Quick Model Training
Sample size: 1000
Label distribution: {3: 207, 4: 201, 1: 185, 2: 167, 5: 166, 0: 74}


In [20]:
  # Test TF-IDF pipeline creation
# Build both TF-IDF pipelines in order, confirming each one as soon as it is
# created; any error is reported rather than raised.
try:
    tfidf_lr = baseline.create_tfidf_logistic_model()
    print(" TF-IDF + Logistic Regression pipeline created")

    tfidf_rf = baseline.create_tfidf_rf_model()
    print(" TF-IDF + Random Forest pipeline created")
except Exception as exc:
    print(f" Pipeline creation failed: {exc}")

 TF-IDF + Logistic Regression pipeline created
 TF-IDF + Random Forest pipeline created


In [23]:
# Test 4: Model Loading (if models exist)
# For each expected model pickle: load it, run one prediction on a fixed
# statement, and print the predicted label with its highest class probability.
# A missing file is reported as "not trained yet"; any other error is printed.
print("\n Test 4: Model Loading")  # "\n" (not "\\n") so a real blank line is printed
# NOTE(review): these paths are relative to the working directory, whereas the
# data and src paths used earlier in this notebook start with '../'. Confirm
# the models are not actually stored under '../models/baseline/'.
model_files = [
    'models/baseline/tfidf_logistic_0149.pkl',
    'models/baseline/tfidf_rf_0149.pkl'
]

for model_file in model_files:
    try:
        # SECURITY: pickle.load can execute arbitrary code contained in the
        # file; only load model files produced by this project.
        with open(model_file, 'rb') as f:
            model = pickle.load(f)
        print(f" {model_file} loaded successfully")

        # Quick prediction test
        test_statement = "The economy is doing great"
        pred = model.predict([test_statement])[0]
        # Confidence is reported as the largest class probability.
        prob = max(model.predict_proba([test_statement])[0])
        print(f"   Test prediction: {baseline.reverse_label_mapping[pred]} (confidence: {prob:.3f})")

    except FileNotFoundError:
        print(f" {model_file} not found (not trained yet)")
    except Exception as e:
        print(f" {model_file} loading failed: {e}")

print("\n Quick test completed!")
        

\n Test 4: Model Loading
 models/baseline/tfidf_logistic_0149.pkl not found (not trained yet)
 models/baseline/tfidf_rf_0149.pkl not found (not trained yet)
\n Quick test completed!
