# Baseline Model for Google QUEST Challenge

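This notebook implements a simple baseline: TF-IDF features built from the concatenated question title, question body, and answer, fed to one Ridge regression model per target and evaluated with 5-fold cross-validation.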

In [1]:
import os
import warnings

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
warnings.filterwarnings('ignore')

# Read the training set, the test set and the submission template from disk.
train = pd.read_csv('/home/data/train.csv')
test = pd.read_csv('/home/data/test.csv')
sample_submission = pd.read_csv('/home/data/sample_submission.csv')

# Quick sanity check on the dimensions of each frame.
print(f"Train shape: {train.shape}")
print(f"Test shape: {test.shape}")
print(f"Sample submission shape: {sample_submission.shape}")

# Targets are the columns present in the training frame but absent from the
# test frame; the identifier column is excluded explicitly as well.
test_columns = set(test.columns)
target_cols = [
    name
    for name in train.columns
    if name != 'qa_id' and name not in test_columns
]
print(f"Number of target columns: {len(target_cols)}")
print(f"Target columns: {target_cols[:5]}...")

Train shape: (6079, 41)
Test shape: (476, 11)
Sample submission shape: (476, 31)
Number of target columns: 30
Target columns: ['question_asker_intent_understanding', 'question_body_critical', 'question_conversational', 'question_expect_short_answer', 'question_fact_seeking']...


In [2]:
# Build a single free-text field per row: title, body and answer joined by
# spaces. Missing values are replaced with empty strings first so that the
# concatenation never produces NaN.
for frame in (train, test):
    title = frame['question_title'].fillna('')
    body = frame['question_body'].fillna('')
    answer = frame['answer'].fillna('')
    frame['text'] = title + ' ' + body + ' ' + answer

# TF-IDF configuration: unigrams and bigrams, English stop words removed,
# terms seen in fewer than 2 documents or in more than 95% of documents
# dropped, and the vocabulary capped at 5000 entries.
tfidf_settings = {
    'max_features': 5000,
    'stop_words': 'english',
    'ngram_range': (1, 2),
    'min_df': 2,
    'max_df': 0.95,
}
vectorizer = TfidfVectorizer(**tfidf_settings)

# The vocabulary and IDF weights are learned on the training text only; the
# test text is projected onto that same vocabulary.
X_train_tfidf = vectorizer.fit_transform(train['text'])
X_test_tfidf = vectorizer.transform(test['text'])

print(f"TF-IDF feature shape: {X_train_tfidf.shape}")

# Target matrix: one row per training example, one column per target.
y_train = train[target_cols].values
print(f"Target shape: {y_train.shape}")

TF-IDF feature shape: (6079, 5000)
Target shape: (6079, 30)


In [3]:
# Cross-validation setup
n_folds = 5
kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)

# NOTE(review): plain KFold splits by row. If the training set contains
# several rows that share the same question text, those rows can land on both
# sides of a split and inflate the validation scores -- confirm against the
# data and consider a grouped split if so.

# With a fixed integer seed the fold assignment is the same on every call to
# split(), so compute it once instead of once per target.
fold_indices = list(kf.split(X_train_tfidf))

# Out-of-fold predictions (one row per training example) and fold-averaged
# test predictions (one row per test example); one column per target.
# The OOF buffer is allocated as float explicitly: zeros_like() would inherit
# the dtype of y_train and silently truncate the regression outputs if the
# targets were ever loaded as integers.
oof_predictions = np.zeros(y_train.shape, dtype=float)
test_predictions = np.zeros((len(test), len(target_cols)))

# Mean cross-validated RMSE of each target, in target_cols order.
scores = []

# Train an independent Ridge model per target and per fold.
for target_idx, target in enumerate(target_cols):
    print(f"Training for target: {target} ({target_idx+1}/{len(target_cols)})")

    # RMSE of each fold for the current target.
    target_scores = []

    for train_idx, val_idx in fold_indices:
        X_tr, X_val = X_train_tfidf[train_idx], X_train_tfidf[val_idx]
        y_tr, y_val = y_train[train_idx, target_idx], y_train[val_idx, target_idx]

        # Train Ridge regression
        model = Ridge(alpha=1.0, random_state=42)
        model.fit(X_tr, y_tr)

        # Predict on the held-out fold and store the out-of-fold values.
        val_pred = model.predict(X_val)
        oof_predictions[val_idx, target_idx] = val_pred

        # Calculate RMSE for this fold
        rmse = np.sqrt(mean_squared_error(y_val, val_pred))
        target_scores.append(rmse)

        # Each fold contributes 1/n_folds of the final test prediction, so the
        # accumulated value is the mean over the fold models.
        test_pred = model.predict(X_test_tfidf)
        test_predictions[:, target_idx] += test_pred / n_folds

    mean_score = np.mean(target_scores)
    std_score = np.std(target_scores)
    scores.append(mean_score)
    print(f"  Mean RMSE: {mean_score:.4f} ± {std_score:.4f}")

# Unweighted average of the per-target mean RMSEs.
overall_score = np.mean(scores)
print(f"\nOverall Mean RMSE: {overall_score:.4f}")

Training for target: question_asker_intent_understanding (1/30)


  Mean RMSE: 0.1311 ± 0.0035
Training for target: question_body_critical (2/30)
  Mean RMSE: 0.1991 ± 0.0035
Training for target: question_conversational (3/30)


  Mean RMSE: 0.1641 ± 0.0090
Training for target: question_expect_short_answer (4/30)
  Mean RMSE: 0.3580 ± 0.0040
Training for target: question_fact_seeking (5/30)


  Mean RMSE: 0.2887 ± 0.0072
Training for target: question_has_commonly_accepted_answer (6/30)
  Mean RMSE: 0.3033 ± 0.0025
Training for target: question_interestingness_others (7/30)


  Mean RMSE: 0.1352 ± 0.0011
Training for target: question_interestingness_self (8/30)


  Mean RMSE: 0.1697 ± 0.0040
Training for target: question_multi_intent (9/30)


  Mean RMSE: 0.3223 ± 0.0034
Training for target: question_not_really_a_question (10/30)


  Mean RMSE: 0.0481 ± 0.0066
Training for target: question_opinion_seeking (11/30)


  Mean RMSE: 0.3438 ± 0.0032
Training for target: question_type_choice (12/30)
  Mean RMSE: 0.3379 ± 0.0036
Training for target: question_type_compare (13/30)


  Mean RMSE: 0.1381 ± 0.0065
Training for target: question_type_consequence (14/30)
  Mean RMSE: 0.0734 ± 0.0104
Training for target: question_type_definition (15/30)


  Mean RMSE: 0.1191 ± 0.0090
Training for target: question_type_entity (16/30)
  Mean RMSE: 0.1736 ± 0.0047
Training for target: question_type_instructions (17/30)


  Mean RMSE: 0.3175 ± 0.0063
Training for target: question_type_procedure (18/30)
  Mean RMSE: 0.2623 ± 0.0037
Training for target: question_type_reason_explanation (19/30)


  Mean RMSE: 0.3459 ± 0.0068
Training for target: question_type_spelling (20/30)


  Mean RMSE: 0.0183 ± 0.0066
Training for target: question_well_written (21/30)


  Mean RMSE: 0.1704 ± 0.0025
Training for target: answer_helpful (22/30)


  Mean RMSE: 0.1197 ± 0.0035
Training for target: answer_level_of_information (23/30)


  Mean RMSE: 0.1097 ± 0.0018
Training for target: answer_plausible (24/30)
  Mean RMSE: 0.0919 ± 0.0024
Training for target: answer_relevance (25/30)


  Mean RMSE: 0.0784 ± 0.0027
Training for target: answer_satisfaction (26/30)
  Mean RMSE: 0.1349 ± 0.0021
Training for target: answer_type_instructions (27/30)


  Mean RMSE: 0.3232 ± 0.0055
Training for target: answer_type_procedure (28/30)
  Mean RMSE: 0.2322 ± 0.0056
Training for target: answer_type_reason_explanation (29/30)


  Mean RMSE: 0.3667 ± 0.0067
Training for target: answer_well_written (30/30)


  Mean RMSE: 0.1067 ± 0.0027

Overall Mean RMSE: 0.1994


In [4]:
# Score the out-of-fold predictions with Spearman rank correlation, which is
# the metric that actually matters here (RMSE above is only a training-time
# diagnostic).
from scipy.stats import spearmanr

spearman_scores = []
# Walk the targets together with the matching column of true values and of
# out-of-fold predictions (transposing turns columns into rows to iterate).
for target, actual, predicted in zip(target_cols, y_train.T, oof_predictions.T):
    # Element 0 of the result is the correlation; the p-value is not used.
    corr = spearmanr(actual, predicted)[0]
    spearman_scores.append(corr)
    print(f"{target}: {corr:.4f}")

# Unweighted mean over all targets.
mean_spearman = np.mean(spearman_scores)
print(f"\nMean Spearman correlation: {mean_spearman:.4f}")

question_asker_intent_understanding: 0.2594
question_body_critical: 0.4195
question_conversational: 0.3133
question_expect_short_answer: 0.1564
question_fact_seeking: 0.2499
question_has_commonly_accepted_answer: 0.3550
question_interestingness_others: 0.2556
question_interestingness_self: 0.4202
question_multi_intent: 0.3110
question_not_really_a_question: 0.0250
question_opinion_seeking: 0.3510
question_type_choice: 0.4055
question_type_compare: 0.2556
question_type_consequence: 0.1160
question_type_definition: 0.2950
question_type_entity: 0.3431
question_type_instructions: 0.6599
question_type_procedure: 0.1915
question_type_reason_explanation: 0.4365
question_type_spelling: 0.0715
question_well_written: 0.3436
answer_helpful: 0.0869
answer_level_of_information: 0.1636
answer_plausible: 0.0555
answer_relevance: 0.0566
answer_satisfaction: 0.1151
answer_type_instructions: 0.6509
answer_type_procedure: 0.1633
answer_type_reason_explanation: 0.4503
answer_well_written: 0.0589

Mean Spe

In [5]:
# Create submission: the identifier column followed by one column per target
# holding the fold-averaged test predictions.
submission = pd.DataFrame({
    'qa_id': test['qa_id']
})

for i, target in enumerate(target_cols):
    submission[target] = test_predictions[:, i]

# Ensure predictions are in [0, 1] range (the linear model is unbounded).
submission[target_cols] = submission[target_cols].clip(0, 1)

# Align the columns with the submission template loaded earlier. This fails
# loudly if an expected column was never predicted, instead of writing a file
# whose header does not match the template.
expected_columns = sample_submission.columns.tolist()
missing_columns = [col for col in expected_columns if col not in submission.columns]
if missing_columns:
    raise ValueError(f"Submission is missing expected columns: {missing_columns}")
submission = submission[expected_columns]

print(f"Submission shape: {submission.shape}")
print(f"Submission columns: {submission.columns.tolist()}")
print("\nFirst few predictions:")
print(submission.head())

# Save submission. The output directory is created first because to_csv()
# does not create missing parent directories.
submission_path = '/home/submission/submission.csv'
os.makedirs(os.path.dirname(submission_path), exist_ok=True)
submission.to_csv(submission_path, index=False)
print("\nSubmission saved to /home/submission/submission.csv")

Submission shape: (476, 31)
Submission columns: ['qa_id', 'question_asker_intent_understanding', 'question_body_critical', 'question_conversational', 'question_expect_short_answer', 'question_fact_seeking', 'question_has_commonly_accepted_answer', 'question_interestingness_others', 'question_interestingness_self', 'question_multi_intent', 'question_not_really_a_question', 'question_opinion_seeking', 'question_type_choice', 'question_type_compare', 'question_type_consequence', 'question_type_definition', 'question_type_entity', 'question_type_instructions', 'question_type_procedure', 'question_type_reason_explanation', 'question_type_spelling', 'question_well_written', 'answer_helpful', 'answer_level_of_information', 'answer_plausible', 'answer_relevance', 'answer_satisfaction', 'answer_type_instructions', 'answer_type_procedure', 'answer_type_reason_explanation', 'answer_well_written']

First few predictions:
   qa_id  question_asker_intent_understanding  question_body_critical  \
0   