In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [2]:
# Load the train and test TSV files
train_data = pd.read_csv('/content/test.tsv', sep='\t')
test_data = pd.read_csv('/content/train.tsv', sep='\t')

In [3]:
# Split the train data into training and validation sets
train_data, val_data = train_test_split(train_data, test_size=0.2)

In [4]:
# Vectorize the training data using a TfidfVectorizer
vectorizer = TfidfVectorizer()
train_vectors = vectorizer.fit_transform(train_data['text'])

In [6]:
# Train an SVM classifier for each target
svm_classifiers = {}
targets = ['Trump','Biden','West','HOF']
for target in targets:
    svm_classifier = SVC(kernel='linear', C=1.0, class_weight='balanced')
    svm_classifier.fit(train_vectors, train_data[target])
    svm_classifiers[target] = svm_classifier


In [7]:
# Vectorize the validation and test data using the same vectorizer
val_vectors = vectorizer.transform(val_data['text'])
test_vectors = vectorizer.transform(test_data['text'])

In [8]:
# Make predictions using the trained SVM classifiers on validation and test data
val_predictions = {}
test_predictions = {}
for target in targets:
    val_predictions[target] = svm_classifiers[target].predict(val_vectors)
    test_predictions[target] = svm_classifiers[target].predict(test_vectors)

In [9]:
# Compute the accuracy for each target on validation data
val_accuracies = {}
for target in targets:
    val_accuracies[target] = accuracy_score(val_data[target], val_predictions[target])
    print(f"Validation accuracy for target {target}: {val_accuracies[target]:.4f}")


Validation accuracy for target Trump: 0.7250
Validation accuracy for target Biden: 0.7417
Validation accuracy for target West: 1.0000
Validation accuracy for target HOF: 0.9000


In [11]:
# Compute the average accuracy across all targets on validation data
average_val_accuracy = sum(val_accuracies.values()) / len(val_accuracies)
print("Average validation accuracy: ", average_val_accuracy)

Average validation accuracy:  0.8416666666666667


In [13]:
# Save the predictions on test data to a CSV file
test_output = pd.DataFrame(test_predictions, columns=targets)
test_output.to_csv('test_output.csv', index=False)