In [1]:
# `--install-option` was removed in pip 23.1, and LightGBM >= 4.0 selects the
# GPU build through CMake config settings on a from-source install.
pip install lightgbm --no-binary lightgbm --config-settings=cmake.define.USE_GPU=ON

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split, GridSearchCV
from lightgbm import LGBMClassifier
from sklearn.metrics import make_scorer, f1_score

# Read the labelled training set and the test set from the working directory
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

# Bag-of-words features: the vocabulary is learned from the training text only
# and then reused unchanged to encode the test text. Both count matrices are
# cast to float64 as soon as they are produced.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(train_df['text']).astype('float64')
X_test = vectorizer.transform(test_df['text']).astype('float64')
y_train = train_df['label']

# Hold out 20% of the training rows as a validation set; the fixed seed keeps
# the split repeatable between runs.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train,
    y_train,
    random_state=42,
    test_size=0.2,
)

# Define the LightGBM classifier and the hyperparameter grid to search over
# (3 x 3 x 3 = 27 combinations).
# NOTE(review): the estimator is created with default settings, so no GPU
# device is requested here -- confirm whether that is intended.
lgbm = LGBMClassifier()
params = {
    'learning_rate': [0.05, 0.1, 0.15],
    'max_depth': [5, 10, 15],
    'n_estimators': [50, 100, 200]
}

# Define the scoring metric as macro F1 score
macro_f1_scorer = make_scorer(f1_score, average='macro')

# Perform an exhaustive grid search with 5-fold cross-validation, running the
# candidate fits in parallel (n_jobs=-1).
grid_search = GridSearchCV(lgbm, params, cv=5, scoring=macro_f1_scorer, verbose=1, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Print the best hyperparameters and the corresponding mean cross-validated
# macro F1 score
print("Best hyperparameters: ", grid_search.best_params_)
print("Macro F1 score: ", grid_search.best_score_)

# The cross-validated score above is the same number that was used to pick the
# hyperparameters, so it is optimistic. Also score the refit best model on the
# held-out validation split, which was not used to fit or select the model.
valid_score = macro_f1_scorer(grid_search.best_estimator_, X_valid, y_valid)
print("Validation macro F1 score: ", valid_score)

# Take the estimator that the grid search refit with the winning
# hyperparameters and label the test features with it
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Pair each test id with its predicted label and write the result as the
# submission file (without the DataFrame index column)
submission_columns = {'id': test_df['id'], 'label': y_pred}
submission_df = pd.DataFrame(submission_columns)
submission_df.to_csv('submission.csv', index=False)
