## 1. Importing necessary libraries

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.linear_model import LogisticRegression


In [None]:
# Mount Google Drive at /content/drive so the CSV files referenced below
# (under /content/drive/MyDrive/data) can be read.
# NOTE(review): Colab-only; presumably prompts for authorization on first run.
from google.colab import drive
drive.mount('/content/drive')

## 2. Prepare data

### 2.1. Load data

In [None]:
# Locations of the labelled training and test sets on the mounted Drive.
train_file = "/content/drive/MyDrive/data/TrainingSet.csv"
test_file = "/content/drive/MyDrive/data/TestSet.csv"

# Read both CSVs in one pass, training set first, then test set.
df_train, df_test = (pd.read_csv(csv_path) for csv_path in (train_file, test_file))


### 2.2. TF-IDF Vectorization

In [None]:
# Split each frame into its text column (features) and its label column (target).
X_train, y_train = df_train['sentence'], df_train['label']
X_test, y_test = df_test['sentence'], df_test['label']

# Fit the TF-IDF vectorizer (max_features=5000) on the training text only,
# then apply the already-fitted vectorizer to the test text.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


## 3. Model training

### 3.1 Default parameter values

In [None]:
# Baseline model: logistic regression with default settings; only the random
# seed is pinned.
logistic_model = LogisticRegression(random_state=0)
logistic_model.fit(X=X_train_tfidf, y=y_train)

In [None]:
# Predict labels for the training and test matrices with the fitted baseline.
y_pred_train, y_pred_test = (
    logistic_model.predict(features)
    for features in (X_train_tfidf, X_test_tfidf)
)

In [None]:
# Accuracy of the baseline on the data it was fitted on and on the test set.
accuracy_train = accuracy_score(y_true=y_train, y_pred=y_pred_train)
accuracy_test = accuracy_score(y_true=y_test, y_pred=y_pred_test)
print(f"Train: {accuracy_train:.4f}", f"Test: {accuracy_test:.4f}", sep="\n")

### 3.2 Hyperparameter tuning

In [None]:
import numpy as np
from sklearn.model_selection import GridSearchCV

In [None]:
# Search space for LogisticRegression, split by solver family so that every
# candidate is a penalty/solver pairing the estimator actually accepts.
# Crossing all penalties with all solvers in a single dict produces many
# invalid candidates (e.g. 'l1' with 'lbfgs', or 'elasticnet' without
# l1_ratio); those fits fail and are scored as NaN, wasting search time.
#
# The unpenalized option is intentionally left out: its spelling differs
# between scikit-learn releases ('none' vs None), and C is ignored when no
# penalty is applied, so every C value would refit the same model. The largest
# C values below already approximate an unpenalized fit.
c_values = np.logspace(-4, 4, 20)        # inverse regularization strength
max_iter_values = [100, 1000, 2500, 5000]  # solver iteration budgets

param_grid = [
    # lbfgs, newton-cg and sag only support the L2 penalty.
    {'penalty': ['l2'],
     'C': c_values,
     'solver': ['lbfgs', 'newton-cg', 'sag'],
     'max_iter': max_iter_values},
    # liblinear and saga support both L1 and L2.
    {'penalty': ['l1', 'l2'],
     'C': c_values,
     'solver': ['liblinear', 'saga'],
     'max_iter': max_iter_values},
    # Elastic-net is only implemented by saga and requires an l1_ratio.
    {'penalty': ['elasticnet'],
     'C': c_values,
     'solver': ['saga'],
     'l1_ratio': [0.25, 0.5, 0.75],
     'max_iter': max_iter_values},
]

In [None]:
# Grid search over param_grid with 3-fold cross-validation; n_jobs=-1 lets the
# candidate fits run in parallel.
clf = GridSearchCV(
    estimator=logistic_model,
    param_grid=param_grid,
    cv=3,
    verbose=True,
    n_jobs=-1,
)
clf

In [None]:
# Run the search on the training matrix.
# NOTE(review): fit() presumably returns the search object itself, so best_clf
# is the fitted GridSearchCV; the winning model is its best_estimator_.
best_clf = clf.fit(X=X_train_tfidf, y=y_train)
best_clf.best_estimator_

Fitting 3 folds for each of 1600 candidates, totalling 4800 fits
