In [None]:
from sklearn.model_selection import KFold
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import RidgeClassifier
from sklearn.dummy import DummyClassifier
import numpy as np
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split , GridSearchCV
import pandas as pd

In [None]:
# Read the training and test CSV files from the Colab workspace.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ("/content/C_TRAIN.csv", "/content/C_TEST.csv")
)

In [None]:
# Training documents with their labels, and the documents from the test file.
x, y = train['Content'], train['Label']
xx_test = test["Content"]

In [None]:
# Count missing documents in the test text before filling them.
xx_test.isna().sum()

0

In [None]:
# Replace missing test documents with empty strings before they are passed to
# the vectorizer. Rebinding the name (instead of inplace=True) keeps this cell
# idempotent and does not write through to the `test` frame the Series was
# taken from.
xx_test = xx_test.fillna('')

In [None]:
# Re-check after filling: the count of missing test documents should be 0.
xx_test.isna().sum()

0

In [None]:
# TF-IDF text featurizer, constructed with all-default options.
tfidf_vectorizer = TfidfVectorizer()

In [None]:
# Fit the vectorizer on the training text and encode it, then encode the test
# text with that same fitted vectorizer so both share one feature space.
# NOTE(review): the vectorizer is fitted on all of `x` before the hold-out
# split performed further down, so hold-out rows contribute to the fitted
# vocabulary/IDF statistics and the hold-out accuracy may be slightly
# optimistic. Consider fitting inside a Pipeline if that matters.
x_tfidf = tfidf_vectorizer.fit_transform(x)
x_test_tfidf = tfidf_vectorizer.transform(xx_test)

In [None]:
# Hold out 20% of the vectorized training rows for evaluation; the split is
# shuffled with a fixed seed so it is reproducible.
split_options = {"test_size": 0.2, "random_state": 42, "shuffle": True}
x_train, x_test, y_train, y_test = train_test_split(x_tfidf, y, **split_options)

In [None]:
# Candidate estimators, keyed by the display name used when reporting results.
models = {'Ridge Classifier': RidgeClassifier()}

In [None]:
# Hyperparameter search space per model; keys must match those of `models`.
param_grids = {'Ridge Classifier': dict(alpha=[0.1, 1, 10])}

In [None]:
# Run a 5-fold, accuracy-scored grid search for every configured model on the
# training split and report the winning parameters and their mean CV score.
for model_name, model in models.items():
    print(f"Training {model_name}...")
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grids[model_name],
        scoring='accuracy',
        cv=5,
    ).fit(x_train, y_train)  # fit() returns the fitted search object
    print(
        f"Best Parameters: {grid_search.best_params_}",
        f"Best Score: {grid_search.best_score_:.4f}",
        "",  # trailing blank line between models
        sep="\n",
    )

Training Ridge Classifier...
Best Parameters: {'alpha': 1}
Best Score: 0.6305



In [None]:
 # Members of the voting ensemble as (name, estimator) pairs; currently a
 # single RidgeClassifier with alpha=1.
 estimators = [('rc', RidgeClassifier(alpha=1))]

In [None]:
# Hard-voting ensemble built from the `estimators` list.
voting_model = VotingClassifier(
    voting='hard',
    estimators=estimators,
)

In [None]:
# Fit the ensemble on the training portion of the split only; the hold-out
# rows (x_test / y_test) are kept back for evaluation.
voting_model.fit(x_train, y_train)

In [None]:
# Predict labels for the hold-out rows of the train/test split.
y_pred = voting_model.predict(x_test)

In [None]:
# Evaluate the voting classifier: accuracy of its predictions on the hold-out rows.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy of Voting classifier: {accuracy:.4f}")

Accuracy of Voting classifier: 0.6412


In [None]:
# Predict labels for the vectorized test-file documents.
# NOTE: this rebinds `y_pred`, replacing the hold-out predictions made earlier.
y_pred = voting_model.predict(x_test_tfidf)

In [None]:

# Create a submission file
# The submission must contain exactly `num_predictions_needed` rows; any rows
# without a prediction are labelled 'unknown'.
num_predictions_needed = 21000
num_missing = num_predictions_needed - len(y_pred)
if num_missing < 0:
    # Fail loudly instead of letting np.pad raise an opaque negative-width error.
    raise ValueError(
        f"Got {len(y_pred)} predictions but the submission only holds "
        f"{num_predictions_needed} rows"
    )
# Cast to object before padding: np.pad keeps the input dtype, so a
# fixed-width string array (e.g. '<U1') would silently truncate 'unknown'.
y_test_pred_padded = np.pad(
    np.asarray(y_pred, dtype=object),
    (0, num_missing),
    'constant',
    constant_values='unknown',
)
submission = pd.DataFrame({"ID": range(num_predictions_needed), "Label": y_test_pred_padded})

In [None]:

#print(test.shape)


In [None]:
# Sanity check: expect (num_predictions_needed, 2) — one row per ID, with the
# ID and Label columns.
print(submission.shape)

(21000, 2)


In [None]:
# Preview the first five submission rows.
submission.head(n=5)

Unnamed: 0,ID,Label
0,0,C
1,1,E
2,2,B
3,3,H
4,4,H


In [None]:
# Write the submission to the working directory; index=False keeps the
# DataFrame index out of the file so only the ID and Label columns are saved.
submission.to_csv("submission4.csv", index=False)