In [None]:
#import required libraries

import pandas as pd
import pickle
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# Fine-tune an SVM classifier on pre-vectorized tweet text and save it
# together with the vectorizer as a (vectorizer, model) tuple.

# Load Data
data = pd.read_csv('../Data/preprocessed_data.csv')
# Rows missing either the text or the label cannot be used for training.
data = data.dropna(subset=['tweet_text', 'cyberbullying_type'])

X = data['tweet_text']
y = data['cyberbullying_type']  # use the correct label column

# Load the original vectorizer from saved model.pkl. The pickle holds a
# 2-tuple; only the first element (the vectorizer) is needed here.
# SECURITY NOTE: pickle.load can execute arbitrary code. Only load this file
# if it was produced by this project and has not been tampered with.
with open('../App/model/model.pkl', 'rb') as f:
    tfidf, _ = pickle.load(f)

# Transform the text data with the already-fitted vectorizer (no re-fitting).
# NOTE(review): the vectorizer was fitted outside this script. If it was fitted
# on this same dataset, the cross-validation accuracy below is slightly
# optimistic because vocabulary/IDF statistics leak across folds -- confirm.
X_tfidf = tfidf.transform(X)

# Define the SVM model for tuning.
# Probability estimates are left OFF during the search: enabling them makes
# every fit run an extra internal 5-fold calibration, while accuracy scoring
# only uses predict(), which does not depend on the calibration.
svc = SVC()

# Define hyperparameters to tune.
# One grid per kernel: `gamma` is ignored by the linear kernel, so crossing it
# with kernel='linear' would only evaluate duplicate candidates.
param_grid = [
    {'kernel': ['linear'], 'C': [0.1, 1, 10]},
    {'kernel': ['rbf'], 'C': [0.1, 1, 10], 'gamma': ['scale', 'auto']},
]

# Setup GridSearchCV.
# refit=False because the final, probability-enabled model is fitted
# explicitly below; best_params_ and best_score_ are still populated for
# single-metric scoring.
grid_search = GridSearchCV(
    estimator=svc,
    param_grid=param_grid,
    cv=3,
    scoring='accuracy',
    verbose=2,
    n_jobs=-1,
    refit=False,
)

# Run the search
grid_search.fit(X_tfidf, y)

print("✅ Best Parameters:", grid_search.best_params_)
print("✅ Best Cross-Validation Accuracy:", grid_search.best_score_)

# Fit the final model once on all data with the winning parameters, this time
# with probability estimates enabled so the saved model offers predict_proba.
best_model = SVC(probability=True, **grid_search.best_params_)
best_model.fit(X_tfidf, y)

# Save Fine-Tuned Model (same (vectorizer, model) tuple layout as model.pkl)
with open('../App/model/fine_tune.pkl', 'wb') as f:
    pickle.dump((tfidf, best_model), f)

print("✅ Fine-tuned model saved as fine_tune.pkl")


Fitting 3 folds for each of 12 candidates, totalling 36 fits
✅ Best Parameters: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'}
✅ Best Cross-Validation Accuracy: 0.7577194083044668
✅ Fine-tuned model saved as fine_tune.pkl
