# In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

# Load the dataset. The path is machine-specific; fail with an actionable
# message (same exception type) when the file is not where we expect it.
filepath = "C:/Users/tagsa/Downloads/classic4.csv"
try:
    df = pd.read_csv(filepath)
except FileNotFoundError as err:
    raise FileNotFoundError(
        f"Arquivo de dados não encontrado: {filepath}. "
        "Ajuste a variável 'filepath' para o local do classic4.csv."
    ) from err

# Split into train and test sets (80/20), stratified on the label column so
# both splits keep the same class proportions; seeded for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(
    df['text'], df['class'], test_size=0.2, stratify=df['class'], random_state=42
)

# Turn the raw texts into token-count vectors. The vocabulary is learned from
# the training split only and then reused for the test split.
vectorizer = CountVectorizer()
x_train_vec = vectorizer.fit_transform(x_train)
x_test_vec = vectorizer.transform(x_test)

# Re-weight the count vectors as TF-IDF, again fitting on the training split
# only.
# NOTE(review): the vectorizer and TF-IDF weights are fitted on the whole
# training split *before* cross-validation, so every CV validation fold has
# already influenced the vocabulary/IDF. Wrapping these steps and the
# classifier in a sklearn Pipeline would remove that (mild) leakage from the
# CV scores; the held-out test score is unaffected.
tfidf = TfidfTransformer()
x_train_tfidf = tfidf.fit_transform(x_train_vec)
x_test_tfidf = tfidf.transform(x_test_vec)

# Candidate models as (display name, estimator, hyperparameter grid).
# The random forest is seeded (same seed as the split) so repeated runs give
# the same scores and the same selected parameters.
models = [
    ("Naive Bayes", MultinomialNB(), {'alpha': [0.1, 0.5, 1.0]}),
    ("Logistic Regression", LogisticRegression(max_iter=200), {'C': [0.1, 1.0, 10], 'solver': ['liblinear', 'lbfgs']}),
    ("Random Forest", RandomForestClassifier(random_state=42), {'n_estimators': [50, 100], 'max_depth': [None, 10]})
]

# One result row per model.
results = []

# Run a 5-fold grid search (macro-F1) for each candidate model.
for model_name, model, param_grid in models:
    print(f"Executando GridSearchCV para {model_name}...")
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        cv=5,
        scoring='f1_macro',
        verbose=1,
        n_jobs=-1
    )
    grid_search.fit(x_train_tfidf, y_train)

    # Collect the best CV score/parameters and the held-out test score.
    best_score = grid_search.best_score_
    best_params = grid_search.best_params_
    # Use GridSearchCV.score so the test set is evaluated with the same
    # metric as the cross-validation (macro-F1). best_estimator_.score would
    # report accuracy instead, making the two columns incomparable.
    test_score = grid_search.score(x_test_tfidf, y_test)

    results.append({
        "Model": model_name,
        "Best Score (CV)": best_score,
        "Best Parameters": best_params,
        "Test Score": test_score
    })

# Write the summary table to a CSV file.
results_df = pd.DataFrame(results)
output_path = "C:/Users/tagsa/Downloads/Grid_Search_Results.csv"
results_df.to_csv(output_path, index=False)
print(f"Resultados salvos em {output_path}")


# Cell output: FileNotFoundError: [Errno 2] No such file or directory: 'C:/Users/tagsa/Downloads/classic4.csv'