# Notebook Atualizado

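Este notebook carrega os arquivos `classic4.csv` e `Industry Sector.csv`, divide cada um em treino e teste, vetoriza os textos com TF-IDF e treina e avalia três classificadores (Naive Bayes, Regressão Logística e Random Forest). Os relatórios de classificação impressos permitem comparar os resultados entre os dois conjuntos de dados.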

In [None]:

import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier


In [None]:

def process_and_train(dataset_path, model, model_name,
                      text_col='text', label_col='class',
                      test_size=0.2, random_state=42):
    """Fit ``model`` on a labelled text CSV and print its test-set report.

    The CSV is read with pandas, rows lacking a text or a label are dropped,
    the remainder is split into stratified train/test partitions, the texts
    are turned into TF-IDF features (vocabulary and IDF weights are fitted on
    the training partition only) and ``model`` is fitted in place. A
    ``classification_report`` for the held-out partition is printed.

    Parameters
    ----------
    dataset_path : str or path-like
        CSV file that contains at least ``text_col`` and ``label_col``.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is fitted in
        place, so the caller's instance is modified.
    model_name : str
        Label used only in the printed messages.
    text_col : str, default 'text'
        Name of the column holding the documents.
    label_col : str, default 'class'
        Name of the column holding the target labels.
    test_size : float, default 0.2
        Fraction of rows held out for evaluation.
    random_state : int, default 42
        Seed passed to ``train_test_split``.

    Returns
    -------
    None
        Results are printed, not returned.

    Raises
    ------
    KeyError
        If ``text_col`` or ``label_col`` is not a column of the CSV.
    """
    print(f"Processando o arquivo: {dataset_path}")
    df = pd.read_csv(dataset_path)

    # Fail early with the file name instead of a bare KeyError on indexing.
    missing_cols = [col for col in (text_col, label_col) if col not in df.columns]
    if missing_cols:
        raise KeyError(f"Colunas ausentes em {dataset_path}: {missing_cols}")

    # Empty CSV cells are read as NaN; CountVectorizer rejects NaN documents
    # and a NaN label cannot be used as a class, so drop those rows up front.
    labelled = df.dropna(subset=[text_col, label_col])
    n_dropped = len(df) - len(labelled)
    if n_dropped:
        print(f"Aviso: {n_dropped} linha(s) com valores ausentes foram ignoradas.")

    # Guarantee string documents even if pandas inferred a non-string dtype.
    texts = labelled[text_col].astype(str)
    labels = labelled[label_col]

    # Stratified split keeps the class proportions equal in both partitions.
    x_train, x_test, y_train, y_test = train_test_split(
        texts, labels, test_size=test_size, stratify=labels,
        random_state=random_state
    )

    # Bag-of-words counts; the vocabulary comes from the training texts only.
    vectorizer = CountVectorizer()
    x_train_vec = vectorizer.fit_transform(x_train)
    x_test_vec = vectorizer.transform(x_test)

    # Re-weight counts to TF-IDF; IDF is likewise learned from training data.
    tfidf = TfidfTransformer()
    x_train_tfidf = tfidf.fit_transform(x_train_vec)
    x_test_tfidf = tfidf.transform(x_test_vec)

    # Train the model
    model.fit(x_train_tfidf, y_train)

    # Predict on the held-out partition and report per-class metrics.
    y_pred = model.predict(x_test_tfidf)
    print(f"Resultados para o modelo {model_name} e arquivo {dataset_path}:")
    # zero_division=0 yields the same numbers as the default ('warn' acts as 0)
    # without one UndefinedMetricWarning per class that is never predicted.
    print(classification_report(y_test, y_pred, zero_division=0))


In [None]:

# Naive Bayes: evaluate the same estimator on each dataset in turn
nb_model = MultinomialNB()
for dataset_file in (
    "C:/Users/tagsa/Downloads/classic4.csv",
    "C:/Users/tagsa/Downloads/Industry Sector.csv",
):
    process_and_train(dataset_file, nb_model, "Naive Bayes")


In [None]:

# Logistic Regression: evaluate the same estimator on each dataset in turn
lr_model = LogisticRegression(solver='liblinear', max_iter=200)
for dataset_file in (
    "C:/Users/tagsa/Downloads/classic4.csv",
    "C:/Users/tagsa/Downloads/Industry Sector.csv",
):
    process_and_train(dataset_file, lr_model, "Logistic Regression")


In [None]:

# Random Forest: evaluate the same estimator on each dataset in turn
rf_model = RandomForestClassifier(random_state=42, n_estimators=100)
for dataset_file in (
    "C:/Users/tagsa/Downloads/classic4.csv",
    "C:/Users/tagsa/Downloads/Industry Sector.csv",
):
    process_and_train(dataset_file, rf_model, "Random Forest")
