<a href="https://colab.research.google.com/github/Rivianee/data-science/blob/master/PYCAREST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install pycaret

In [7]:
import pandas as pd
from pycaret.classification import *

class CategoryClassifier:
    def __init__(self, file_path):
        self.data = pd.read_excel(file_path)
        self.data['Produto'] = self.data['Produto'].str.lower()
        self.setup_done = False
        self.trained_model = None

    def preprocess_data(self):
        self.data = self.data.dropna(subset=['Produto'])
        if self.data['Categoria'].isnull().any():
            self.data['Categoria'].fillna('Desconhecida', inplace=True)

    def setup_pycaret(self):
        self.exp_clf = setup(data=self.data, target='Categoria', text_features=['Produto'], session_id=123)
        self.setup_done = True

    def train_best_model(self):
        if not self.setup_done:
            print("You need to call setup_pycaret() before training.")
            return
        best_model = compare_models()
        self.trained_model = finalize_model(best_model)

    def classify_uncategorized(self, output_file='seu_arquivo_atualizado.xlsx'):
        if self.trained_model is None:
            print("You need to train a model before classifying.")
            return
        unclassified_data = self.data[self.data['Categoria'].isnull()]
        if len(unclassified_data) > 0:
            predictions = predict_model(self.trained_model, data=unclassified_data)
            self.data.loc[predictions.index, 'Categoria'] = predictions['Label']
            self.data.to_excel(output_file, index=False)
        else:
            print("No items without category to classify.")

# Criar uma instância da classe
classifier = CategoryClassifier('/content/PRODUTO E CATEGORIA PARA TESTE.xlsx')

# Pré-processar os dados
classifier.preprocess_data()

# Configurar o ambiente do PyCaret
classifier.setup_pycaret()

# Comparar modelos e treinar o melhor
classifier.train_best_model()

# Classificar itens sem categoria
classifier.classify_uncategorized()

Unnamed: 0,Description,Value
0,Session id,123
1,Target,Categoria
2,Target type,Multiclass
3,Target mapping,"Açai : 0, Bebida: 1, Carnes: 2, Comida Brasileira: 3, Comida Italiana: 4, Comida Japonesa: 5, Comida Saudável: 6, Comida de outros países: 7, Desconhecida: 8, Lanches: 9, Pizzas: 10, Salgados: 11, Sobremesas e Doces: 12, Árabe: 13"
4,Original data shape,"(4638, 2)"
5,Transformed data shape,"(4638, 2063)"
6,Transformed train set shape,"(3246, 2063)"
7,Transformed test set shape,"(1392, 2063)"
8,Text features,1
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.679,0.154,0.679,0.6039,0.5865,0.1496,0.2262,1.102
ridge,Ridge Classifier,0.675,0.0,0.675,0.6257,0.6258,0.2622,0.2877,0.347
knn,K Neighbors Classifier,0.6651,0.1403,0.6651,0.595,0.5988,0.1935,0.2271,0.75
rf,Random Forest Classifier,0.6565,0.148,0.6565,0.5941,0.6047,0.2201,0.2421,0.781
ada,Ada Boost Classifier,0.6559,0.1008,0.6559,0.4624,0.5368,0.0425,0.0717,0.43
svm,SVM - Linear Kernel,0.6522,0.0,0.6522,0.6167,0.6237,0.2827,0.2909,0.231
et,Extra Trees Classifier,0.6485,0.1482,0.6485,0.6019,0.6136,0.2565,0.2675,0.778
gbc,Gradient Boosting Classifier,0.6463,0.1484,0.6463,0.6005,0.6036,0.2228,0.2379,2.275
dt,Decision Tree Classifier,0.6063,0.1213,0.6063,0.5844,0.591,0.2397,0.2418,0.372
lda,Linear Discriminant Analysis,0.581,0.1402,0.581,0.5956,0.5861,0.2661,0.2667,0.319


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.679,0.154,0.679,0.6039,0.5865,0.1496,0.2262,1.102
ridge,Ridge Classifier,0.675,0.0,0.675,0.6257,0.6258,0.2622,0.2877,0.347
knn,K Neighbors Classifier,0.6651,0.1403,0.6651,0.595,0.5988,0.1935,0.2271,0.75
dummy,Dummy Classifier,0.6583,0.1,0.6583,0.4334,0.5227,0.0,0.0,0.315
xgboost,Extreme Gradient Boosting,0.658,0.1458,0.658,0.5952,0.6089,0.2368,0.2569,172.62
rf,Random Forest Classifier,0.6565,0.148,0.6565,0.5941,0.6047,0.2201,0.2421,0.781
ada,Ada Boost Classifier,0.6559,0.1008,0.6559,0.4624,0.5368,0.0425,0.0717,0.43
svm,SVM - Linear Kernel,0.6522,0.0,0.6522,0.6167,0.6237,0.2827,0.2909,0.231
et,Extra Trees Classifier,0.6485,0.1482,0.6485,0.6019,0.6136,0.2565,0.2675,0.778
gbc,Gradient Boosting Classifier,0.6463,0.1484,0.6463,0.6005,0.6036,0.2228,0.2379,2.275


No items without category to classify.
