# Libs

In [55]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import pickle
import os
import sys

# Dataframe

In [33]:
df = pd.read_csv(r'C:\Users\feh_s\Desktop\Faturas\2025faturas.csv', sep=',', decimal='.', encoding='latin1')
df.head()

Unnamed: 0,Data,NmGasto,Valor,DataDaFatura,Tipo,Dono
0,10/02/2025,Dm*udemy,24.9,2025-02,estudo,Felipe
1,01/02/2025,Google One,79.9,2025-02,aplicativo,Felipe
2,02/02/2025,Google Myworkoutplan,16.99,2025-02,saude,Felipe
3,16/02/2025,Google 365scores Live,7.97,2025-02,aplicativo,Felipe
4,13/01/2025,Amazon Marketplace,86.98,2025-02,casa,Divide


In [34]:
# 🔁 Copia o DataFrame para codificação sem modificar o original
df_modelo = df.copy()

# Random Forest

In [35]:
# 🔁 Codifica variáveis nominais (string → número)
label_encoder = LabelEncoder()
df['Nm_Encoded'] = label_encoder.fit_transform(df['NmGasto'])
df['Tipo_Encoded'] = label_encoder.fit_transform(df['Tipo'])

# 🔎 Separando variáveis preditoras e alvo
X = df[['Valor', 'Nm_Encoded']]
y = df['Tipo_Encoded']

# 🔀 Divisão em treino e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [42]:
# 🌳 Modelo Random Forest
modelo = RandomForestClassifier(n_estimators=1000, random_state=42)
modelo.fit(X_train, y_train)

0,1,2
,n_estimators,1000
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [44]:
from sklearn.model_selection import GridSearchCV

rf_grid = RandomForestClassifier()
gr_space = {
    'max_depth': [3,5,7,10],
    'n_estimators': [1000, 2000, 3000, 4000, 5000],
    'max_features': [10, 20, 30 , 40],
    'min_samples_leaf': [1, 2, 4]
}

grid = GridSearchCV(rf_grid, gr_space, cv = 3, scoring='accuracy', verbose = 3)
model_grid = grid.fit(X_train, y_train)

print('Best hyperparameters are '+str(model_grid.best_params_))
print('Best score is: ' + str(model_grid.best_score_))

Fitting 3 folds for each of 240 candidates, totalling 720 fits




[CV 1/3] END max_depth=3, max_features=10, min_samples_leaf=1, n_estimators=1000;, score=0.486 total time=   0.6s
[CV 2/3] END max_depth=3, max_features=10, min_samples_leaf=1, n_estimators=1000;, score=0.500 total time=   0.7s
[CV 3/3] END max_depth=3, max_features=10, min_samples_leaf=1, n_estimators=1000;, score=0.444 total time=   0.6s
[CV 1/3] END max_depth=3, max_features=10, min_samples_leaf=1, n_estimators=2000;, score=0.486 total time=   1.4s
[CV 2/3] END max_depth=3, max_features=10, min_samples_leaf=1, n_estimators=2000;, score=0.528 total time=   1.4s
[CV 3/3] END max_depth=3, max_features=10, min_samples_leaf=1, n_estimators=2000;, score=0.444 total time=   1.4s
[CV 1/3] END max_depth=3, max_features=10, min_samples_leaf=1, n_estimators=3000;, score=0.486 total time=   2.2s
[CV 2/3] END max_depth=3, max_features=10, min_samples_leaf=1, n_estimators=3000;, score=0.500 total time=   3.0s
[CV 3/3] END max_depth=3, max_features=10, min_samples_leaf=1, n_estimators=3000;, score

In [45]:
# 🌳 Modelo Random Forest
modelo = RandomForestClassifier(
    max_depth=7, max_features=10, min_samples_leaf=1, n_estimators=1000
    )
modelo.fit(X_train, y_train)

0,1,2
,n_estimators,1000
,criterion,'gini'
,max_depth,7
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,10
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [46]:
# 🔍 Previsões e avaliação
# 🔮 Faz previsões no conjunto original (ou em outro)
df_modelo['Previsao'] = modelo.predict(X)
df['Previsao'] = label_encoder.inverse_transform(df_modelo['Previsao'])


y_pred = modelo.predict(X_test)
print("🎯 Acurácia:", accuracy_score(y_test, y_pred))
print("📊 Relatório de classificação:")
print(classification_report(y_test, y_pred))

🎯 Acurácia: 0.6808510638297872
📊 Relatório de classificação:
              precision    recall  f1-score   support

           0       0.67      1.00      0.80         4
           1       0.00      0.00      0.00         2
           2       0.00      0.00      0.00         1
           3       0.75      0.75      0.75         4
           4       0.75      0.43      0.55         7
           6       0.00      0.00      0.00         0
           8       1.00      1.00      1.00         3
           9       0.00      0.00      0.00         3
          10       0.50      0.50      0.50         4
          11       1.00      0.67      0.80         3
          12       0.75      1.00      0.86         3
          14       0.50      0.50      0.50         2
          16       0.83      1.00      0.91         5
          17       1.00      1.00      1.00         1
          18       1.00      1.00      1.00         5
          19       0.00      0.00      0.00         0

    accuracy       

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [47]:
df

Unnamed: 0,Data,NmGasto,Valor,DataDaFatura,Tipo,Dono,Nm_Encoded,Tipo_Encoded,Previsao
0,10/02/2025,Dm*udemy,24.90,2025-02,estudo,Felipe,17,7,estudo
1,01/02/2025,Google One,79.90,2025-02,aplicativo,Felipe,28,1,aplicativo
2,02/02/2025,Google Myworkoutplan,16.99,2025-02,saude,Felipe,27,14,academia
3,16/02/2025,Google 365scores Live,7.97,2025-02,aplicativo,Felipe,23,1,aplicativo
4,13/01/2025,Amazon Marketplace,86.98,2025-02,casa,Divide,1,4,casa
...,...,...,...,...,...,...,...,...,...
151,21/06/2025,Editora Wmf Martin,63.92,2025-06,livros,Lina,20,12,livros
152,23/06/2025,Google Scribd Inc,27.99,2025-06,livros,Lina,31,12,livros
153,24/06/2025,Magnifico Carnes,409.53,2025-06,comida,Divide,52,5,comida
154,09/06/2025,dÃ³lar de conversÃ£o,5.90,2025-06,tarifa,Divide,82,17,tarifa


# Salvar modelo como pickle

In [63]:
caminho_atual = os.getcwd()

caminho_modelo = os.path.abspath(os.path.join(caminho_atual, 'modelos'))



C:\Users\feh_s\Streamlit-for-Data-Science\ControleFinanceiro\CartaoDeCredito
C:\Users\feh_s\Streamlit-for-Data-Science\ControleFinanceiro\CartaoDeCredito\modelos


In [72]:
file_path = caminho_modelo + "\RandomForest.pkl" # Replace with a valid path

try:
    with open(file_path, 'wb') as f:
        pickle.dump(modelo, f)
    print(f"Data successfully pickled to {file_path}")
except PermissionError:
    print(f"Permission denied: Cannot write to {file_path}. Check file/directory permissions.")
except Exception as e:
    print(f"An error occurred: {e}")

Data successfully pickled to C:\Users\feh_s\Streamlit-for-Data-Science\ControleFinanceiro\CartaoDeCredito\modelos\RandomForest.pkl


In [None]:
# rf_pickle = open('random_forest_penguin.pickle', 'wb')
# pickle.dump(rfc, rf_pickle)
# rf_pickle.close()
# output_pickle = open('output_penguin.pickle', 'wb')
# pickle.dump(uniques, output_pickle)
# output_pickle.close()