In [None]:
import pandas as pd
from abc import ABC, abstractmethod
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier as SklearnRandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer


In [None]:
class Classifier:
  """Base interface for models; subclasses override ``fit`` and ``predict``."""

  def fit(self, X_train, y_train):
    """Train on the given features and labels.

    Raises:
      NotImplementedError: always, in this base class.
    """
    raise NotImplementedError

  def predict(self, X_test):
    """Return predictions for the given features.

    Raises:
      NotImplementedError: always, in this base class.
    """
    raise NotImplementedError

# Base interface for evaluation metrics
class Evaluator:
  """Base interface for metrics; subclasses override ``evaluate``."""

  def evaluate(self, y_test, y_pred):
    """Score predictions ``y_pred`` against the reference labels ``y_test``.

    Raises:
      NotImplementedError: always, in this base class.
    """
    raise NotImplementedError

# Concrete implementations of the Classifier and Evaluator interfaces
class RandomForestModel(Classifier):
  """Classifier backed by scikit-learn's ``RandomForestClassifier``.

  Args:
    random_state: Seed forwarded to the underlying forest so that repeated
      runs give the same model. Defaults to 42; pass ``None`` to get the
      previous unseeded (non-reproducible) behaviour.
    **model_params: Any additional keyword arguments are forwarded unchanged
      to ``RandomForestClassifier`` (e.g. ``n_estimators``).
  """

  def __init__(self, random_state=42, **model_params):
    # Without a seed the reported accuracy changes on every run, even though
    # the train/test split itself is seeded.
    self.model = SklearnRandomForestClassifier(random_state=random_state, **model_params)

  def fit(self, X_train, y_train):
    """Print the shapes of the training data, then fit the forest.

    Both arguments must expose a ``.shape`` attribute (e.g. pandas or numpy
    objects) because the shapes are printed before fitting.
    """
    print("X_train shape:", X_train.shape)
    print("y_train shape:", y_train.shape)
    self.model.fit(X_train, y_train)

  def predict(self, X_test):
    """Return the fitted forest's predictions for ``X_test``."""
    return self.model.predict(X_test)

class AccuracyEvaluator(Evaluator):
  """Evaluator that scores predictions with ``accuracy_score``."""

  def evaluate(self, y_test, y_pred):
    """Return ``accuracy_score(y_test, y_pred)``."""
    # accuracy_score is imported from sklearn.metrics.
    score = accuracy_score(y_test, y_pred)
    return score

# Single-responsibility classes (one pipeline step each)
class DataLoader:
  """Reads a delimited text file into a pandas DataFrame."""

  def load_data(self, file_path, sep=';'):
    """Load ``file_path`` with ``pd.read_csv`` and print its first rows.

    Args:
      file_path: Location of the file, passed straight to ``pd.read_csv``.
      sep: Field delimiter. Defaults to ``';'``, the value that used to be
        hard-coded, so existing callers are unaffected.

    Returns:
      The loaded DataFrame.
    """
    dataframe = pd.read_csv(file_path, sep=sep)
    print("Cabeçalho:")
    print(dataframe.head())
    return dataframe

class DataDescriber:
  """Prints summary statistics and per-column missing-value counts."""

  def describe_data(self, dataframe):
    """Print ``describe()`` and the null count of each column, then a blank line."""
    print("Descrição dos dados:")
    summary = dataframe.describe()
    print(summary)

    print("Dados faltantes:")
    missing_per_column = dataframe.isnull().sum()
    print(missing_per_column)
    print()

class DataPreprocessor:
  """Validates a DataFrame and removes rows that contain missing values."""

  def preprocess_data(self, dataframe):
    """Return ``dataframe`` without the rows that contain nulls.

    Args:
      dataframe: pandas DataFrame to clean.

    Returns:
      The input object itself when it holds no nulls; otherwise the new
      DataFrame produced by ``dropna()``. The caller's frame is not mutated.

    Raises:
      ValueError: if ``dataframe`` is empty.
    """
    if dataframe.empty:
      raise ValueError("O DataFrame está vazio.")

    has_missing = dataframe.isnull().any().any()
    if not has_missing:
      return dataframe

    print("Valores nulos encontrados")
    cleaned = dataframe.dropna()
    print("Valores nulos removidos")

    # NOTE: no scaling or categorical encoding is applied. An earlier draft
    # sketched a StandardScaler / OneHotEncoder step (via ColumnTransformer)
    # at this point, but it was left disabled.
    return cleaned

class DataSplitter:
  """Separates features from the target column and splits into train/test sets."""

  def split_data(self, dataframe, target_column='quality', test_size=0.2, random_state=42):
    """Split ``dataframe`` into train and test features and labels.

    Args:
      dataframe: pandas DataFrame containing the features and the target.
      target_column: Name of the label column. Defaults to ``'quality'``.
      test_size: Forwarded to ``train_test_split``. Defaults to 0.2.
      random_state: Seed forwarded to ``train_test_split``. Defaults to 42.

    The defaults reproduce the previously hard-coded values, so existing
    callers get the same split.

    Returns:
      The result of ``train_test_split(X, y, ...)``, which callers unpack as
      ``X_train, X_test, y_train, y_test``.

    Raises:
      KeyError: if ``target_column`` is not a column of ``dataframe``.
    """
    X = dataframe.drop(columns=[target_column])
    y = dataframe[target_column]
    return train_test_split(X, y, test_size=test_size, random_state=random_state)

class ModelTrainer:
  """Thin wrapper that delegates training to an injected ``Classifier``."""

  def __init__(self, classifier: Classifier):
    # Keep a reference to the injected model; train_model calls its fit().
    self.classifier = classifier

  def train_model(self, X_train, y_train):
    """Call ``fit(X_train, y_train)`` on the wrapped classifier."""
    model = self.classifier
    model.fit(X_train, y_train)

class ModelEvaluator:
  """Thin wrapper that delegates scoring to an injected ``Evaluator``."""

  def __init__(self, evaluator: Evaluator):
    # Keep a reference to the injected metric; evaluate_model calls its evaluate().
    self.evaluator = evaluator

  def evaluate_model(self, y_test, y_pred):
    """Return whatever the wrapped evaluator computes for ``(y_test, y_pred)``."""
    result = self.evaluator.evaluate(y_test, y_pred)
    return result


# Main class: orchestrates the full pipeline
class WineClassifier:
  """End-to-end pipeline: load the CSV, clean, split, train, predict, evaluate.

  Args:
    file_path: Location of the dataset file handed to ``DataLoader``.
    classifier: Model used for training and prediction.
    evaluator: Metric used to score the predictions.
  """

  def __init__(self, file_path, classifier: Classifier, evaluator: Evaluator):
    self.file_path = file_path
    self.classifier = classifier
    self.evaluator = evaluator
    # One collaborator per pipeline step.
    self.data_loader = DataLoader()
    self.data_preprocessor = DataPreprocessor()
    self.data_splitter = DataSplitter()
    self.model_trainer = ModelTrainer(classifier)
    self.model_evaluator = ModelEvaluator(evaluator)

  def run(self):
    """Execute the whole pipeline and report the score.

    Returns:
      The value produced by the evaluator. It is still printed as before, and
      is now also returned so callers can store or compare it. In a notebook,
      end the call with ``;`` to suppress the echoed return value.
    """
    dataframe = self.data_loader.load_data(self.file_path)
    dataframe = self.data_preprocessor.preprocess_data(dataframe)
    X_train, X_test, y_train, y_test = self.data_splitter.split_data(dataframe)
    self.model_trainer.train_model(X_train, y_train)
    # The trainer fits self.classifier (the same object), so predict with it directly.
    y_pred = self.classifier.predict(X_test)
    accuracy = self.model_evaluator.evaluate_model(y_test, y_pred)
    print(f'Acurácia do modelo: {accuracy}')
    return accuracy




In [None]:
# Wire the concrete model and metric into the pipeline, then execute it.
classifier = RandomForestModel()
evaluator = AccuracyEvaluator()
wine_classifier = WineClassifier(
    file_path='winequality-red.csv',
    classifier=classifier,
    evaluator=evaluator,
)
wine_classifier.run()

Cabeçalho:
   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  67.0   0.9968  3.20       0.68   
2                 15.0                  54.0   0.9970  3.26       0.65   
3                 17.0                  60.0   0.9980  3.16       0.58   
4                 11.0                  34.0   0.9978  3.51       0.56   

   alcohol  quality  
0      9.4        5  
1      9.8        5  
2      9.

In [None]:
# Ad-hoc look at the raw file: read the semicolon-delimited CSV and show the first rows.
df = pd.read_csv('winequality-red.csv', sep=';')
df.head(5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
