In [275]:
import pandas as pd
import seaborn as sns 
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
import numpy as np

In [276]:
df = pd.read_csv('winequality-red.csv',delimiter=';')

In [277]:
x= df.iloc[:, :-1].values  
y= df.iloc[:, 11].values  

In [278]:
from sklearn.model_selection import train_test_split  
x_train, x_test, y_train, y_test= train_test_split(x, y, test_size= 0.2, random_state=0) 

In [305]:
from imblearn.over_sampling import RandomOverSampler,SMOTE
oversample = RandomOverSampler(sampling_strategy='minority')
sm = SMOTE(random_state=0)
x_sm , y_sm = sm.fit_resample(x_train,y_train)

In [306]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE

clf = RandomForestClassifier(n_estimators=100,random_state=0)
rfe = RFE(clf,n_features_to_select=9)
x_over, y_over = oversample.fit_resample(x,y)
x_sm , y_sm = sm.fit_resample(x_train,y_train)
rfe.fit(x_sm,y_sm)

In [307]:
y_pred_test= rfe.predict(x_test)  
y_pred_train = rfe.predict(x_train)


In [308]:
print(classification_report(y_test,y_pred_test))

              precision    recall  f1-score   support

           3       0.00      0.00      0.00         2
           4       0.19      0.27      0.22        11
           5       0.79      0.77      0.78       135
           6       0.74      0.60      0.66       142
           7       0.35      0.67      0.46        27
           8       0.00      0.00      0.00         3

    accuracy                           0.66       320
   macro avg       0.34      0.38      0.35       320
weighted avg       0.70      0.66      0.67       320



In [289]:
print(classification_report(y_train,y_pred_train))

              precision    recall  f1-score   support

           3       1.00      1.00      1.00         8
           4       1.00      1.00      1.00        42
           5       1.00      1.00      1.00       546
           6       1.00      1.00      1.00       496
           7       1.00      1.00      1.00       172
           8       1.00      1.00      1.00        15

    accuracy                           1.00      1279
   macro avg       1.00      1.00      1.00      1279
weighted avg       1.00      1.00      1.00      1279



In [319]:
#loading the libraries
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.preprocessing import RobustScaler
from sklearn.feature_selection import RFE



#Instantiate pipeline components
scaler = RobustScaler() #numerical scaler
undersampler = RandomUnderSampler(random_state=0) #undersampling technique
oversampler = SMOTE(random_state=0) #oversampling technique
model = RandomForestClassifier()
rfe = RFE(clf,n_features_to_select=9)

#Instantiate the pipeline
pipeline = Pipeline([
    ('scaler', scaler),
    ('undersampler', undersampler),
    ('oversampler', oversampler),
    ('rfe',rfe),
    ('model', model)
    
])


#loading the library
from sklearn.metrics import classification_report

#fitting the pipeline to the train data
pipeline.fit(x_train, y_train)

#Obtaining the test results
predictions = pipeline.predict(x_test)
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           3       0.00      0.00      0.00         2
           4       0.00      0.00      0.00        11
           5       0.70      0.60      0.65       135
           6       0.50      0.11      0.18       142
           7       0.15      0.22      0.18        27
           8       0.01      0.33      0.03         3

    accuracy                           0.33       320
   macro avg       0.23      0.21      0.17       320
weighted avg       0.53      0.33      0.37       320

