In [1]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd

# Read the feature table from disk; `df` is a second name bound to the same frame
datos = pd.read_csv('Outputfile_RV2.csv')
df = datos

# 'Output' holds the target labels (cast to float); every other column is a predictor
y = df['Output'].astype(float)
X = df.drop(columns='Output')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Create the AdaBoost classifier.
# random_state is fixed so that the feature ranking performed by RFE and the
# final fit below give the same result on every run; 42 is the same seed value
# the train/test split uses.
estimator = AdaBoostClassifier(random_state=42)

# Create the RFE selector: keep 9 features, discarding one per elimination
# round (step=1)
selector = RFE(estimator, n_features_to_select=9, step=1)

# Fit the RFE selector to the training data only, so the test rows play no
# part in choosing the features
selector.fit(X_train, y_train)

# Get the selected features; support_ is a boolean mask over the columns the
# selector was fitted on
selected_features = X_train.columns[selector.support_]

# Train the model with the selected features.
# RFE fits its own copy of the estimator (exposed as selector.estimator_), so
# `estimator` itself still has to be fitted here before it can predict.
estimator.fit(X_train[selected_features], y_train)

# Predict on the test set, restricted to the same selected columns
y_pred = estimator.predict(X_test[selected_features])

# Score the test-set predictions. Precision, recall and F1 are requested with
# average='weighted'; accuracy takes no averaging argument.
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    scorer(y_test, y_pred, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

# Report the four scores, one "<name>: <value>" line each, under a heading
print("Validation Metrics:")
for caption, score in zip(
    ("Accuracy:", "Precision:", "Recall:", "F1-score:"),
    (accuracy, precision, recall, f1),
):
    print(caption, score)

# Tabulate the four scores, one row per metric, then print the table
results = pd.DataFrame(
    [
        ('Accuracy', accuracy),
        ('Precision', precision),
        ('Recall', recall),
        ('F1-score', f1),
    ],
    columns=['Metric', 'Score'],
)
print("\nResults Table:")
print(results)

Validation Metrics:
Accuracy: 0.45522141440846003
Precision: 0.4594864765978388
Recall: 0.45522141440846003
F1-score: 0.4550923004002625

Results Table:
      Metric     Score
0   Accuracy  0.455221
1  Precision  0.459486
2     Recall  0.455221
3   F1-score  0.455092


In [2]:
# Show the column labels kept by the feature selector, under a heading line
print("\nCaracterísticas seleccionadas:", selected_features, sep="\n")


Características seleccionadas:
Index(['0', '1', '2', '19', '110', '144', '147', '148', '149'], dtype='object')


In [3]:
from sklearn.metrics import classification_report

# Compute validation metrics for each class from the test-set predictions.
# NOTE(review): target_names are display labels matched by position to the
# class labels in sorted order -- confirm this order agrees with how the
# 'Output' column was encoded.
classification_rep = classification_report(
    y_test,
    y_pred,
    target_names=['Neutral', 'Happy', 'Angry', 'Sad'],  # was misspelled 'Nuetral'
)

# Print the per-class metrics
print("Métricas de validación por clase:")
print(classification_rep)

Métricas de validación por clase:
              precision    recall  f1-score   support

     Nuetral       0.42      0.37      0.40      2714
       Happy       0.42      0.50      0.46      3952
       Angry       0.57      0.52      0.54      2794
         Sad       0.44      0.40      0.42      2644

    accuracy                           0.46     12104
   macro avg       0.46      0.45      0.45     12104
weighted avg       0.46      0.46      0.46     12104

