In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split

# Load the dataset; `df` is a second name bound to the same DataFrame as `datos`.
datos = df = pd.read_csv('Outputfile_RV2.csv')

# Split into labels (the 'Output' column, cast to float) and features
# (every remaining column).
y = df['Output'].astype(float)
X = df.drop(columns='Output')

In [6]:
# Preview the first ten rows of the feature matrix.
X.iloc[:10]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,140,141,142,143,144,145,146,147,148,149
0,-0.412267,0.024408,0.0,-0.412267,0.024408,0.0,-0.412267,0.024408,0.0,-0.412267,...,0.0,-0.049226,0.199663,0.0,-0.049226,0.157173,0.0,-0.049226,0.157173,0.0
1,-0.378423,0.138386,0.0,-0.378423,0.138386,0.0,-0.378423,0.138386,0.0,-0.230745,...,0.0,-0.049226,0.052479,0.0,-0.049226,0.052479,0.0,-0.049226,0.052479,0.0
2,-0.230745,0.251659,0.0,-0.230745,0.251659,0.0,-0.086145,0.273901,0.0,-0.086145,...,0.0,0.00923,0.106241,0.0,0.00923,0.106241,0.0,0.00923,0.106241,0.0
3,-0.086145,0.273901,0.0,-0.058455,0.240343,0.0,-0.058455,0.240343,0.0,-0.058455,...,0.0,0.006153,0.387875,0.0,0.006153,0.387875,0.0,0.006153,0.412022,0.0
4,-0.049226,0.234635,0.0,-0.049226,0.234635,0.0,-0.049226,0.234635,0.0,-0.049226,...,0.0,0.006153,0.412022,0.0,-0.006153,0.449417,0.0,-0.006153,0.449417,0.0
5,-0.049226,0.223118,0.0,-0.049226,0.223118,0.0,-0.049226,0.223118,0.0,-0.049226,...,0.0,-0.006153,0.449417,0.0,-0.030766,0.609662,0.0,-0.030766,0.609662,0.0
6,-0.049226,0.223118,0.0,-0.049226,0.223118,0.0,-0.049226,0.223118,0.0,-0.049226,...,0.0,-0.064609,0.768276,0.0,-0.064609,0.768276,0.0,-0.064609,0.768276,0.0
7,-0.049226,0.223118,0.0,-0.049226,0.223118,0.0,-0.049226,0.223118,0.0,-0.049226,...,0.0,-0.113834,0.831137,0.0,-0.113834,0.831137,0.0,-0.147677,0.837204,0.0
8,-0.049226,0.223118,0.0,-0.049226,0.223118,0.0,-0.049226,0.223118,0.0,-0.049226,...,0.0,-0.147677,0.837204,0.0,-0.16306,0.846234,0.0,-0.16306,0.846234,0.0
9,-0.049226,0.199663,0.0,-0.049226,0.199663,0.0,-0.049226,0.199663,0.0,-0.049226,...,0.0,-0.16306,0.849226,0.0,-0.16306,0.849226,0.0,-0.16306,0.849226,0.0


In [8]:
# Tunable settings for this experiment, gathered in one place.
SEED = 42                  # shared seed for the train/test split and the forest
TEST_SIZE = 0.2            # fraction of rows held out for evaluation
N_FEATURES_TO_SELECT = 9   # number of features RFE keeps

# Split the data into training and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# Base estimator. It is seeded so that the feature ranking, the final model
# and the reported metrics are reproducible on Restart & Run All; previously
# only the split was seeded, so every run produced different results.
estimator = RandomForestClassifier(random_state=SEED)

# Recursive feature elimination, removing one feature per iteration (step=1).
# The selector is fitted on the training split only, so the test split has no
# influence on which features are kept.
selector = RFE(estimator, n_features_to_select=N_FEATURES_TO_SELECT, step=1)
selector.fit(X_train, y_train)

# Names of the kept columns, read from the frame the selector was fitted on.
selected_features = X_train.columns[selector.support_]

# Train the final model on the selected features only.
estimator.fit(X_train[selected_features], y_train)

# Predict on the held-out test set.
y_pred = estimator.predict(X_test[selected_features])

# Validation metrics. Precision, recall and F1 are averaged across classes
# weighted by class support.
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    scorer(y_test, y_pred, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

# Print the metrics.
print("Métricas de validación:")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

# Collect the same numbers in a small results table.
results = pd.DataFrame({'Metric': ['Accuracy', 'Precision', 'Recall', 'F1-score'],
                        'Score': [accuracy, precision, recall, f1]})
print("\nTabla de resultados:")
print(results)

Métricas de validación:
Accuracy: 0.8897884996695308
Precision: 0.8899107556905445
Recall: 0.8897884996695308
F1-score: 0.889781840884192

Tabla de resultados:
      Metric     Score
0   Accuracy  0.889788
1  Precision  0.889911
2     Recall  0.889788
3   F1-score  0.889782


In [9]:
# Show which feature columns were kept by the selector (heading, then the index).
print("\nCaracterísticas seleccionadas:", selected_features, sep="\n")


Características seleccionadas:
Index(['0', '1', '3', '4', '6', '105', '144', '147', '148'], dtype='object')


In [10]:
# Display names for the four classes, in the order they are handed to
# classification_report. The first entry was previously misspelled 'Nuetral',
# which showed up verbatim in the rendered report.
# NOTE(review): assumes the sorted label values in 'Output' correspond to
# Neutral, Happy, Angry, Sad in that order — confirm against the label encoding.
CLASS_NAMES = ['Neutral', 'Happy', 'Angry', 'Sad']

# Per-class precision, recall, F1 and support on the test split.
classification_rep = classification_report(y_test, y_pred, target_names=CLASS_NAMES)

# Print the per-class metrics.
print("Métricas de validación por clase:")
print(classification_rep)

Métricas de validación por clase:
              precision    recall  f1-score   support

     Nuetral       0.91      0.90      0.90      2714
       Happy       0.88      0.90      0.89      3952
       Angry       0.89      0.88      0.88      2794
         Sad       0.89      0.88      0.88      2644

    accuracy                           0.89     12104
   macro avg       0.89      0.89      0.89     12104
weighted avg       0.89      0.89      0.89     12104

