In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, auc
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler


# --- Baseline model: random forest trained on the real labels -------------
# Read the merged table and select the predictor columns and the target.
feature_columns = ['Value', 'Cumulative Value', 'Gradient', 'Max_Value']
data = pd.read_csv('merge.csv')
X = data[feature_columns]
y = data['freezing']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Forest hyperparameters collected in one place so they are easy to compare
# with the other cells that build the same model.
rf_params = {
    'n_estimators': 100,
    'max_depth': 5,
    'min_samples_split': 10,
    'min_samples_leaf': 5,
    'max_features': 'sqrt',
    'bootstrap': True,
    'random_state': 42,
}
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train, y_train)

# Hard class predictions for the held-out rows (used by the confusion matrix).
y_pred = rf.predict(X_test)

# 5-fold cross-validation over the full dataset (not only the training split).
scores = cross_val_score(rf, X, y, cv=5)
print(f'Cross-validation scores: {scores}')
print(f'Mean score: {scores.mean()}')

cm = confusion_matrix(y_test, y_pred)
print(f'Confusion Matrix:\n{cm}')

# Column 1 of predict_proba is used as the ranking score for the ROC curve;
# presumably this is the positive ("freezing") class -- confirm against
# rf.classes_.
y_prob = rf.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_prob)
roc_auc = auc(fpr, tpr)
print(f'ROC AUC: {roc_auc}')


# Plot the baseline ROC curve (fpr/tpr/roc_auc computed above) together with
# the dashed diagonal that serves as the chance-level reference.
fig, ax = plt.subplots()
ax.plot(
    fpr,
    tpr,
    color='darkorange',
    lw=2,
    label=f'ROC curve (area = {roc_auc:.2f})',
)
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlim([0.0, 1.0])
# Slight headroom above 1.0 so the curve is not clipped at the top edge.
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve')
ax.legend(loc="lower right")
plt.show()

# Bar chart of the fitted forest's feature importances, sorted from most to
# least important, with the spread across individual trees as error bars.
importances = rf.feature_importances_
per_tree_importances = np.array(
    [tree.feature_importances_ for tree in rf.estimators_]
)
std = per_tree_importances.std(axis=0)
# argsort is ascending; flip it to get a descending ranking.
indices = np.flip(np.argsort(importances))

n_features = X.shape[1]
bar_positions = range(n_features)

plt.figure()
plt.title("Feature importances")
plt.bar(
    bar_positions,
    importances[indices],
    color="r",
    yerr=std[indices],
    align="center",
)
plt.xticks(bar_positions, X.columns[indices], rotation=90)
# Leave half a bar of padding on each side of the outermost bars.
plt.xlim([-1, n_features])
plt.show()


from sklearn.utils import shuffle

# --- Label-permutation baseline -------------------------------------------
# Cross-validate the same random-forest configuration on labels that have
# been shuffled relative to the features, 100 times with different seeds.
# The averaged scores show what the model reaches when the link between the
# features and the target has been broken.
data = pd.read_csv('merge.csv')

X = data[['Value', 'Cumulative Value', 'Gradient', 'Max_Value']]
y = data['freezing']

# One unfitted estimator is enough: cross_val_score clones and refits it for
# every fold, so fitting it beforehand (or on a separate train/test split)
# has no effect on the reported scores and only costs time.
rf_shuffled = RandomForestClassifier(
    n_estimators=100,
    max_depth=5,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    bootstrap=True,
    random_state=42,
)

scores_list = []
for i in range(100):
    # Seeding with the loop index gives a different but repeatable
    # permutation on every iteration.
    # NOTE(review): this relies on scikit-learn pairing X and y by row
    # position rather than by pandas index -- the shuffled Series keeps its
    # original index labels in permuted order.
    y_shuffled = shuffle(y, random_state=i)

    scores_shuffled = cross_val_score(rf_shuffled, X, y_shuffled, cv=5)
    scores_list.append(scores_shuffled)

# Average over the 100 permutations, keeping one value per CV fold.
mean_scores = np.mean(scores_list, axis=0)

print(f'Cross-validation scores (shuffled y): {mean_scores}')
print(f'Mean score (shuffled y): {mean_scores.mean()}')



import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, auc
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from random import shuffle



# --- Single shuffled-label run (used for the comparison ROC curve) --------
# Train and evaluate the same forest configuration once on permuted labels.
data = pd.read_csv('merge.csv')

X = data[['Value', 'Cumulative Value', 'Gradient', 'Max_Value']]
y = data['freezing']

# Permute the labels with a fixed seed so the shuffled metrics and the saved
# figure are repeatable, like every other seeded step in this file. Using
# pandas sampling also avoids depending on which `shuffle` function
# (sklearn.utils or random) is currently bound in the session.
# The permuted values are re-attached to the original index so that the
# labels are shuffled relative to the rows of X.
y_shuffled = pd.Series(
    y.sample(frac=1, random_state=42).to_numpy(),
    index=y.index,
)

# Same 80/20 split and seed as the baseline model.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_shuffled, test_size=0.2, random_state=42
)

rf_shuffled = RandomForestClassifier(
    n_estimators=100,
    max_depth=5,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    bootstrap=True,
    random_state=42,
)
rf_shuffled.fit(X_train, y_train)

y_pred_shuffled = rf_shuffled.predict(X_test)

accuracy_shuffled = accuracy_score(y_test, y_pred_shuffled)
cm_shuffled = confusion_matrix(y_test, y_pred_shuffled)
print(f'Shuffled Accuracy: {accuracy_shuffled}')
print(f'Shuffled Confusion Matrix:\n{cm_shuffled}')

# Column 1 of predict_proba is used as the ROC ranking score; presumably the
# positive ("freezing") class -- confirm against rf_shuffled.classes_.
y_prob_shuffled = rf_shuffled.predict_proba(X_test)[:, 1]
fpr_shuffled, tpr_shuffled, _ = roc_curve(y_test, y_prob_shuffled)
roc_auc_shuffled = auc(fpr_shuffled, tpr_shuffled)
print(f'Shuffled ROC AUC: {roc_auc_shuffled}')

# Overlay the baseline ROC curve and the shuffled-label ROC curve on one
# figure and save it. fpr, tpr and roc_auc are not computed in this cell:
# they are the baseline values produced earlier in the file.
fig, ax = plt.subplots()
roc_series = [
    (fpr, tpr, '#E84E1B', f'ROC curve (area = {roc_auc:.2f})'),
    (
        fpr_shuffled,
        tpr_shuffled,
        '#0F99B2',
        f'Shuffled ROC curve (area = {roc_auc_shuffled:.2f})',
    ),
]
for x_vals, y_vals, line_color, line_label in roc_series:
    ax.plot(x_vals, y_vals, color=line_color, lw=2, label=line_label)
# Dashed diagonal as the chance-level reference (no legend entry).
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve')
ax.legend(loc="lower right")
# Save before show(): the figure may be cleared once it has been displayed.
fig.savefig('RF.pdf')
plt.show()


