# Random Forest

### 🌲 Initialize and Train Random Forest Classifier  
Create a `RandomForestClassifier` instance and train it on the training split of the dataset.


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Read the dataset from a CSV file into a DataFrame.
# "PATH" is a placeholder: replace it with the real location of the file.
df = pd.read_csv(filepath_or_buffer="PATH")

# Preview the first five rows as a quick sanity check of columns and values.
print(df.head(n=5))


   ID  phase  durata  DistTOT     HSR  ACC  DEC   RPE  sRPE
0   1      1     328  18709.1  6862.3  284  287  13.0  1433
1   1      2     305  17323.5  5029.9  244  240  11.0  1137
2   1      3     332  19047.8  6248.8  265  225  10.0  1099
3   1      4     308  20898.0  7203.2  285  199  10.0  1040
4   2      1     305  17639.6  5223.5  245  261  13.0  1323


In [None]:
# Feature matrix: every column except the target ('phase') and the two
# columns deliberately left out of the model ('RPE' and 'DEC').
X = df.drop(['phase', 'RPE', 'DEC'], axis=1)

# Target vector: the 'phase' label the classifier learns to predict.
y = df.loc[:, 'phase']

# Hold out 20% of the rows for testing and train on the remaining 80%.
# The fixed random_state keeps the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [None]:
# Configure the classifier. random_state=42 makes the fit reproducible.
# NOTE(review): n_estimators=1 builds a forest made of a single tree, so no
# averaging across trees takes place - confirm this is intentional.
rf_model = RandomForestClassifier(random_state=42, n_estimators=1)

# Fit on the training split only; the test split is kept for evaluation.
rf_model.fit(X=X_train, y=y_train)


### 💾 Save Trained Model  
Use `joblib` to save the trained model for future use without retraining, then reload it and predict on a few custom inputs.

In [None]:
import joblib
import numpy as np
from sklearn.ensemble import RandomForestClassifier

# Persist the fitted model to disk so it can be reused without retraining.
joblib.dump(rf_model, 'random_forest_model.pkl')
print("Model saved successfully.")

# Reload the model from the file that was just written.
# joblib files are pickle-based: only load files that come from a trusted source.
rf_loaded = joblib.load('random_forest_model.pkl')
print("Model loaded successfully.")

# Custom samples to classify: one row per sample, one value per training
# feature (the row length must equal X_train.shape[1]).
# NOTE(review): predict() receives a bare NumPy array, so values are matched
# to the training features purely by position. Confirm that every row follows
# the exact column order of X_train (see X_train.columns); all samples below
# are predicted as the same class, which may point to a misaligned column.
# NOTE(review): rows 1 and 12 are identical - possibly a copy-paste leftover.
custom_inputs = np.array([
    [285, 18669.7, 4937.8, 286, 291, 1360],
    [290, 16572.9, 5255.2, 255, 250, 958],
    [301, 18065.3, 4464.0, 235, 271, 1367],
    [300, 17966.7, 4363.5, 237, 254, 1463],
    [304, 17357.0, 4363.7, 231, 191, 1713],
    [328, 19067.5, 5086.5, 301, 236, 1628],
    [284, 16749.2, 5702.0, 262, 250, 1029],
    [281, 17379.0, 5425.2, 268, 276, 915],
    [306, 22181.9, 7259.2, 337, 334, 1558],
    [313, 21033.3, 6991.7, 292, 217, 1168],
    [305, 22116.0, 7320.0, 333, 326, 1510],
    [285, 18669.7, 4937.8, 286, 291, 1360],
    [294, 18322.7, 4905.6, 251, 180, 1700],
])

# Classify every sample with one predict() call instead of one call per row.
predictions = rf_loaded.predict(custom_inputs)

# Print one line per sample. Reshaping to a column yields a one-element array
# per row, so each prediction is displayed in the form "[label]".
for predicted in predictions.reshape(-1, 1):
    print("Predicted class:", predicted)


Model saved successfully.
Model loaded successfully.
Predicted class: [1]
Predicted class: [1]
Predicted class: [1]
Predicted class: [1]
Predicted class: [1]
Predicted class: [1]
Predicted class: [1]
Predicted class: [1]
Predicted class: [1]
Predicted class: [1]
Predicted class: [1]
Predicted class: [1]
Predicted class: [1]




In [None]:
# Importance score of each input feature, taken from the fitted model.
importances = rf_model.feature_importances_

# Use the real column names when the training data carries them (DataFrame);
# otherwise fall back to generic positional labels.
feature_names = (
    X_train.columns
    if hasattr(X_train, 'columns')
    else [f"Feature {i}" for i in range(X_train.shape[1])]
)

# List the features the model was trained on.
print("Features used in the model:")
for feature in feature_names:
    print("- " + str(feature))

# Show each feature next to its importance, rounded to four decimals.
for feature, score in zip(feature_names, importances):
    print("{}: {:.4f}".format(feature, score))


Features used in the model:
- ID
- durata
- DistTOT
- HSR
- ACC
- sRPE
ID: 0.8120
durata: 0.0353
DistTOT: 0.0444
HSR: 0.0454
ACC: 0.0121
sRPE: 0.0508


### 🧪 Evaluate Model - Accuracy Score  
Compute the accuracy score on both the training and the test set; comparing the two shows how well the model performs on unseen (test) data.


In [None]:
from sklearn.metrics import accuracy_score

# Training split: predict, then score against the known labels.
y_train_pred = rf_model.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)

# Test split: same procedure on the held-out rows.
y_test_pred = rf_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)

# Report both scores as percentages with two decimals; a large gap between
# them would suggest overfitting.
print("Training Accuracy: {:.2f}%".format(train_accuracy * 100))
print("Test Accuracy: {:.2f}%".format(test_accuracy * 100))


Training Accuracy: 98.61%
Test Accuracy: 95.40%


### 📈 Make Predictions  
Generate predictions using the trained model on the test dataset.


In [None]:
# Predicted class labels for the held-out test rows, used for the
# accuracy score and classification report that follow.
y_pred = rf_model.predict(X=X_test)


### ✅ Print Model Accuracy  
Finally, print the model's test accuracy and a per-class classification report to get a quick overview of its performance.

In [None]:
# Overall fraction of test rows classified correctly, shown with two decimals.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy: " + format(accuracy, ".2f"))

# Per-class precision, recall, F1-score and support on the test set.
print(classification_report(y_true=y_test, y_pred=y_pred))


Accuracy: 0.93
              precision    recall  f1-score   support

           1       0.96      0.94      0.95       171
           2       0.87      0.93      0.90       138
           3       0.90      0.94      0.92       148
           4       0.97      0.90      0.93       163

    accuracy                           0.93       620
   macro avg       0.92      0.93      0.92       620
weighted avg       0.93      0.93      0.93       620

