In [3]:
import os
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
import pandas as pd

# ---- CONFIG ----
IMAGE_SIZE = (64, 64)
SEQUENCE_LENGTH = 40
IMAGE_FOLDER = '/Users/shyampremi/Desktop/semantic_data/file_folder'  # <-- put your actual folder path here
CSV_OUTPUT = 'gait_features.csv'

# ---- Step 1: Load and preprocess first 50 images ----
def load_first_50_images(folder):
    image_files = sorted([
        f for f in os.listdir(folder)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    ])[:SEQUENCE_LENGTH]
    
    sequence = []
    for filename in image_files:
        img_path = os.path.join(folder, filename)
        img = Image.open(img_path).convert('L').resize(IMAGE_SIZE)
        img_array = np.array(img) / 255.0  # normalize
        sequence.append(img_array.flatten())  # flatten image to 1D
    return np.stack(sequence)  # shape: (50, 4096)

# ---- Step 2: Define RNN ----
class GaitRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(GaitRNN, self).__init__()
        self.rnn = nn.LSTM(input_size=input_size, hidden_size=hidden_size, 
                           num_layers=num_layers, batch_first=True)

    def forward(self, x):
        _, (h_n, _) = self.rnn(x)
        return h_n[-1]  # final hidden state of last layer

# ---- Step 3: Extract features ----
def extract_rnn_features(image_folder):
    sequence = load_first_50_images(image_folder)
    if sequence.shape[0] < SEQUENCE_LENGTH:
        print(f"Warning: Only {sequence.shape[0]} images found. Padding to 50.")
        padding = np.zeros((SEQUENCE_LENGTH - sequence.shape[0], sequence.shape[1]))
        sequence = np.vstack([sequence, padding])

    sequence_tensor = torch.tensor(sequence, dtype=torch.float32).unsqueeze(0)  # (1, 50, 4096)

    input_size = sequence.shape[1]  # e.g., 64*64 = 4096
    hidden_size = 256
    num_layers = 1

    model = GaitRNN(input_size, hidden_size, num_layers)
    with torch.no_grad():
        features = model(sequence_tensor).squeeze().numpy()  # shape: (256,)
    return features

In [None]:
# combined_features=[]
# features = extract_rnn_features('/Users/shyampremi/Desktop/semantic_data/file_folder')
# csv_path ='/Users/shyampremi/Desktop/semantic_data/walks_v2.csv'  # Replace with path to your walks.csv
# sub = '000:00:0:0:bg'
# # Load walks.csv
# walk_df = pd.read_csv(csv_path)

#         # Look up matching row in walks.csv
# row = walk_df[walk_df['file_id'] == sub]

# if not row.empty:
#     extra_cols = row.drop(columns=['file_id','ID']).values.flatten().tolist()
#     combined = list(features) + extra_cols
#     combined_features.append(combined)
# else:
#     print(f"⚠️ No metadata found for: {sub}")
# print(combined_features)

In [None]:
import os
source = '/Users/shyampremi/Desktop/semantic_data/silhouettes'
top_level_dirs = [d for d in os.listdir(source) if os.path.isdir(os.path.join(source, d))]
# If the directories are named with numbers and you want them sorted numerically:
top_level_dirs.sort(key=lambda x: int(x))
for directory in top_level_dirs:
    full_path = os.path.join(source, directory)

In [None]:
import os
import re
source = '/Users/shyampremi/Desktop/semantic_data/silhouettes'
walks = '/Users/shyampremi/Desktop/semantic_data/walks_v2.csv'

# Step 1: Get top-level directories
top_level_dirs = [d for d in os.listdir(source) if os.path.isdir(os.path.join(source, d))]
# Sort top-level directories numerically (assuming names are strictly numeric)
top_level_dirs.sort(key=lambda x: int(x))

# Step 2: For each top-level directory, find subdirectories matching the pattern (e.g., "000:00:0:0:bg")
# Here we assume that the folder name should contain a colon and end with "bg".
pattern = re.compile(r'.*:.+:bg$')  # adjust this pattern as needed

for top_dir in top_level_dirs:
    top_dir_full = os.path.join(source, top_dir)
    
    # List only subdirectories in this top-level folder
    sub_dirs = [d for d in os.listdir(top_dir_full) if os.path.isdir(os.path.join(top_dir_full, d))]
    # Filter subdirectories that match the desired pattern
    filtered_sub_dirs = [d for d in sub_dirs if pattern.match(d)]
    
    # Sort subdirectories; you can modify the sort key if you need a specialized numeric order
    filtered_sub_dirs.sort()
    
    # Print full paths for these subdirectories
    for sub in filtered_sub_dirs:
        full_path = os.path.join(top_dir_full, sub)
        features = extract_rnn_features(full_path)
        

In [None]:
import os
import re
  # Replace with your source folder path

# Step 1: Get top-level directories (numeric names only)
top_level_dirs = [d for d in os.listdir(source) if os.path.isdir(os.path.join(source, d)) and d.isdigit()]

# Convert to integers for numeric sorting (optional: remove if you don't want to sort even top-levels)
top_level_dirs = sorted(top_level_dirs, key=lambda x: int(x))  # Remove this line if no sorting at all

# Regex pattern to match subdirectories like "000:00:0:0:bg"
pattern = re.compile(r'.*:.+:bg$')

for top_dir in top_level_dirs:
    top_dir_full = os.path.join(source, top_dir)

    # Step 2: Get subdirectories matching pattern — no sorting here
    sub_dirs = [
        d for d in os.listdir(top_dir_full)
        if os.path.isdir(os.path.join(top_dir_full, d)) and pattern.match(d)
    ]

    # No sorting — keep them as is
    for sub in sub_dirs:
        full_path = os.path.join(top_dir_full, sub)
        print(full_path)
        features = extract_rnn_features(full_path)
        


In [None]:
import os
import re
import pandas as pd

  # Replace with your actual path
csv_path ='/Users/shyampremi/Desktop/semantic_data/walks_v2.csv'  # Replace with path to your walks.csv
source = '/Users/shyampremi/Desktop/semantic_data/silhouettes'
# Load walks.csv
walk_df = pd.read_csv(csv_path)
count = 0
# Store the final combined features here
combined_features=[]

# Step 1: Get top-level directories (numeric only)
top_level_dirs = [d for d in os.listdir(source) if os.path.isdir(os.path.join(source, d)) and d.isdigit()]
top_level_dirs = sorted(top_level_dirs, key=lambda x: int(x))  # or remove this for unsorted order

# Regex pattern: subfolder like "000:00:0:0:bg"
# pattern = re.compile(r'.*:.+:bg$')
counter2 =0
for top_dir in top_level_dirs:
    top_dir_full = os.path.join(source, top_dir)
    counter2+=1
    sub_dirs = [
        d for d in os.listdir(top_dir_full)
        if os.path.isdir(os.path.join(top_dir_full, d))
    ]

    for sub in sub_dirs:
        full_path = os.path.join(top_dir_full, sub)
        count=count+1
        try:
            # Your custom feature extraction function
            features = extract_rnn_features(full_path)

            # Look up matching row in walks.csv
            row = walk_df[walk_df['file_id'] == sub]
            if not row.empty:
                extra_cols = row.drop(columns=['file_id']).values.flatten().tolist()
                combined = list(features) + extra_cols
                combined_features.append(combined)
            else:
                print(f"⚠️ No metadata found for: {sub}")

        except Exception as e:
            print(f"❌ Failed to process {sub}: {e}")
print(count)
import pandas as pd

# combined_features is a list of lists
df = pd.DataFrame(combined_features)

# Save to CSV
df.to_csv('data3.csv', index=False)

print("✅ data1.csv saved successfully.")


In [7]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,251,252,253,254,255,256,257,258,259,260
0,0.27903,-0.061138,0.129812,-0.351637,-0.124036,0.261636,0.083915,0.219206,0.41809,0.13665,...,0.175966,0.088912,-0.061518,0.114362,0.061686,0,45,txt,1,0
1,-0.260946,0.80901,0.009666,-0.29223,-0.226053,0.626382,0.236587,0.152981,-0.445667,0.253335,...,0.133853,-0.456553,-0.237454,-0.343399,0.199563,0,90,wsf,1,0
2,0.169253,-0.223878,0.016145,-0.116823,-0.222619,0.032462,0.104594,0.363502,0.035202,-0.307103,...,-0.146005,0.111609,0.013981,-0.758104,-0.258638,0,90,wss,1,0
3,0.314936,-0.193302,-0.044151,0.326467,-0.4202,0.052754,-0.100985,0.029985,-0.121092,0.500895,...,0.199053,-0.230224,0.437128,-0.032746,-0.000683,0,0,txt,1,0
4,0.197146,-0.422075,-0.617072,0.189402,0.330186,-0.480965,-0.099994,0.09614,0.026709,-0.181973,...,0.025238,0.320034,0.151364,-0.069154,-0.049833,0,45,txt,0,0


In [9]:
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv("data3.csv")

# Apply label encoding on column 258
le = LabelEncoder()
df['258'] = le.fit_transform(df['258'])

# Optional: save updated CSV
df.to_csv("data3_encoded.csv", index=False)

print("✅ Column 258 converted to numeric using label encoding.")
print(f"Mapping: {dict(zip(le.classes_, le.transform(le.classes_)))}")


✅ Column 258 converted to numeric using label encoding.
Mapping: {'bg': np.int64(0), 'cl': np.int64(1), 'nm': np.int64(2), 'ph': np.int64(3), 'txt': np.int64(4), 'wsf': np.int64(5), 'wss': np.int64(6)}


In [10]:
df.tail()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,251,252,253,254,255,256,257,258,259,260
14971,0.278816,0.118364,0.013785,0.162244,-0.143622,0.164734,-0.040556,0.347687,-0.047361,0.196819,...,-0.191072,-0.236842,-0.473997,-0.067896,-0.062684,311,0,4,0,0
14972,-0.394915,-0.056935,-0.168593,0.413154,0.014528,0.139336,-0.095306,0.061251,0.196748,-0.395727,...,-0.318556,-0.155436,-0.738442,-0.050176,-0.130347,311,45,4,1,0
14973,0.04939,0.423134,-0.63222,-0.404571,-0.120115,-0.121841,-0.085627,0.194474,-0.348734,0.609239,...,0.310999,-0.255843,0.221831,-0.421283,0.339965,311,0,4,1,0
14974,0.11292,0.194937,0.074945,-0.148105,-0.026537,-0.002516,-0.196806,0.22764,-0.031496,-0.432081,...,0.373555,-0.230487,-0.622406,0.421358,0.019913,311,90,5,1,0
14975,0.015035,0.070429,0.461833,0.254551,-0.411709,-0.35495,0.486317,0.107493,-0.179435,-0.477995,...,-0.138951,0.088872,0.196805,0.310344,0.27938,311,90,6,1,0


In [12]:
# Merge on the common column (adjust 'file_id' to your actual key)
labels_df = pd.read_csv('/Users/shyampremi/Desktop/semantic_data/labels.csv')
# Rename label column in labels_df to 'label' (if needed)
labels_df.columns = ['label']

# Append label column to df
df['label'] = labels_df['label']

# Save merged file
df.to_csv("data3_merged.csv", index=False)

print("✅ Label column added and saved as 'label' in data3_merged.csv")


✅ Label column added and saved as 'label' in data3_merged.csv


In [13]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,252,253,254,255,256,257,258,259,260,label
0,0.27903,-0.061138,0.129812,-0.351637,-0.124036,0.261636,0.083915,0.219206,0.41809,0.13665,...,0.088912,-0.061518,0.114362,0.061686,0,45,4,1,0,Typical
1,-0.260946,0.80901,0.009666,-0.29223,-0.226053,0.626382,0.236587,0.152981,-0.445667,0.253335,...,-0.456553,-0.237454,-0.343399,0.199563,0,90,5,1,0,Typical
2,0.169253,-0.223878,0.016145,-0.116823,-0.222619,0.032462,0.104594,0.363502,0.035202,-0.307103,...,0.111609,0.013981,-0.758104,-0.258638,0,90,6,1,0,Typical
3,0.314936,-0.193302,-0.044151,0.326467,-0.4202,0.052754,-0.100985,0.029985,-0.121092,0.500895,...,-0.230224,0.437128,-0.032746,-0.000683,0,0,4,1,0,Typical
4,0.197146,-0.422075,-0.617072,0.189402,0.330186,-0.480965,-0.099994,0.09614,0.026709,-0.181973,...,0.320034,0.151364,-0.069154,-0.049833,0,45,4,0,0,Typical


In [None]:
le = LabelEncoder()
df['label'] = le.fit_transform(df['label'])

In [15]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,252,253,254,255,256,257,258,259,260,label
0,0.27903,-0.061138,0.129812,-0.351637,-0.124036,0.261636,0.083915,0.219206,0.41809,0.13665,...,0.088912,-0.061518,0.114362,0.061686,0,45,4,1,0,2
1,-0.260946,0.80901,0.009666,-0.29223,-0.226053,0.626382,0.236587,0.152981,-0.445667,0.253335,...,-0.456553,-0.237454,-0.343399,0.199563,0,90,5,1,0,2
2,0.169253,-0.223878,0.016145,-0.116823,-0.222619,0.032462,0.104594,0.363502,0.035202,-0.307103,...,0.111609,0.013981,-0.758104,-0.258638,0,90,6,1,0,2
3,0.314936,-0.193302,-0.044151,0.326467,-0.4202,0.052754,-0.100985,0.029985,-0.121092,0.500895,...,-0.230224,0.437128,-0.032746,-0.000683,0,0,4,1,0,2
4,0.197146,-0.422075,-0.617072,0.189402,0.330186,-0.480965,-0.099994,0.09614,0.026709,-0.181973,...,0.320034,0.151364,-0.069154,-0.049833,0,45,4,0,0,2


1. Logistic Regression

In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Load the data


# Split the data into features and labels
X = df.iloc[:, :-1]  # all columns except the last one
y = df.iloc[:, -1]   # the last column is the label

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=280)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the Logistic Regression model
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train_scaled, y_train)

# Predict and evaluate
y_pred = clf.predict(X_test_scaled)
print("Logistic Regression Classification Report:")
print(classification_report(y_test, y_pred))

# Accuracy
accuracy = clf.score(X_test_scaled, y_test)
print(f"Accuracy: {accuracy:.4f}")


Logistic Regression Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       523
           1       0.33      0.00      0.00       975
           2       0.67      1.00      0.80      2995

    accuracy                           0.67      4493
   macro avg       0.33      0.33      0.27      4493
weighted avg       0.52      0.67      0.53      4493

Accuracy: 0.6664


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


2. KNN

In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

# Load the data


# Split the data into features and labels
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=280)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the KNN model
clf = KNeighborsClassifier()
clf.fit(X_train_scaled, y_train)

# Predict and evaluate
y_pred = clf.predict(X_test_scaled)
print("K-Nearest Neighbors Classification Report:")
print(classification_report(y_test, y_pred))

# Accuracy
accuracy = clf.score(X_test_scaled, y_test)
print(f"Accuracy: {accuracy:.4f}")


K-Nearest Neighbors Classification Report:
              precision    recall  f1-score   support

           0       0.13      0.05      0.07       523
           1       0.22      0.04      0.06       975
           2       0.67      0.92      0.77      2995

    accuracy                           0.63      4493
   macro avg       0.34      0.34      0.30      4493
weighted avg       0.51      0.63      0.54      4493

Accuracy: 0.6256


3. Support Vector Machine (SVM)

In [22]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report

# Load the data


# Split the data into features and labels
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=280)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the SVM model
clf = SVC()
clf.fit(X_train_scaled, y_train)

# Predict and evaluate
y_pred = clf.predict(X_test_scaled)
print("Support Vector Machine Classification Report:")
print(classification_report(y_test, y_pred))

# Accuracy
accuracy = clf.score(X_test_scaled, y_test)
print(f"Accuracy: {accuracy:.4f}")


Support Vector Machine Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       342
           1       0.00      0.00      0.00       674
           2       0.66      1.00      0.80      1980

    accuracy                           0.66      2996
   macro avg       0.22      0.33      0.27      2996
weighted avg       0.44      0.66      0.53      2996



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 0.6609


4. Decision Tree Classifier

In [28]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

# Load the data
# data = pd.read_csv('data3.csv')

# Split the data into features and labels
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the Decision Tree model
clf = DecisionTreeClassifier()
clf.fit(X_train_scaled, y_train)

# Predict and evaluate
y_pred = clf.predict(X_test_scaled)
print("Decision Tree Classification Report:")
print(classification_report(y_test, y_pred))

# Accuracy
accuracy = clf.score(X_test_scaled, y_test)
print(f"Accuracy: {accuracy:.4f}")


Decision Tree Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.95      0.96       495
           1       1.00      1.00      1.00       962
           2       0.99      1.00      0.99      3036

    accuracy                           0.99      4493
   macro avg       0.99      0.98      0.99      4493
weighted avg       0.99      0.99      0.99      4493

Accuracy: 0.9918


4.1 Tunning Decision Tree Classifiers

In [29]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

# Load the data
# data = pd.read_csv('data3.csv')

# Split the data into features and labels
X = df.iloc[:, :-1]  # all columns except the last one
y = df.iloc[:, -1]   # the last column is the label

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the Decision Tree model with overfitting prevention
clf = DecisionTreeClassifier(
    max_depth=5,               # Limit the maximum depth of the tree
    min_samples_split=10,      # Require at least 10 samples to split a node
    min_samples_leaf=5,        # Require at least 5 samples in a leaf node
    max_features='sqrt',       # Consider only a random subset of features at each split
    class_weight='balanced',   # Handle class imbalance by adjusting weights
    random_state=42            # Ensure reproducibility
)

# Train the model
clf.fit(X_train_scaled, y_train)

# Predict and evaluate
y_pred = clf.predict(X_test_scaled)
print("Decision Tree Classification Report (with tuning):")
print(classification_report(y_test, y_pred))

# Accuracy
accuracy = clf.score(X_test_scaled, y_test)
print(f"Accuracy: {accuracy:.4f}")


Decision Tree Classification Report (with tuning):
              precision    recall  f1-score   support

           0       0.10      0.12      0.11       495
           1       0.20      0.31      0.25       962
           2       0.67      0.55      0.60      3036

    accuracy                           0.45      4493
   macro avg       0.33      0.32      0.32      4493
weighted avg       0.51      0.45      0.47      4493

Accuracy: 0.4478


In [30]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

# Load the data
# data = pd.read_csv('data3.csv')

# Split the data into features and labels
X = df.iloc[:, :-1]  # all columns except the last one
y = df.iloc[:, -1]   # the last column is the label

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the Decision Tree model with updated tuning
clf = DecisionTreeClassifier(
    criterion='entropy',           # Use 'entropy' for information gain
    max_depth=10,                  # Increase the maximum depth of the tree
    min_samples_split=5,           # Lower the minimum samples required to split
    min_samples_leaf=2,            # Lower the minimum samples required in a leaf node
    max_leaf_nodes=20,             # Limit the number of leaf nodes
    class_weight='balanced',       # Handle class imbalance by adjusting weights
    random_state=42                # Ensure reproducibility
)

# Train the model
clf.fit(X_train_scaled, y_train)

# Predict and evaluate
y_pred = clf.predict(X_test_scaled)
print("Decision Tree Classification Report (with further tuning):")
print(classification_report(y_test, y_pred))

# Accuracy
accuracy = clf.score(X_test_scaled, y_test)
print(f"Accuracy: {accuracy:.4f}")


Decision Tree Classification Report (with further tuning):
              precision    recall  f1-score   support

           0       0.50      0.44      0.47       495
           1       0.28      0.68      0.39       962
           2       0.75      0.42      0.54      3036

    accuracy                           0.48      4493
   macro avg       0.51      0.51      0.47      4493
weighted avg       0.62      0.48      0.50      4493

Accuracy: 0.4772


5. Random Forest Classifier

In [27]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Load the data
# data = pd.read_csv('data3.csv')

# Split the data into features and labels
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=280)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the Random Forest model
clf = RandomForestClassifier()
clf.fit(X_train_scaled, y_train)

# Predict and evaluate
y_pred = clf.predict(X_test_scaled)
print("Random Forest Classification Report:")
print(classification_report(y_test, y_pred))

# Accuracy
accuracy = clf.score(X_test_scaled, y_test)
print(f"Accuracy: {accuracy:.4f}")


Random Forest Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.03      0.05       523
           1       0.00      0.00      0.00       975
           2       0.67      1.00      0.80      2995

    accuracy                           0.67      4493
   macro avg       0.56      0.34      0.28      4493
weighted avg       0.56      0.67      0.54      4493

Accuracy: 0.6693
