In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [5]:
# Read the raw data as a header-less CSV, then label the positional columns
# Feature_0, Feature_1, ... so later cells can refer to them by name.

df = (
    pd.read_csv("binary_patterns.txt", header=None)
    .rename(columns="Feature_{}".format)
)


In [6]:
# Add lagged copies of Feature_0 (the values 1, 2 and 3 rows earlier) as
# extra columns Lag_1, Lag_2 and Lag_3.

target_series = df["Feature_0"]
for lag in (1, 2, 3):
    df[f"Lag_{lag}"] = target_series.shift(lag)

# Shifting leaves NaN in the leading rows; drop every row that contains a NaN
# (in place, so `df` itself is updated for the following cells).
df.dropna(inplace=True)


In [7]:
#handling imbalaced data

from imblearn.over_sampling import SMOTE

X = df.drop(columns=["Feature_0"])
y = df["Feature_0"]

smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)


In [8]:
#Bulidng Random Forest Classifier with class balancing

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Train the model
model = RandomForestClassifier(class_weight="balanced", n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [9]:
# Show the confusion matrix, a blank-line gap, then the per-class
# precision / recall / F1 report for the hold-out predictions.
cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(cm, '\n', report, sep='\n')

[[435   0]
 [  1 452]]


              precision    recall  f1-score   support

           0       1.00      1.00      1.00       435
           1       1.00      1.00      1.00       453

    accuracy                           1.00       888
   macro avg       1.00      1.00      1.00       888
weighted avg       1.00      1.00      1.00       888

