### Logistic Regression Model

In [1]:
# Import Required Libraries

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report


In [2]:
# Load the dataset.
# 'Bitstream' is pinned to str so a bitstream made only of digits can never be
# inferred as an integer (which would drop leading zeros and break the
# per-character conversion done later in the notebook).
data = pd.read_csv('TrainingData.csv', dtype={'Bitstream': str})

# Display basic information.
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
data.info()

# Check the first 5 rows
print(data.head())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   BID        2000 non-null   int64 
 1   Bitstream  2000 non-null   object
 2   class      2000 non-null   int64 
dtypes: int64(2), object(1)
memory usage: 47.0+ KB
None
   BID                                          Bitstream  class
0    0  1000111010111101101100110111001111001000101111...      1
1    1  1101111100101011111111101101010001110110000010...      1
2    2  0011001010001010100100011101000111110100101111...      0
3    3  1101010110000110100001001100111101000000110001...      1
4    4  1010111100001001000101010010111010011101001100...      1


In [3]:
# Analyze the distribution of the labels
label_distribution = data['class'].value_counts()

# The counts are embedded in the f-string instead of being passed as a second
# print() argument: print() puts a separator space between arguments, which
# would land right after the newline and indent the first line of the table.
print(f"Label distribution:\n{label_distribution}")


Label distribution:
 class
1    1000
0    1000
Name: count, dtype: int64


In [4]:
# Convert bitstreams into numerical features
def bitstream_to_features(bitstream):
    """Turn a bitstream into a list with one integer per character.

    Args:
        bitstream: iterable of single characters (e.g. the string "0110"),
            each of which must be accepted by int().

    Returns:
        A list of ints, in the same order as the characters of the input.

    Raises:
        ValueError: if any character cannot be parsed by int().
    """
    return list(map(int, bitstream))

# Apply transformation: labels are taken straight from the 'class' column, and
# every 'Bitstream' entry is converted into its own list of integers.
y = data['class']
X = [bitstream_to_features(bits) for bits in data['Bitstream']]


In [5]:
# Split into training and test sets.
# test_size=0.2 holds out 20% of the samples; stratify=y stratifies the split
# on the labels, and random_state fixes the shuffle so re-runs give the same split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Confirm the shapes of the splits
n_train, n_test = len(X_train), len(X_test)
print(f"Training samples: {n_train}, Test samples: {n_test}")


Training samples: 1600, Test samples: 400


In [6]:
# Train Logistic Regression (fixed random_state so re-runs are repeatable)
logreg = LogisticRegression(max_iter=500, random_state=42)
logreg.fit(X_train, y_train)

# Make predictions on the held-out test set
y_pred = logreg.predict(X_test)

# Evaluate the model
# NOTE(review): the recorded run of this cell scored 0.5125 on a test set with
# 200 samples per class, i.e. roughly chance level -- confirm whether the raw
# bits carry any signal for a linear model before relying on these numbers.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# The report is concatenated onto the heading instead of being passed as a
# second print() argument: print() inserts a separator space between
# arguments, which would land in front of the report's header row and shift it
# one column out of line with the rows below it.
print("\nClassification Report:\n" + classification_report(y_test, y_pred))


Accuracy: 0.5125

Classification Report:
               precision    recall  f1-score   support

           0       0.51      0.52      0.52       200
           1       0.51      0.51      0.51       200

    accuracy                           0.51       400
   macro avg       0.51      0.51      0.51       400
weighted avg       0.51      0.51      0.51       400

