In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, BatchNormalization, ReLU, Dropout, Dense, GlobalAvgPool1D
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import psutil

In [2]:
tf.keras.mixed_precision.set_global_policy('mixed_float16')

In [3]:
def get_optimal_chunk_size(dataset_path, initial_chunk_size=100000):
    chunk_size = initial_chunk_size
    for chunk in pd.read_csv(dataset_path, chunksize=chunk_size):
        memory_usage = psutil.virtual_memory().percent
        print(f"Memory Usage: {memory_usage}%")
        
        if memory_usage > 90: 
            chunk_size = max(chunk_size // 2, 10000) 
            print(f"Reducing chunk size to {chunk_size}")
        elif memory_usage < 80:  
            chunk_size = min(chunk_size * 2, 500000)  
            print(f"Increasing chunk size to {chunk_size}")
        
        process_chunk(chunk)
        
    return chunk_size

In [4]:
def process_chunk(chunk):
    processed_data.append(chunk)


In [5]:
dataset_path = "C:/Users/admin/OneDrive/Desktop/my folder/college work/projects and ideas/driving_behavior_large_dataset.csv"
processed_data = []
optimal_chunk_size = get_optimal_chunk_size(dataset_path)


Memory Usage: 52.7%
Increasing chunk size to 200000
Memory Usage: 52.9%
Increasing chunk size to 400000
Memory Usage: 52.8%
Increasing chunk size to 500000
Memory Usage: 52.9%
Increasing chunk size to 500000
Memory Usage: 53.1%
Increasing chunk size to 500000
Memory Usage: 53.1%
Increasing chunk size to 500000
Memory Usage: 53.2%
Increasing chunk size to 500000
Memory Usage: 53.3%
Increasing chunk size to 500000
Memory Usage: 53.4%
Increasing chunk size to 500000
Memory Usage: 53.4%
Increasing chunk size to 500000
Memory Usage: 53.6%
Increasing chunk size to 500000
Memory Usage: 53.7%
Increasing chunk size to 500000
Memory Usage: 53.8%
Increasing chunk size to 500000
Memory Usage: 53.8%
Increasing chunk size to 500000
Memory Usage: 54.0%
Increasing chunk size to 500000
Memory Usage: 54.1%
Increasing chunk size to 500000
Memory Usage: 54.2%
Increasing chunk size to 500000
Memory Usage: 54.3%
Increasing chunk size to 500000
Memory Usage: 54.4%
Increasing chunk size to 500000
Memory Usage

In [6]:
print(f"Optimal Chunk Size: {optimal_chunk_size}")

Optimal Chunk Size: 500000


In [7]:
df = pd.concat(processed_data, ignore_index=True)

In [8]:

df = df.dropna()

In [9]:
X = df.drop(columns=['Event Label']) 
y = df['Event Label']


In [10]:
X['Timestamp'] = pd.to_datetime(X['Timestamp'], errors='coerce').astype(int) / 10**9


In [11]:
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [12]:
scaler = StandardScaler()
X = scaler.fit_transform(X)


In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam
import numpy as np

model = Sequential([
    Input(shape=(X_train.shape[1],)),  
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(32, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(len(np.unique(y)), activation='softmax')  
])

In [15]:
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [16]:
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6)

In [17]:
history = model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=128,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping, reduce_lr]
)

Epoch 1/50
[1m62500/62500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 3ms/step - accuracy: 0.9590 - loss: 0.1154 - val_accuracy: 0.9904 - val_loss: 0.0238 - learning_rate: 0.0010
Epoch 2/50
[1m62500/62500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 3ms/step - accuracy: 0.9843 - loss: 0.0398 - val_accuracy: 0.9932 - val_loss: 0.0184 - learning_rate: 0.0010
Epoch 3/50
[1m62500/62500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m206s[0m 3ms/step - accuracy: 0.9871 - loss: 0.0332 - val_accuracy: 0.9951 - val_loss: 0.0132 - learning_rate: 0.0010
Epoch 4/50
[1m62500/62500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m207s[0m 3ms/step - accuracy: 0.9898 - loss: 0.0274 - val_accuracy: 0.9948 - val_loss: 0.0131 - learning_rate: 0.0010
Epoch 5/50
[1m62500/62500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m205s[0m 3ms/step - accuracy: 0.9919 - loss: 0.0226 - val_accuracy: 0.9970 - val_loss: 0.0082 - learning_rate: 0.0010
Epoch 6/50
[1m62500/62500[0m [32m━━━━

In [18]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")


[1m62500/62500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 873us/step - accuracy: 0.9996 - loss: 0.0017
Test Accuracy: 1.00


In [19]:
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)


[1m62500/62500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 542us/step


In [20]:
print("Classification Report:")
print(classification_report(y_test, y_pred_classes))


Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    577206
           1       1.00      1.00      1.00     39573
           2       1.00      1.00      1.00   1279501
           3       1.00      1.00      1.00     39662
           4       1.00      1.00      1.00     64058

    accuracy                           1.00   2000000
   macro avg       1.00      1.00      1.00   2000000
weighted avg       1.00      1.00      1.00   2000000



In [21]:
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_classes))


Confusion Matrix:
[[ 576856      18     300      32       0]
 [     18   39505      43       0       7]
 [     21      47 1279291      85      57]
 [     16       0      21   39621       4]
 [      0       3      24       2   64029]]


In [23]:
import keras.saving
keras.saving.save_model(model, "driving_behavior_model.keras")
