In [23]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import joblib

# Load datasets
static = pd.read_csv("expanded_user_behavior_dataset.csv")
behav = pd.read_csv("mobile_usage_behavioral_analysis.csv")

# Merge on User ID
df_static = static.merge(behav.rename(columns={"User_ID": "User ID"}), on="User ID", how="inner")

# Encode categorical features
enc_dev = LabelEncoder().fit(df_static["Device Model"])
enc_os = LabelEncoder().fit(df_static["Operating System"])
enc_gender = LabelEncoder().fit(df_static["Gender_x"])

df_static["Device Model"] = enc_dev.transform(df_static["Device Model"])
df_static["Operating System"] = enc_os.transform(df_static["Operating System"])
df_static["Gender"] = enc_gender.transform(df_static["Gender_x"])

# Save encoders
joblib.dump(enc_dev, "enc_device.joblib")
joblib.dump(enc_os, "enc_os.joblib")
joblib.dump(enc_gender, "enc_gender.joblib")

# Select 9 static input features
X_static = df_static[[
    "Device Model", "Operating System", "Gender",
    "App Usage Time (min/day)", "Screen On Time (hours/day)",
    "Battery Drain (mAh/day)", "Number of Apps Installed",
    "Data Usage (MB/day)", "Age_x"
]].values

# Scale features
scaler = StandardScaler().fit(X_static)
X_static = scaler.transform(X_static)
joblib.dump(scaler, "scaler_static.joblib")

# Target
# Target labels (convert from 1-5 to 0-4)
y = df_static["User Behavior Class"].values - 1



In [24]:
# Create synthetic sequence data (7 days, 3 features)
SEQ_LEN = 7
SEQ_FEAT = 3
seq_tensor = np.random.rand(len(df_static), SEQ_LEN, SEQ_FEAT)

# Encode user IDs
user_ids = df_static["User ID"].values
user_ids = LabelEncoder().fit_transform(user_ids)


In [25]:
X_uid_train, X_uid_val, X_stat_train, X_stat_val, X_seq_train, X_seq_val, y_train, y_val = train_test_split(
    user_ids, X_static, seq_tensor, y, test_size=0.2, random_state=42, stratify=y
)


In [26]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Dense, Dropout, LayerNormalization, MultiHeadAttention, GlobalAveragePooling1D, Flatten, Concatenate
from tensorflow.keras.models import Model

# Inputs
uid_input = Input(shape=(), dtype=tf.int32, name="user_id")
static_input = Input(shape=(9,), name="static_input")
seq_input = Input(shape=(SEQ_LEN, SEQ_FEAT), name="sequence_input")

# User embedding
uid_embedding = Embedding(input_dim=len(np.unique(user_ids)) + 1, output_dim=8)(uid_input)
uid_flat = Flatten()(uid_embedding)

# Transformer encoder
x = LayerNormalization()(seq_input)
x = MultiHeadAttention(num_heads=2, key_dim=SEQ_FEAT)(x, x)
x = Dropout(0.2)(x)
x = GlobalAveragePooling1D()(x)

# Concatenate all
combined = Concatenate()([uid_flat, static_input, x])
x = Dense(64, activation='relu')(combined)
x = Dropout(0.3)(x)
x = Dense(32, activation='relu')(x)
output = Dense(5, activation='softmax')(x)  # 5 classes

model = Model(inputs=[uid_input, static_input, seq_input], outputs=output)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.summary()


In [27]:
history = model.fit(
    [X_uid_train, X_stat_train, X_seq_train], y_train,
    validation_data=([X_uid_val, X_stat_val, X_seq_val], y_val),
    epochs=15,
    batch_size=32
)

# Save the model
model.save("user_behavior_transformer_model.keras")


Epoch 1/15
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 35ms/step - accuracy: 0.2765 - loss: 1.5464 - val_accuracy: 0.4650 - val_loss: 1.2969
Epoch 2/15
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.4769 - loss: 1.2585 - val_accuracy: 0.5200 - val_loss: 1.0701
Epoch 3/15
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.5899 - loss: 1.0427 - val_accuracy: 0.6800 - val_loss: 0.8749
Epoch 4/15
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6658 - loss: 0.8659 - val_accuracy: 0.8150 - val_loss: 0.7108
Epoch 5/15
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.7837 - loss: 0.7097 - val_accuracy: 0.9150 - val_loss: 0.5619
Epoch 6/15
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.8540 - loss: 0.5703 - val_accuracy: 0.9450 - val_loss: 0.4314
Epoch 7/15
[1m25/25[0m [32m━━━━

In [28]:
# Print final training and validation accuracy
final_train_acc = history.history['accuracy'][-1]
final_val_acc = history.history['val_accuracy'][-1]

print(f"✅ Final Training Accuracy: {final_train_acc:.4f}")
print(f"✅ Final Validation Accuracy: {final_val_acc:.4f}")


✅ Final Training Accuracy: 0.9862
✅ Final Validation Accuracy: 0.9850


In [29]:
import os

model_files = [f for f in os.listdir() if f.endswith('.keras') or f.endswith('.h5')]
print("📦 Saved Keras Models:")
for f in model_files:
    print(" -", f)


📦 Saved Keras Models:
 - .keras
 - user_behavior_transformer_model.h5
 - user_behavior_transformer_model.keras


In [74]:
import torch
import torch.nn as nn

class ScreenBatteryTransformer(nn.Module):
    def __init__(self, input_dim, embed_dim=64, heads=4, layers=2):
        super().__init__()
        self.input_proj = nn.Linear(input_dim, embed_dim)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embed_dim, nhead=heads, batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=layers)
        self.output_layer = nn.Linear(embed_dim, 2)  # 2 outputs: screen_time, battery_drain

    def forward(self, x):
        x = self.input_proj(x).unsqueeze(1)  # (batch_size, 1, embed_dim)
        x = self.transformer(x)
        x = x.squeeze(1)  # (batch_size, embed_dim)
        return self.output_layer(x)


In [78]:
import pandas as pd

df_expanded  = pd.read_csv("expanded_user_behavior_dataset.csv")          # file 1
df_behavior  = pd.read_csv("mobile_usage_behavioral_analysis.csv")        # file 2


In [82]:
df = df_expanded.merge(
        df_behavior,
        left_on="User ID",   # from file 1
        right_on="User_ID",  # from file 2
        how="inner"
     )


In [84]:
# Use Gender column from file 1 (Gender_x after merge)
df["Gender"] = df["Gender_x"].map({"Male": 0, "Female": 1})

# Select and rename the 11 features you need
df1 = df.rename(columns={
        "App Usage Time (min/day)"       : "app_usage_min",
        "Number of Apps Installed"       : "apps_installed",
        "Data Usage (MB/day)"            : "data_usage",
        "Age_x"                          : "age",
        "Total_App_Usage_Hours"          : "total_app_usage",
        "Daily_Screen_Time_Hours"        : "daily_screen_time",
        "Social_Media_Usage_Hours"       : "social",
        "Productivity_App_Usage_Hours"   : "productivity",
        "Gaming_App_Usage_Hours"         : "gaming",
        "User Behavior Class"            : "user_behavior_class"
})

# Keep only the 11 columns in the right order
feature_cols = [
    "app_usage_min", "apps_installed", "data_usage", "age",
    "Gender", "total_app_usage", "daily_screen_time",
    "social", "productivity", "gaming", "user_behavior_class"
]
df1 = df1[feature_cols]


In [86]:
print(df1.head())
print("Shape:", df1.shape)   # (rows, 11)


   app_usage_min  apps_installed  data_usage  age  Gender  total_app_usage  \
0            393              67        1122   40       0             2.61   
1            268              42         944   47       1             2.13   
2            154              32         322   42       0             7.28   
3            239              56         871   20       0             1.20   
4            187              58         988   31       1             6.31   

   daily_screen_time  social  productivity  gaming  user_behavior_class  
0               7.15    4.43          0.55    2.40                    4  
1              13.79    4.67          4.42    2.43                    3  
2               4.50    4.58          1.71    2.83                    2  
3               6.29    3.18          3.42    4.58                    3  
4              12.59    3.15          0.13    4.00                    3  
Shape: (1000, 11)


In [96]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# ────── 1. Load and merge datasets ──────
df_expanded  = pd.read_csv("expanded_user_behavior_dataset.csv")
df_behavior  = pd.read_csv("mobile_usage_behavioral_analysis.csv")

df = df_expanded.merge(df_behavior, left_on="User ID", right_on="User_ID", how="inner")
df["gender"] = df["Gender_x"].map({"Male": 0, "Female": 1})

# Rename & select only the features
df1 = df.rename(columns={
    "App Usage Time (min/day)"     : "app_usage_min",
    "Number of Apps Installed"     : "apps_installed",
    "Data Usage (MB/day)"          : "data_usage",
    "Age_x"                        : "age",
    "Total_App_Usage_Hours"        : "total_app_usage",
    "Daily_Screen_Time_Hours"      : "daily_screen_time",
    "Social_Media_Usage_Hours"     : "social",
    "Productivity_App_Usage_Hours" : "productivity",
    "Gaming_App_Usage_Hours"       : "gaming",
    "User Behavior Class"          : "user_behavior_class",
    "Screen On Time (hours/day)"   : "screen_on_time",
    "Battery Drain (mAh/day)"      : "battery_drain"
})

# ────── 2. Prepare features and target ──────
feature_cols = [
    "app_usage_min", "apps_installed", "data_usage", "age", "gender",
    "total_app_usage", "daily_screen_time", "social", "productivity",
    "gaming", "user_behavior_class"
]
target_cols = ["screen_on_time", "battery_drain"]

X = df1[feature_cols].values
y = df1[target_cols].values

# ────── 3. Scale features ──────
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# ────── 4. Train/test split (optional but recommended) ──────
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# ────── 5. Convert to PyTorch tensors ──────
X_tensor = torch.tensor(X_train, dtype=torch.float32)
y_tensor = torch.tensor(y_train, dtype=torch.float32)

# ────── 6. Define model ──────
class ScreenBatteryTransformer(nn.Module):
    def __init__(self, input_dim, embed_dim=64, heads=4, layers=2):
        super().__init__()
        self.input_proj = nn.Linear(input_dim, embed_dim)
        encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=heads, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=layers)
        self.output_layer = nn.Linear(embed_dim, 2)  # screen_time, battery_drain

    def forward(self, x):
        x = self.input_proj(x).unsqueeze(1)
        x = self.transformer(x)
        x = x.squeeze(1)
        return self.output_layer(x)

model = ScreenBatteryTransformer(input_dim=11)

# ────── 7. Loss & optimizer ──────
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# ────── 8. Training loop ──────
for epoch in range(100):
    model.train()
    optimizer.zero_grad()
    preds = model(X_tensor)
    loss = criterion(preds, y_tensor)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

# ────── 9. Save model and scaler ──────
torch.save(model.state_dict(), "screen_battery_transformer.pt")
import joblib
joblib.dump(scaler, "screen_battery_scaler.joblib")


Epoch 0, Loss: 1504635.5000
Epoch 10, Loss: 1497469.7500
Epoch 20, Loss: 1496435.8750
Epoch 30, Loss: 1495293.7500
Epoch 40, Loss: 1494043.8750
Epoch 50, Loss: 1492665.8750
Epoch 60, Loss: 1491241.6250
Epoch 70, Loss: 1489721.8750
Epoch 80, Loss: 1488118.3750
Epoch 90, Loss: 1486417.0000


['screen_battery_scaler.joblib']

In [97]:
import pandas as pd

# Load the first two datasets
df1 = pd.read_csv("expanded_user_behavior_dataset.csv")
df2 = pd.read_csv("mobile_usage_behavioral_analysis.csv")

# Normalize column names
df1.rename(columns={
    "User ID": "user_id",
    "App Usage Time (min/day)": "app_usage_min",
    "Screen On Time (hours/day)": "screen_on_time",
    "Battery Drain (mAh/day)": "battery_drain",
    "Number of Apps Installed": "apps_installed",
    "Data Usage (MB/day)": "data_usage",
    "Age": "age",
    "Gender": "gender",
    "User Behavior Class": "user_behavior_class"
}, inplace=True)

df2.rename(columns={
    "User_ID": "user_id",
    "Total_App_Usage_Hours": "total_app_usage",
    "Daily_Screen_Time_Hours": "daily_screen_time",
    "Social_Media_Usage_Hours": "social",
    "Productivity_App_Usage_Hours": "productivity",
    "Gaming_App_Usage_Hours": "gaming"
}, inplace=True)

# Merge df1 and df2 on user_id
merged_df = pd.merge(df1, df2, on="user_id")

# Drop unnecessary columns
merged_df = merged_df[[
    "app_usage_min", "apps_installed", "data_usage", "age",
    "gender", "total_app_usage", "daily_screen_time",
    "social", "productivity", "gaming", "user_behavior_class",
    "screen_on_time", "battery_drain"
]]

# Encode gender
merged_df["gender"] = merged_df["gender"].map({"Male": 1, "Female": 0})

# Save for later use
merged_df.to_csv("merged_behavior_battery_dataset.csv", index=False)

print("✅ Merged dataset saved as 'merged_behavior_battery_dataset.csv'")


✅ Merged dataset saved as 'merged_behavior_battery_dataset.csv'


In [100]:
df1 = pd.read_csv("merged_behavior_battery_dataset.csv")
# Proceed with training your ScreenBatteryTransformer model...


In [102]:
# train_screen_battery_transformer.py
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import joblib

# ───────────────── 1. Load merged dataset ─────────────────
df = pd.read_csv("merged_behavior_battery_dataset.csv")

feature_cols = [
    "app_usage_min", "apps_installed", "data_usage", "age",
    "gender", "total_app_usage", "daily_screen_time",
    "social", "productivity", "gaming", "user_behavior_class"
]
target_cols = ["screen_on_time", "battery_drain"]

X = df[feature_cols].values
y = df[target_cols].values

# ───────────────── 2. Scale features & save scaler ─────────────────
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
joblib.dump(scaler, "screen_battery_scaler.joblib")
print("✅ Scaler saved: screen_battery_scaler.joblib")

# ───────────────── 3. Train/Test split ─────────────────
X_train, X_val, y_train, y_val = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

# Convert to tensors
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.float32)
X_val_t   = torch.tensor(X_val,   dtype=torch.float32)
y_val_t   = torch.tensor(y_val,   dtype=torch.float32)

# ───────────────── 4. Define Transformer Regressor ─────────────────
class ScreenBatteryTransformer(nn.Module):
    def __init__(self, input_dim, embed_dim=64, heads=4, layers=2):
        super().__init__()
        self.input_proj = nn.Linear(input_dim, embed_dim)
        enc_layer = nn.TransformerEncoderLayer(
            d_model=embed_dim, nhead=heads, batch_first=True
        )
        self.encoder = nn.TransformerEncoder(enc_layer, num_layers=layers)
        self.out = nn.Linear(embed_dim, 2)  # screen_time & battery_drain

    def forward(self, x):
        x = self.input_proj(x).unsqueeze(1)  # (B,1,E)
        x = self.encoder(x).squeeze(1)       # (B,E)
        return self.out(x)

model = ScreenBatteryTransformer(input_dim=11)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# ───────────────── 5. Training loop ─────────────────
EPOCHS = 120
for epoch in range(1, EPOCHS + 1):
    # --- Train ---
    model.train()
    optimizer.zero_grad()
    pred = model(X_train_t)
    loss = criterion(pred, y_train_t)
    loss.backward()
    optimizer.step()

    # --- Validate ---
    if epoch % 10 == 0 or epoch == 1:
        model.eval()
        with torch.no_grad():
            val_pred = model(X_val_t)
            val_loss = criterion(val_pred, y_val_t).item()
        print(f"Epoch {epoch:3d} | Train Loss: {loss.item():.4f} | Val Loss: {val_loss:.4f}")

# ───────────────── 6. Save model weights ─────────────────
torch.save(model.state_dict(), "screen_battery_transformer_11.pt")
print("✅ Model weights saved: screen_battery_transformer_11.pt")


✅ Scaler saved: screen_battery_scaler.joblib
Epoch   1 | Train Loss: 1505266.5000 | Val Loss: 1443951.5000
Epoch  10 | Train Loss: 1497751.2500 | Val Loss: 1440516.5000
Epoch  20 | Train Loss: 1496737.7500 | Val Loss: 1439471.3750
Epoch  30 | Train Loss: 1495638.5000 | Val Loss: 1438358.1250
Epoch  40 | Train Loss: 1494422.5000 | Val Loss: 1437161.8750
Epoch  50 | Train Loss: 1493071.0000 | Val Loss: 1435875.8750
Epoch  60 | Train Loss: 1491653.5000 | Val Loss: 1434488.1250
Epoch  70 | Train Loss: 1490141.0000 | Val Loss: 1433005.5000
Epoch  80 | Train Loss: 1488541.5000 | Val Loss: 1431433.5000
Epoch  90 | Train Loss: 1486848.6250 | Val Loss: 1429775.0000
Epoch 100 | Train Loss: 1485066.8750 | Val Loss: 1428030.1250
Epoch 110 | Train Loss: 1483189.5000 | Val Loss: 1426200.7500
Epoch 120 | Train Loss: 1481241.6250 | Val Loss: 1424287.6250
✅ Model weights saved: screen_battery_transformer_11.pt
