In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [2]:
data = pd.read_csv('cleaned_data_no_outliers.csv')

In [3]:

X = data.drop(columns=["Total Time (hrs/day)", "Apps Installed Category"])
y = data["Total Time (hrs/day)"]


categorical_cols = X.select_dtypes(include=["object"]).columns
numerical_cols = X.select_dtypes(exclude=["object"]).columns

In [4]:
numerical_pipeline = Pipeline(steps=[("scaler", StandardScaler())])
categorical_pipeline = Pipeline(steps=[("onehot", OneHotEncoder(handle_unknown="ignore"))])


preprocessor = ColumnTransformer(
    transformers=[
        ("num", numerical_pipeline, numerical_cols),
        ("cat", categorical_pipeline, categorical_cols)
    ]
)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


X_train_preprocessed = preprocessor.fit_transform(X_train)
X_test_preprocessed = preprocessor.transform(X_test)

In [5]:
model = Sequential([
    Dense(64, input_dim=X_train_preprocessed.shape[1], activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1)  
])


model.compile(optimizer='adam', loss='mean_squared_error')


model.fit(X_train_preprocessed, y_train, epochs=100, batch_size=10, validation_data=(X_test_preprocessed, y_test))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m1415/1415[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 65.1322 - val_loss: 0.1780
Epoch 2/100
[1m1415/1415[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 0.1258 - val_loss: 0.0371
Epoch 3/100
[1m1415/1415[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 0.0292 - val_loss: 0.0136
Epoch 4/100
[1m1415/1415[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 0.0127 - val_loss: 0.0107
Epoch 5/100
[1m1415/1415[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 0.0081 - val_loss: 0.0135
Epoch 6/100
[1m1415/1415[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 0.0107 - val_loss: 0.0300
Epoch 7/100
[1m1415/1415[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 0.0095 - val_loss: 0.0038
Epoch 8/100
[1m1415/1415[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 0.0085 - val_loss: 0.0184
Epoch 9/100
[1

<keras.src.callbacks.history.History at 0x128a55539d0>

In [6]:
y_pred = model.predict(X_test_preprocessed)


mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")


for i in range(5):
    print(f'Predicted Total Time: {y_pred[i][0]:.2f}, Actual Total Time: {y_test.iloc[i]}')

[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Mean Squared Error: 0.00034833209802235843
R^2 Score: 0.9999830724168286
Predicted Total Time: 19.01, Actual Total Time: 18.988333333333333
Predicted Total Time: 13.59, Actual Total Time: 13.586666666666668
Predicted Total Time: 27.85, Actual Total Time: 27.85333333333333
Predicted Total Time: 16.44, Actual Total Time: 16.453333333333333
Predicted Total Time: 22.35, Actual Total Time: 22.356666666666666
