1.Data Loading

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load dataset
df = pd.read_csv("screen_time_data.csv")

print("Data Loaded Successfully ✅")


Data Loaded Successfully ✅


2.Data Understanding


In [3]:
print("First 5 rows:")
print(df.head())

print("Last 5 rows:")
print(df.tail())

print("Shape of dataset:")
print(df.shape)

print("Dataset Info:")
print(df.info())

print("Statistical Summary:")
print(df.describe())


First 5 rows:
         Date  Screen Time  Notifications  Times opened        App
0  08/26/2022           38             70            49  Instagram
1  08/27/2022           39             43            48  Instagram
2  08/28/2022           64            231            55  Instagram
3  08/29/2022           14             35            23  Instagram
4  08/30/2022            3             19             5  Instagram
Last 5 rows:
          Date  Screen Time  Notifications  Times opened       App
49  09/17/2022          212            212           120  Whatsapp
50  09/18/2022          244            303           132  Whatsapp
51  09/19/2022           77            169           105  Whatsapp
52  09/20/2022           58            190            78  Whatsapp
53  09/21/2022           89            262            68  Whatsapp
Shape of dataset:
(54, 5)
Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54 entries, 0 to 53
Data columns (total 5 columns):
 #   Column         Non-Nul

3.Data Preprocessing

Removing Null values

In [4]:
print("Missing values:")
print(df.isnull().sum())

# If null values exist
df.fillna(method='ffill', inplace=True)


Missing values:
Date             0
Screen Time      0
Notifications    0
Times opened     0
App              0
dtype: int64


  df.fillna(method='ffill', inplace=True)


Remove Unnamed columns

In [5]:
# Remove unnamed columns if present
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]


Data type conversions

In [6]:
df['Screen_Minutes'] = df['Screen Time'] 


Convert Date to Datetime

In [7]:
df['Date'] = pd.to_datetime(df['Date'])


Check Datatypes

In [8]:
print(df.dtypes)


Date              datetime64[ns]
Screen Time                int64
Notifications              int64
Times opened               int64
App                       object
Screen_Minutes             int64
dtype: object


Encoding App

In [9]:
df = pd.get_dummies(df, columns=['App'], drop_first=True)


4.Feature Engineering

In [10]:
df['Day'] = df['Date'].dt.day
df['Month'] = df['Date'].dt.month


5.Feature Selection

Independent Variable (X)

In [12]:
X = df[['Notifications', 'Times opened', 'App_Whatsapp']]


Dependent Variable (y)

In [11]:
y = df['Screen_Minutes']


6.Feature Selection

In [None]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


7.Splitting Data

In [14]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)


8.Model Selection

Linear Regression

In [16]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error

lr = LinearRegression()
lr.fit(X_train, y_train)

y_pred_lr = lr.predict(X_test)

print("Linear Regression R2:", r2_score(y_test, y_pred_lr))


Linear Regression R2: 0.6938659608875213


Decision Tree

In [17]:
from sklearn.tree import DecisionTreeRegressor

dtr = DecisionTreeRegressor()
dtr.fit(X_train, y_train)

y_pred_dtr = dtr.predict(X_test)

print("Decision Tree R2:", r2_score(y_test, y_pred_dtr))


Decision Tree R2: 0.25519757837431933


Random Forest

In [18]:
from sklearn.ensemble import RandomForestRegressor

rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

y_pred_rf = rf.predict(X_test)

print("Random Forest R2:", r2_score(y_test, y_pred_rf))


Random Forest R2: 0.51526758944215


Support Vector Regression

In [19]:
from sklearn.svm import SVR

svr = SVR()
svr.fit(X_train, y_train)

y_pred_svr = svr.predict(X_test)

print("SVR R2:", r2_score(y_test, y_pred_svr))


SVR R2: -0.025870433220516187


9.Model Prediction

In [20]:
# Example: Notifications=200, Times opened=80, Whatsapp=1
new_data = [[200, 80, 1]]

new_data_scaled = scaler.transform(new_data)

prediction = rf.predict(new_data_scaled)

print("Predicted Screen Time (minutes):", prediction[0])


Predicted Screen Time (minutes): 97.59




10.Save Model

In [21]:
import pickle

pickle.dump(rf, open("screen_time_model.pkl", "wb"))
pickle.dump(scaler, open("screen_time_scaler.pkl", "wb"))

print("Model Saved Successfully")


Model Saved Successfully


Model Comparision

In [22]:
results = {
    "Linear Regression": r2_score(y_test, y_pred_lr),
    "Decision Tree": r2_score(y_test, y_pred_dtr),
    "Random Forest": r2_score(y_test, y_pred_rf),
    "SVR": r2_score(y_test, y_pred_svr)
}

print(results)


{'Linear Regression': 0.6938659608875213, 'Decision Tree': 0.25519757837431933, 'Random Forest': 0.51526758944215, 'SVR': -0.025870433220516187}
