In [21]:
# 1. Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, StandardScaler
import warnings
# Silence every warning raised for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation and pandas/sklearn
# data-quality warnings are hidden as well — confirm that is intended.
warnings.filterwarnings("ignore")

In [23]:
# 2. Load the dataset
df = pd.read_csv("ChildBehaviourAnalysis.csv")
print("Shape:", df.shape)
# Preview five random rows; the fixed random_state makes the preview show the
# same rows on every Restart & Run All instead of a different sample each time.
df.sample(5, random_state=42)

Shape: (200, 9)


Unnamed: 0,Age,Gender,Sleep_Hours,Study_Hours,Play_Hours,Screen_Time,Meals_Per_Day,Mood,Behavior_Category
150,12,Male,7.4,2.0,1.6,2.3,2,Angry,Needs Improvement
36,11,Female,9.3,3.7,0.7,0.9,4,Angry,Healthy
54,11,Male,5.4,0.2,3.7,4.4,2,Happy,Unhealthy
157,11,Male,6.4,1.6,2.9,1.7,3,Happy,Needs Improvement
1,6,Female,8.8,2.1,1.0,0.3,4,Happy,Healthy


In [25]:
# Print a structural summary of the frame: column names, non-null counts,
# dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Age                200 non-null    int64  
 1   Gender             200 non-null    object 
 2   Sleep_Hours        200 non-null    float64
 3   Study_Hours        200 non-null    float64
 4   Play_Hours         200 non-null    float64
 5   Screen_Time        200 non-null    float64
 6   Meals_Per_Day      200 non-null    int64  
 7   Mood               200 non-null    object 
 8   Behavior_Category  200 non-null    object 
dtypes: float64(4), int64(2), object(3)
memory usage: 14.2+ KB


In [27]:
# Count missing values per column (isnull flags each cell, sum totals the flags per column).
df.isnull().sum()

Age                  0
Gender               0
Sleep_Hours          0
Study_Hours          0
Play_Hours           0
Screen_Time          0
Meals_Per_Day        0
Mood                 0
Behavior_Category    0
dtype: int64

In [29]:
# List the column labels of the loaded frame.
df.columns

Index(['Age', 'Gender', 'Sleep_Hours', 'Study_Hours', 'Play_Hours',
       'Screen_Time', 'Meals_Per_Day', 'Mood', 'Behavior_Category'],
      dtype='object')

In [31]:
# Create age groups.
# pd.cut builds right-closed intervals, so for integer ages these edges give:
#   Toddler = 0-5, Child = 6-12, Teen = 13-15.
# include_lowest=True closes the first interval on the left, so an age of 0 is
# labelled 'Toddler' instead of silently becoming NaN.
# NOTE(review): ages above 15 still fall outside every bin and become NaN —
# confirm the dataset never exceeds that, or raise the last edge.
bins = [0, 5, 12, 15]
labels = ['Toddler', 'Child', 'Teen']
df['Age_Group'] = pd.cut(df['Age'], bins=bins, labels=labels, include_lowest=True)

In [33]:
# Add up the four per-activity hour columns into Total_Hours.
# Sleep_Hours is part of the sum, so this is total logged hours rather than
# waking activity only.
hour_columns = ['Sleep_Hours', 'Study_Hours', 'Play_Hours', 'Screen_Time']
first_column, *other_columns = hour_columns
# sum() with a start value adds left to right: ((sleep + study) + play) + screen.
df['Total_Hours'] = sum((df[name] for name in other_columns), df[first_column])

In [35]:
# Preview the first five rows, which now include the derived Age_Group and Total_Hours columns.
df.head(5)

Unnamed: 0,Age,Gender,Sleep_Hours,Study_Hours,Play_Hours,Screen_Time,Meals_Per_Day,Mood,Behavior_Category,Age_Group,Total_Hours
0,14,Female,8.9,3.0,1.0,0.3,5,Angry,Healthy,Teen,13.2
1,6,Female,8.8,2.1,1.0,0.3,4,Happy,Healthy,Child,12.2
2,11,Female,8.4,3.5,1.3,0.9,3,Happy,Healthy,Child,14.1
3,14,Male,8.8,3.5,1.2,0.9,4,Happy,Healthy,Teen,14.4
4,9,Male,9.3,3.6,1.2,0.6,5,Happy,Healthy,Child,14.7


In [37]:
# Collect the names of all numeric columns, in the frame's column order.
# select_dtypes(include='number') matches every integer/float width, not just
# int64/float64, so a numeric column read with another dtype is not mistaken
# for a categorical one further down.
numerical_col = df.select_dtypes(include='number').columns.tolist()
print(numerical_col)

['Age', 'Sleep_Hours', 'Study_Hours', 'Play_Hours', 'Screen_Time', 'Meals_Per_Day', 'Total_Hours']


In [39]:
# Every column that was not classified as numeric is treated as categorical.
# Column order follows the frame's own column order.
categorical_col = []
for column_name in df.columns:
    if column_name in numerical_col:
        continue
    categorical_col.append(column_name)

print(categorical_col)

['Gender', 'Mood', 'Behavior_Category', 'Age_Group']


In [41]:
# Encode categorical columns.
# Work on a copy so `df` keeps its original string/category values, and keep
# one fitted LabelEncoder per column in `label_encoders`.
# NOTE(review): the encoded preview shows Child -> 0 and Teen -> 1, i.e. the
# integer codes do not follow the Toddler < Child < Teen order of Age_Group —
# confirm that downstream use does not rely on the codes being ordinal.
df_encoded = df.copy()
label_encoders = {col: LabelEncoder() for col in categorical_col}
for col, le in label_encoders.items():
    df_encoded[col] = le.fit_transform(df_encoded[col])

In [43]:
# Preview the first five rows of the encoded copy; the categorical columns now hold integer codes.
df_encoded.head(5)

Unnamed: 0,Age,Gender,Sleep_Hours,Study_Hours,Play_Hours,Screen_Time,Meals_Per_Day,Mood,Behavior_Category,Age_Group,Total_Hours
0,14,0,8.9,3.0,1.0,0.3,5,0,0,1,13.2
1,6,0,8.8,2.1,1.0,0.3,4,1,0,0,12.2
2,11,0,8.4,3.5,1.3,0.9,3,1,0,0,14.1
3,14,1,8.8,3.5,1.2,0.9,4,1,0,1,14.4
4,9,1,9.3,3.6,1.2,0.6,5,1,0,0,14.7


In [45]:
# Show the dict built during encoding: column name -> the LabelEncoder fitted on that column.
label_encoders

{'Gender': LabelEncoder(),
 'Mood': LabelEncoder(),
 'Behavior_Category': LabelEncoder(),
 'Age_Group': LabelEncoder()}

In [47]:
# Display processed dataframe (selected original and derived columns).
# A bare last expression uses the notebook's rich table rendering instead of
# the plain-text dump that print() produces.
df[['Age', 'Age_Group', 'Gender', 'Mood', 'Meals_Per_Day', 'Total_Hours']]

     Age Age_Group  Gender   Mood  Meals_Per_Day  Total_Hours
0     14      Teen  Female  Angry              5         13.2
1      6     Child  Female  Happy              4         12.2
2     11     Child  Female  Happy              3         14.1
3     14      Teen    Male  Happy              4         14.4
4      9     Child    Male  Happy              5         14.7
..   ...       ...     ...    ...            ...          ...
195    9     Child  Female  Angry              3         11.5
196   12     Child    Male    Sad              3         14.1
197    9     Child    Male  Happy              2         13.0
198    6     Child    Male  Angry              3         12.7
199   13      Teen    Male  Happy              3         13.3

[200 rows x 6 columns]
