# Load Data

In [None]:
import pandas as pd

# Load the CSV file into a pandas DataFrame
df = pd.read_csv('/NguyenEnergyDataRaw.csv')

# Display the first 5 rows of the DataFrame
display(df)

Unnamed: 0,Timestamp,Location,sleep_hours,nap_hours,coffee_intake,exercise_binary,emotion_score,energy_level,product_score
0,10/6/2025 11:13:45,Work,6.0,0.0,2.0,No,3,4,4
1,10/6/2025 15:32:46,Work,6.0,0.0,3.0,No,2,3,2
2,10/6/2025 19:10:15,School,6.0,0.0,2.0,Yes,4,4,4
3,10/7/2025 11:38:06,Work,7.5,0.0,1.0,No,3,3,4
4,10/7/2025 15:02:54,Work,7.0,0.0,2.0,No,2,3,3
...,...,...,...,...,...,...,...,...,...
177,11/23/2025 15:10:01,Home / Other,8.0,0.0,1.0,No,4,4,5
178,11/25/2025 14:27:40,Work,7.0,0.0,1.0,No,4,3,4
179,11/29/2025 8:35:15,School,7.0,0.0,0.5,No,4,4,4
180,11/29/2025 16:38:20,School,7.0,0.0,0.0,No,4,4,4


# Remove Duplicates




In [25]:
print(f"DataFrame shape before dropping duplicates: {df.shape}")

# Remove duplicate rows
df.drop_duplicates(inplace=True)

print(f"DataFrame shape after dropping duplicates: {df.shape}")

# Display the head of the DataFrame to show the result
display(df)

DataFrame shape before dropping duplicates: (182, 9)
DataFrame shape after dropping duplicates: (163, 9)


Unnamed: 0,Timestamp,Location,sleep_hours,nap_hours,coffee_intake,exercise_binary,emotion_score,energy_level,product_score
0,10/6/2025 11:13:45,Work,6.0,0.0,2.0,No,3,4,4
1,10/6/2025 15:32:46,Work,6.0,0.0,3.0,No,2,3,2
2,10/6/2025 19:10:15,School,6.0,0.0,2.0,Yes,4,4,4
3,10/7/2025 11:38:06,Work,7.5,0.0,1.0,No,3,3,4
4,10/7/2025 15:02:54,Work,7.0,0.0,2.0,No,2,3,3
...,...,...,...,...,...,...,...,...,...
177,11/23/2025 15:10:01,Home / Other,8.0,0.0,1.0,No,4,4,5
178,11/25/2025 14:27:40,Work,7.0,0.0,1.0,No,4,3,4
179,11/29/2025 8:35:15,School,7.0,0.0,0.5,No,4,4,4
180,11/29/2025 16:38:20,School,7.0,0.0,0.0,No,4,4,4


# Catagorical Encode Location feature
Mappings:
* Work         -> 1
* School       -> 2
* Class        -> 3
* Home / Other -> 4






In [26]:
# Create a copy of the original DataFrame
df_new = df.copy()

mapping = {
    'Work': 1,
    'School': 2,
    'Class': 3,
    'Home / Other': 4
}
df_new['Location'] = df_new['Location'].replace(mapping)

# Display the head of the new DataFrame to show the updated 'Location' column
display(df_new)

  df_new['Location'] = df_new['Location'].replace(mapping)


Unnamed: 0,Timestamp,Location,sleep_hours,nap_hours,coffee_intake,exercise_binary,emotion_score,energy_level,product_score
0,10/6/2025 11:13:45,1,6.0,0.0,2.0,No,3,4,4
1,10/6/2025 15:32:46,1,6.0,0.0,3.0,No,2,3,2
2,10/6/2025 19:10:15,2,6.0,0.0,2.0,Yes,4,4,4
3,10/7/2025 11:38:06,1,7.5,0.0,1.0,No,3,3,4
4,10/7/2025 15:02:54,1,7.0,0.0,2.0,No,2,3,3
...,...,...,...,...,...,...,...,...,...
177,11/23/2025 15:10:01,4,8.0,0.0,1.0,No,4,4,5
178,11/25/2025 14:27:40,1,7.0,0.0,1.0,No,4,3,4
179,11/29/2025 8:35:15,2,7.0,0.0,0.5,No,4,4,4
180,11/29/2025 16:38:20,2,7.0,0.0,0.0,No,4,4,4


# Categorical encoding exercise_binary column
Mapping of "Have I exercised yet?"
* No -> 0
* Yes -> 1

In [27]:
binary_mapping = {
    'No': 0,
    'Yes': 1
}
df_new['exercise_binary'] = df_new['exercise_binary'].replace(binary_mapping)

# Display the head of the DataFrame to show the updated 'exercise_binary' column
display(df_new)

  df_new['exercise_binary'] = df_new['exercise_binary'].replace(binary_mapping)


Unnamed: 0,Timestamp,Location,sleep_hours,nap_hours,coffee_intake,exercise_binary,emotion_score,energy_level,product_score
0,10/6/2025 11:13:45,1,6.0,0.0,2.0,0,3,4,4
1,10/6/2025 15:32:46,1,6.0,0.0,3.0,0,2,3,2
2,10/6/2025 19:10:15,2,6.0,0.0,2.0,1,4,4,4
3,10/7/2025 11:38:06,1,7.5,0.0,1.0,0,3,3,4
4,10/7/2025 15:02:54,1,7.0,0.0,2.0,0,2,3,3
...,...,...,...,...,...,...,...,...,...
177,11/23/2025 15:10:01,4,8.0,0.0,1.0,0,4,4,5
178,11/25/2025 14:27:40,1,7.0,0.0,1.0,0,4,3,4
179,11/29/2025 8:35:15,2,7.0,0.0,0.5,0,4,4,4
180,11/29/2025 16:38:20,2,7.0,0.0,0.0,0,4,4,4


# Convert TimeStamp feature to Hour of Day
* Drop the date from the timestamp
* Convert to date time objects

In [28]:
# Reset the 'Timestamp' column in df_new to its original string format from df
df_new['Timestamp'] = df['Timestamp'].copy()

# Convert 'Timestamp' column to datetime objects, inferring mixed formats
df_new['Timestamp'] = pd.to_datetime(df_new['Timestamp'], format='mixed')

# Extract only the time part from the 'Timestamp' column
df_new['Timestamp'] = df_new['Timestamp'].dt.time

# Extract the hour from the original Timestamp column into a new column
df_new['hour_of_day'] = pd.to_datetime(df['Timestamp'], format='mixed').dt.hour

# Drop the original Timestamp column
df_new = df_new.drop('Timestamp', axis=1)

# Display the updated DataFrame
display(df_new.head())

Unnamed: 0,Location,sleep_hours,nap_hours,coffee_intake,exercise_binary,emotion_score,energy_level,product_score,hour_of_day
0,1,6.0,0.0,2.0,0,3,4,4,11
1,1,6.0,0.0,3.0,0,2,3,2,15
2,2,6.0,0.0,2.0,1,4,4,4,19
3,1,7.5,0.0,1.0,0,3,3,4,11
4,1,7.0,0.0,2.0,0,2,3,3,15


# Save to new CSV

In [29]:
df_new.to_csv('/NguyenEnergyDataCleaned.csv', index=False)
print("DataFrame 'df_new' successfully saved to 'NguyenEnergyDataCleaned.csv'")

DataFrame 'df_new' successfully saved to 'NguyenEnergyDataCleaned.csv'
