In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
from sklearn import datasets, linear_model
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, root_mean_squared_error
from sklearn.model_selection import train_test_split

# Location of the challenge dataset. The default is the path the notebook was
# originally written against; set the CHALLENGE1_DATA_PATH environment variable
# to point at the CSV on another machine instead of editing this cell.
file_path = os.environ.get(
    'CHALLENGE1_DATA_PATH',
    r'C:/Users/Dark Hades/Desktop/Challenge1_Data_Analysis_Dataset.csv',
)

# Raw study-session log; later cells read it as `df` and work on copies.
df = pd.read_csv(file_path)

In [None]:
# Preview the first rows of the raw data rather than rendering the whole frame.
df.head()

In [None]:
# Work on a copy so the raw frame `df` stays untouched, and add an 'HH:MM'
# string version of each 'HH:MM:SS' time column.
df2 = df.copy()
for source_col, target_col in (
    ('Duration', 'Duration_H:M'),
    ('Start time', 'StartTime_H:M'),
    ('End time', 'EndTime_H:M'),
):
    parsed_times = pd.to_datetime(df2[source_col], format='%H:%M:%S')
    df2[target_col] = parsed_times.dt.strftime('%H:%M')
df2.head(5)

In [None]:
# Summary statistics for every column of df2; include='all' covers the
# non-numeric columns as well as the numeric ones.
df2.describe(include='all')

In [None]:
# Print df2's column names, dtypes and non-null counts.
df2.info()

In [None]:

# Per-subject summary: average mood level and task completion rate for each of
# the five subject codes, shown as two side-by-side bar charts.
subjects = ['SE', 'PBR', 'DT', 'DBW', 'PPI']

# Compare subject codes case-insensitively. A lowercase/uppercase mismatch
# between the filter value and the stored code (e.g. 'dbw' vs 'DBW') would
# otherwise select no rows and produce a NaN bar for that subject.
subject_codes = df['Subject'].str.upper()
subject_frames = {code: df[subject_codes == code] for code in subjects}

# Individual per-subject frames, kept under their established names.
se_data = subject_frames['SE']
pbr_data = subject_frames['PBR']
dt_data = subject_frames['DT']
ppi_data = subject_frames['PPI']
dbw_data = subject_frames['DBW']

# Average mood level per subject, in the same order as `subjects`.
avg_moods = [subject_frames[code]['Mood Level'].mean() for code in subjects]
avg_mood_se, avg_mood_pbr, avg_mood_dt, avg_mood_dbw, avg_mood_ppi = avg_moods

# Task completion rate per subject, in percent: mean of 'Task' times 100.
# NOTE(review): this assumes 'Task' is a 0/1 completion flag — confirm against the dataset.
task_completion_rates = [subject_frames[code]['Task'].mean() * 100 for code in subjects]
(
    task_completion_rate_se,
    task_completion_rate_pbr,
    task_completion_rate_dt,
    task_completion_rate_dbw,
    task_completion_rate_ppi,
) = task_completion_rates

# One distinct color per subject, shared by both charts so a subject is
# recognisable across panels.
bar_colors = ['blue', 'orange', 'red', 'purple', 'green']

fig, (mood_ax, task_ax) = plt.subplots(1, 2, figsize=(10, 5))

# Left panel: average mood levels
mood_ax.bar(subjects, avg_moods, color=bar_colors)
mood_ax.set_title('Average Mood Levels by Subject')
mood_ax.set_ylabel('Mood Level')
mood_ax.set_xlabel('Subject')

# Right panel: task completion rates
task_ax.bar(subjects, task_completion_rates, color=bar_colors)
task_ax.set_title('Task Completion Rates by Subject')
task_ax.set_ylabel('Completion Rate (%)')
task_ax.set_xlabel('Subject')

fig.tight_layout()
plt.show()


In [None]:
# Encode each subject code as an integer label. The numbers are arbitrary
# identifiers, not a ranking.
#
# The result is assigned back to the column on purpose: calling
# replace(..., inplace=True) on df2['Subject'] is a chained in-place update,
# which pandas 2.x warns about and which leaves df2 unchanged once
# Copy-on-Write is active (the default from pandas 3.0).
subject_label_codes = {'SE': 2, 'PBR': 3, 'DT': 4, 'DBW': 5, 'PPI': 6}

# infer_objects() gives the column an integer dtype when every value was
# replaced; any unmapped code is left in place and keeps the column as object.
df2['Subject'] = df2['Subject'].replace(subject_label_codes).infer_objects()


In [None]:
def _hm_to_minutes(hm_text):
    """Convert an 'HH:MM' string into a whole number of minutes."""
    parts = hm_text.split(":")
    return int(parts[0]) * 60 + int(parts[1])


# Session length in minutes, derived from the 'Duration_H:M' strings.
df2["Duration_Minutes"] = df2["Duration_H:M"].apply(_hm_to_minutes)

# Pairwise correlations between the four columns of interest.
corr_columns = ['Subject', 'Mood Level', 'Task', 'Duration_Minutes']
df3 = df2[corr_columns]
correlation_matrix = df3.corr()

# Annotated heatmap of the correlation matrix on an 8x6 inch figure.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    correlation_matrix,
    annot=True,
    fmt=".2f",
    cmap='coolwarm',
    square=True,
    cbar_kws={"shrink": .8},
    ax=ax,
)
ax.set_title('Correlation Heatmap', fontsize=16)
plt.show()


In [None]:
# Session length in hours, parsed from the 'HH:MM:SS' strings in 'Duration'.
# pd.to_timedelta converts the whole column at once and yields NaN for a
# missing duration instead of raising, unlike a per-row str.split.
df2['Duration_Hours'] = pd.to_timedelta(df2['Duration']).dt.total_seconds() / 3600

# Scatter plot: Study Duration (Hours) vs Mood Level
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(df2['Duration_Hours'], df2['Mood Level'], color='blue', alpha=0.7)
ax.set_title('Study Duration (Hours) vs Mood Level')
ax.set_xlabel('Study Duration (hours)')
ax.set_ylabel('Mood Level')
ax.grid(True, linestyle='--', alpha=0.5)
plt.show()


In [None]:
# Total study hours for each 'Time of Day' category.
time_of_day_durations = df2.groupby('Time of Day')['Duration_Hours'].sum()

# Donut chart of each category's share of the total study time. Every slice
# gets its own color so that neighbouring slices cannot be confused.
# NOTE(review): four colors are listed — confirm 'Time of Day' has four categories.
fig, ax = plt.subplots(figsize=(5, 5))
time_of_day_durations.plot(
    kind='pie',
    ax=ax,
    autopct='%1.1f%%',
    startangle=90,
    colors=['orange', 'lightgreen', 'skyblue', 'plum'],
    wedgeprops={'edgecolor': 'black', 'width': 0.6},
)

ax.set_title('Proportion of Study Duration by Time of Day')
ax.set_ylabel('')  # drop the default y-label (the Series name)
plt.show()


In [None]:
# Mood level against session length in minutes, one point per session.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(df2['Duration_Minutes'], df2['Mood Level'], color='blue', alpha=0.7)
ax.set_title('Study Duration vs Mood Level')
ax.set_xlabel('Study Duration (minutes)')
ax.set_ylabel('Mood Level')
ax.grid(True, linestyle='--', alpha=0.5)
plt.show()


In [None]:
# Make sure both columns really are numeric; pd.to_numeric raises if a value
# cannot be parsed, so bad data is reported here rather than skewing the result.
study_durations = pd.to_numeric(df2['Duration_Hours'])
mood_levels = pd.to_numeric(df2['Mood Level'])

# Pearson correlation coefficient. Series.corr leaves out sessions where either
# value is missing, whereas np.corrcoef would return NaN for the whole result
# as soon as a single value is missing.
correlation_coefficient = study_durations.corr(mood_levels)

# Print the result
print(f'Correlation Coefficient for Study Duration vs Mood Levels is {correlation_coefficient:.2f}')

In [None]:
# Per-session averages over the whole log.
session_hours = df2["Duration_Hours"]
average_study_duration = session_hours.mean()
average_mood_level = df2["Mood Level"].mean()

print(f"Average Study Duration: {average_study_duration:.2f} hours/session")
print(f"Average Mood Level: {average_mood_level:.2f}")

# Parse 'Start date' into a datetime column named 'date' so that sessions can
# be grouped by calendar day.
df2["date"] = pd.to_datetime(df2["Start date"])

# Add up the hours of all sessions that share a date, then average those daily
# totals. Only dates that appear in the data contribute to the average.
daily_study_time = session_hours.groupby(df2["date"]).sum()
average_study_time_per_day = daily_study_time.mean()

print(f"Average Study Time per Day: {average_study_time_per_day:.2f} hours/day")