In [1]:
import pandas as pd
# Load the raw teacher-workload export and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='teacher_workload.csv')
print(df.head(n=5))

  Teacher_ID      Task  Hours_Spent  Class_Size  Subject Day_of_Week
0         T1   grading            8          45     Math      Monday
1         T1  teaching            6          45     Math     Tuesday
2         T1  planning            4          45     Math   Wednesday
3         T2   grading            7          40  English      Monday
4         T2  teaching            5          40  English    Thursday


In [2]:
# Schema overview followed by the per-column count of missing values.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called bare; wrapping it in print() would add a stray "None" line.
df.info()
print(df.isnull().sum())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 98 entries, 0 to 97
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Teacher_ID   98 non-null     object
 1   Task         98 non-null     object
 2   Hours_Spent  98 non-null     int64 
 3   Class_Size   98 non-null     int64 
 4   Subject      98 non-null     object
 5   Day_of_Week  98 non-null     object
dtypes: int64(2), object(4)
memory usage: 4.7+ KB
None
Teacher_ID     0
Task           0
Hours_Spent    0
Class_Size     0
Subject        0
Day_of_Week    0
dtype: int64


In [3]:
# Clean the loaded workload table and save the result for the later cells.
MAX_HOURS_PER_ENTRY = 20  # anything above this is treated as unrealistic

# Lower-case the task labels so differently-cased spellings group together.
df['Task'] = df['Task'].str.lower()

# Entries that cannot be parsed as numbers become NaN instead of raising.
df['Hours_Spent'] = pd.to_numeric(df['Hours_Spent'], errors='coerce')
df['Class_Size'] = pd.to_numeric(df['Class_Size'], errors='coerce')

# Keep rows whose hours are known and within [0, MAX_HOURS_PER_ENTRY].
# between() is False for NaN, so unparseable hours are removed here on
# purpose (and reported below) rather than vanishing as a side effect of a
# comparison; negative hours are rejected as well.
valid_hours = df['Hours_Spent'].between(0, MAX_HOURS_PER_ENTRY)
n_dropped = int((~valid_hours).sum())
if n_dropped:
    print(f"Dropping {n_dropped} row(s) with missing, negative or "
          f"> {MAX_HOURS_PER_ENTRY} Hours_Spent")

# .copy() detaches the result from the original frame so that re-running the
# column assignments above does not trigger SettingWithCopyWarning.
df = df[valid_hours].copy()
df.to_csv('teacher_workload_cleaned.csv', index=False)

In [4]:
# Mean hours logged for each task type.
print(
    df.groupby('Task')['Hours_Spent'].agg('mean')
)

Task
grading     7.818182
planning    3.656250
teaching    5.636364
Name: Hours_Spent, dtype: float64


In [5]:
# Reload the cleaned table from disk so the following cells work from the saved file.
df = pd.read_csv(filepath_or_buffer='teacher_workload_cleaned.csv')

In [6]:
# Preview the reloaded frame, then show its schema.
# DataFrame.info() prints its own report and returns None, so it is called
# bare; print(df.info()) would append a stray "None" to the output.
print(df.head())
df.info()

  Teacher_ID      Task  Hours_Spent  Class_Size  Subject Day_of_Week
0         T1   grading            8          45     Math      Monday
1         T1  teaching            6          45     Math     Tuesday
2         T1  planning            4          45     Math   Wednesday
3         T2   grading            7          40  English      Monday
4         T2  teaching            5          40  English    Thursday
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 98 entries, 0 to 97
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Teacher_ID   98 non-null     object
 1   Task         98 non-null     object
 2   Hours_Spent  98 non-null     int64 
 3   Class_Size   98 non-null     int64 
 4   Subject      98 non-null     object
 5   Day_of_Week  98 non-null     object
dtypes: int64(2), object(4)
memory usage: 4.7+ KB
None


In [7]:
# Per-task summaries of Hours_Spent. as_index=False keeps 'Task' as an
# ordinary column, so each result is a two-column frame with a default index.

# Mean
avg_hours_by_task = df.groupby('Task', as_index=False)['Hours_Spent'].mean()
print("Average Hours per Task:\n", avg_hours_by_task)

# Sum
total_hours_by_task = df.groupby('Task', as_index=False)['Hours_Spent'].sum()
print("Total Hours per Task:\n", total_hours_by_task)

# Maximum
max_hours_by_task = df.groupby('Task', as_index=False)['Hours_Spent'].max()
print("Max Hours per Task:\n", max_hours_by_task)

Average Hours per Task:
        Task  Hours_Spent
0   grading     7.818182
1  planning     3.656250
2  teaching     5.636364
Total Hours per Task:
        Task  Hours_Spent
0   grading          258
1  planning          117
2  teaching          186
Max Hours per Task:
        Task  Hours_Spent
0   grading            9
1  planning            5
2  teaching            7


In [8]:
# Total hours logged by each teacher. groupby sorts the string IDs
# lexicographically, so 'T10' is listed before 'T2'.
hours_by_teacher = df.groupby('Teacher_ID', as_index=False)['Hours_Spent'].sum()
print("Total Hours per Teacher:\n", hours_by_teacher)

Total Hours per Teacher:
    Teacher_ID  Hours_Spent
0          T1           36
1         T10           34
2         T11           35
3         T12           33
4         T13           31
5         T14           17
6         T15           18
7         T16           15
8         T17           19
9         T18           16
10        T19           17
11         T2           33
12        T20           17
13         T3           35
14         T4           36
15         T5           32
16         T6           34
17         T7           34
18         T8           36
19         T9           33


In [17]:
import plotly.express as px
# Scatter of hours against class size, restricted to the 'grading' rows.
fig = px.scatter(
    data_frame=df.loc[df['Task'].eq('grading')],
    x='Class_Size',
    y='Hours_Spent',
    title='Grading Hours vs. Class Size',
)
fig.update_layout({
    'plot_bgcolor': 'rgba(179, 208, 219, 0.3)',   # translucent pale blue plotting area
    'paper_bgcolor': 'rgba(241, 216, 179, 0.3)',  # translucent pale tan figure background
})
fig.show()

In [10]:
# Total hours logged on each day of the week. groupby sorts the day names
# alphabetically, not in calendar order.
hours_by_day = df.groupby('Day_of_Week', as_index=False)['Hours_Spent'].sum()
print("Hours by Day:\n", hours_by_day)

Hours by Day:
   Day_of_Week  Hours_Spent
0      Friday          122
1      Monday          130
2    Thursday           89
3     Tuesday          116
4   Wednesday          104


In [11]:
# Save the per-task mean and total tables as CSV files, without the index column.
avg_hours_by_task.to_csv(path_or_buf='avg_hours_by_task.csv', index=False)
total_hours_by_task.to_csv(path_or_buf='total_hours_by_task.csv', index=False)

In [12]:
import pandas as pd
import plotly.express as px
# Load the cleaned workload table that the chart cells below read from.
df = pd.read_csv(filepath_or_buffer='teacher_workload_cleaned.csv')

In [18]:
# Bar chart of the mean hours per task type, one colour per task.
avg_hours_by_task = df.groupby('Task', as_index=False)['Hours_Spent'].mean()
fig1 = px.bar(
    data_frame=avg_hours_by_task,
    x='Task',
    y='Hours_Spent',
    color='Task',
    labels={'Task': 'Task Type', 'Hours_Spent': 'Average Hours'},
    title='Average Hours per Task',
)
fig1.update_layout({
    'plot_bgcolor': 'rgba(179, 208, 219, 0.3)',   # translucent pale blue plotting area
    'paper_bgcolor': 'rgba(241, 216, 179, 0.3)',  # translucent pale tan figure background
    'showlegend': False,                          # legend hidden
})
fig1.show()

In [14]:
# Export the average-hours bar chart to an HTML file.
fig1.write_html(file='avg_hours_by_task.html')

In [19]:
# Pie chart of each task's share of the total logged hours; shown inline and
# exported to HTML.
total_hours_by_task = df.groupby('Task', as_index=False)['Hours_Spent'].sum()
fig2 = px.pie(
    data_frame=total_hours_by_task,
    names='Task',
    values='Hours_Spent',
    title='Workload Distribution by Task',
)
fig2.update_layout({
    'plot_bgcolor': 'rgba(179, 208, 219, 0.3)',   # translucent pale blue
    'paper_bgcolor': 'rgba(241, 216, 179, 0.3)',  # translucent pale tan figure background
})
fig2.show()
fig2.write_html(file='total_hours_by_task.html')

In [20]:
# Scatter of grading hours against class size with readable axis labels;
# shown inline and exported to HTML.
fig3 = px.scatter(
    data_frame=df.loc[df['Task'].eq('grading')],
    x='Class_Size',
    y='Hours_Spent',
    labels={'Hours_Spent': 'Grading Hours', 'Class_Size': 'Class Size'},
    title='Grading Hours vs. Class Size',
)
fig3.update_layout({
    'plot_bgcolor': 'rgba(179, 208, 219, 0.3)',   # translucent pale blue plotting area
    'paper_bgcolor': 'rgba(241, 216, 179, 0.3)',  # translucent pale tan figure background
})
fig3.show()
fig3.write_html(file='grading_vs_class_size.html')