In [1]:
import pandas as pd

In [2]:
# === Load data ===
class_list = pd.read_csv("class_list.csv")
lessons_required = pd.read_csv("lessons_required.csv")
student_classes = pd.read_csv("student_classes.csv")
student_list = pd.read_csv("student_list.csv")
teacher_classes = pd.read_csv("teacher_classes.csv")
teacher_list = pd.read_csv("teacher_list.csv")
timeslots_days = pd.read_csv("timeslots.csv")
weekday = pd.read_csv("weekdays.csv")

In [3]:
# === 1. Basic quality checks ===
datasets = {
    "class_list": class_list,
    "lessons_required": lessons_required,
    "student_classes": student_classes,
    "student_list": student_list,
    "teacher_classes": teacher_classes,
    "teacher_list": teacher_list,
    "timeslots_days": timeslots_days,
    "weekday": weekday
}

for name, df in datasets.items():
    print(f"\n=== {name.upper()} ===")
    print("Shape:", df.shape)
    print("Nulls:\n", df.isna().sum())
    print("Duplicates:", df.duplicated().sum())
    print("Unique values per column:\n", df.nunique())


=== CLASS_LIST ===
Shape: (60, 1)
Nulls:
 class    0
dtype: int64
Duplicates: 0
Unique values per column:
 class    60
dtype: int64

=== LESSONS_REQUIRED ===
Shape: (60, 2)
Nulls:
 class          0
num_lessons    0
dtype: int64
Duplicates: 0
Unique values per column:
 class          60
num_lessons     5
dtype: int64

=== STUDENT_CLASSES ===
Shape: (1706, 2)
Nulls:
 student    0
class      0
dtype: int64
Duplicates: 0
Unique values per column:
 student    461
class       61
dtype: int64

=== STUDENT_LIST ===
Shape: (461, 1)
Nulls:
 student    0
dtype: int64
Duplicates: 0
Unique values per column:
 student    461
dtype: int64

=== TEACHER_CLASSES ===
Shape: (61, 2)
Nulls:
 teacher    0
class      0
dtype: int64
Duplicates: 0
Unique values per column:
 teacher    36
class      61
dtype: int64

=== TEACHER_LIST ===
Shape: (36, 1)
Nulls:
 teacher    0
dtype: int64
Duplicates: 0
Unique values per column:
 teacher    36
dtype: int64

=== TIMESLOTS_DAYS ===
Shape: (45, 2)
Nulls:
 timeslot    

In [6]:
import pandas as pd

# Load data
class_list = pd.read_csv("class_list.csv")
lessons_required = pd.read_csv("lessons_required.csv")
student_classes = pd.read_csv("student_classes.csv")
teacher_classes = pd.read_csv("teacher_classes.csv")

# Get unique classes
set_class_list = set(class_list['class'].unique())
set_student_classes = set(student_classes['class'].unique())
set_teacher_classes = set(teacher_classes['class'].unique())
set_lessons_required = set(lessons_required['class'].unique())

# Combine all classes seen anywhere
all_classes = set_class_list | set_student_classes | set_teacher_classes | set_lessons_required

# Check membership across datasets
report = []
for c in sorted(all_classes):
    report.append({
        "class": c,
        "in_class_list": c in set_class_list,
        "in_student_classes": c in set_student_classes,
        "in_teacher_classes": c in set_teacher_classes,
        "in_lessons_required": c in set_lessons_required,
    })

report_df = pd.DataFrame(report)

print(report_df)

# Optional: show only problematic classes
missing_report = report_df[(~report_df['in_class_list']) |
                           (~report_df['in_student_classes']) |
                           (~report_df['in_teacher_classes']) |
                           (~report_df['in_lessons_required'])]

print("\n=== Missing / Inconsistent Classes ===")
print(missing_report)

           class  in_class_list  in_student_classes  in_teacher_classes  \
0        accn-10           True                True                True   
1        accn-11           True                True                True   
2        accn-12           True                True                True   
3   afrikaans-10           True                True                True   
4   afrikaans-11           True                True                True   
..           ...            ...                 ...                 ...   
56    tourism-11           True                True                True   
57    tourism-12           True                True                True   
58       vsla-10           True                True                True   
59       vsla-11           True                True                True   
60       vsla-12           True                True                True   

    in_lessons_required  
0                  True  
1                  True  
2                  Tr

In [8]:
report_df[report_df.isin(True).any().any()]

TypeError: only list-like or dict-like objects are allowed to be passed to DataFrame.isin(), you passed a 'bool'

In [9]:
missing_report = report_df[~report_df[['in_class_list', 
                                       'in_student_classes', 
                                       'in_teacher_classes', 
                                       'in_lessons_required']].all(axis=1)]

In [10]:
missing_report

Unnamed: 0,class,in_class_list,in_student_classes,in_teacher_classes,in_lessons_required
39,isizulu-12,False,True,True,False
