In [9]:
    import matplotlib.pyplot as plt
    import matplotlib.dates as dates
    import scipy.stats
    import numpy as np
    import pandas as pd
    import os
    from datetime import date
    

    # Function to read files from folder
    def read_files(folder_path):
        """Read every file in a folder into a list of DataFrames.

        Files are visited in sorted name order, so the order of the returned
        list is the same on every run; ``os.listdir`` on its own returns
        entries in arbitrary order. Entries that are not regular files
        (such as sub-directories) are skipped.

        Parameters
        ----------
        folder_path : str
            Folder to read from. A trailing path separator is optional.

        Returns
        -------
        list of pandas.DataFrame
            One frame per file, with every column read as ``object`` dtype.
        """
        df_list = []

        for f in sorted(os.listdir(folder_path)):
            # os.path.join works with or without a trailing separator,
            # unlike plain string concatenation.
            file_path = os.path.join(folder_path, f)
            if not os.path.isfile(file_path):
                # read_csv cannot open a directory
                continue
            df_list.append(pd.read_csv(file_path, dtype='object'))

        return df_list


    # Unpacking requires read_files to return exactly two frames: the first is
    # bound to course_data, the second to tech_data. Any other number of
    # frames raises ValueError here.
    course_data, tech_data = read_files("data/")

## Tech Data

In [55]:
def get_tech_data(path="data/Tech_Level.csv", as_of=None):
    """Load the technician-level CSV and return a typed, renamed DataFrame.

    The file is read with every column as ``object``, converted to explicit
    dtypes, renamed to short column names, and extended with a ``YOE``
    (years of service) column.

    Parameters
    ----------
    path : str, optional
        CSV file to read. Defaults to ``"data/Tech_Level.csv"``.
    as_of : date-like, optional
        End-of-service date used for rows whose termination date is missing.
        Defaults to today's date; pass a fixed date to get reproducible
        ``YOE`` values.

    Returns
    -------
    pandas.DataFrame
        Columns ``Enterprise``, ``Contract_Type``, ``State``, ``AR#``,
        ``Dealer``, ``Emp_SN``, ``Employees``, ``Start_dt``, ``Term_dt``,
        ``Account_Control`` (plus any other columns present in the file)
        and the computed ``YOE``.
    """
    tech_raw = pd.read_csv(path, dtype='object')

    tech_data = (
        tech_raw
        # Convert data types
        .astype({
            'Common Dealer Enterprise Name': "string",
            'Common Dealer Contract Type': "string",
            'Common Dealer State': "string",
            'Dealer AR': "string",
            'Dealer Name': "string",
            'Employee SN': "string",
            # plain int: the conversion raises if this column has missing values
            'Dealer Employee Count': int,
            'Emp Start Date': 'datetime64[ns]',
            'Emp Terminated Date': 'datetime64[ns]',
            'Account Control': "string"
        })
        # rename columns (returns a new frame instead of mutating in place)
        .rename(columns={
            'Common Dealer Enterprise Name': "Enterprise",
            'Common Dealer Contract Type': "Contract_Type",
            'Common Dealer State': "State",
            'Dealer AR': "AR#",
            'Dealer Name': "Dealer",
            'Employee SN': "Emp_SN",
            'Dealer Employee Count': 'Employees',
            'Emp Start Date': "Start_dt",
            'Emp Terminated Date': "Term_dt",
            'Account Control': "Account_Control"
        })
    )

    # add leading zeros to Emp_SN
    tech_data['Emp_SN'] = tech_data['Emp_SN'].str.pad(width=8, side='left', fillchar='0')

    # calculate years of service: service ends at Term_dt when one is
    # recorded, otherwise at the as-of date
    as_of_ts = pd.to_datetime(date.today() if as_of is None else as_of)
    service_end = tech_data['Term_dt'].fillna(as_of_ts)
    tech_data['YOE'] = ((service_end - tech_data['Start_dt']).dt.days / 365.25).round(2)

    return tech_data

## Course Data Cleaning

In [56]:
def get_course_data(path="data/Course_Level.csv"):
    """Load the course-level CSV and return a cleaned, typed DataFrame.

    The file is read with every column as ``object``, renamed to short
    column names, has missing text and count values filled, is converted to
    explicit dtypes, and finally has ``Course_Type`` overridden from
    ``Training_Pts``.

    Parameters
    ----------
    path : str, optional
        CSV file to read. Defaults to ``"data/Course_Level.csv"``.

    Returns
    -------
    pandas.DataFrame
        The cleaned frame. ``Course_Type`` is ``'Other'`` for 0 points,
        ``'E-Learning'`` for 1 or 2 points, ``'ILT'`` for 8 or more points,
        and otherwise keeps the value from the file.
    """
    course_raw = pd.read_csv(path, dtype='object')

    # Renaming columns to be more convenient
    course_data = course_raw.rename(columns={
        'Dealer AR': "AR#",
        'Dealer Name': "Dealer",
        'Employee SN': "Emp_SN",
        'Course ID': "Course_ID",
        'Course Nickname': "Course_Nickname",
        'Points (Bobcat FL)': "Training_Pts",
        'Course Type': "Course_Type",
        '# of Completed Courses': "Completed_Courses",
        '# of ILT (0-5yr)': "Courses_0-5",
        '# of ILT (6-10yr)': "Courses_6-10",
        '# of Completed Courses (11+ yr)': "Courses_11+",
        'Completion Date': 'Completion_dt'
    })

    # replace null strings and numbers, selecting columns by name so the
    # result does not depend on the column order of the CSV.
    # NOTE(review): this previously used positions [3, 4, 6] and [7, 8, 9, 10];
    # the names below assume the file's columns follow the rename order above
    # — confirm against the real file.
    text_cols = ['Course_ID', 'Course_Nickname', 'Course_Type']
    count_cols = ['Completed_Courses', 'Courses_0-5', 'Courses_6-10', 'Courses_11+']
    course_data[text_cols] = course_data[text_cols].fillna("-")
    course_data[count_cols] = course_data[count_cols].fillna(0)

    # Convert data types
    course_data = course_data.astype({
        'AR#': "string",
        'Dealer': "string",
        'Emp_SN': "string",
        'Course_ID': "string",
        'Course_Nickname': "string",
        'Training_Pts': float,
        'Course_Type': "string",
        'Completed_Courses': int,
        'Courses_0-5': int,
        'Courses_6-10': int,
        'Courses_11+': int,
        'Completion_dt': 'datetime64[ns]'
    })

    # Derive Course_Type from the points value in one pass. The conditions
    # are mutually exclusive; rows matching none of them (including missing
    # points) keep their existing Course_Type.
    pts = course_data['Training_Pts']
    course_data['Course_Type'] = np.select(
        [pts == 0, (pts == 1) | (pts == 2), pts >= 8],
        ['Other', 'E-Learning', 'ILT'],
        default=course_data['Course_Type'].to_numpy(dtype=object),
    )

    return course_data

NameError: name 'course_data' is not defined