In [56]:
pip install tabulate

Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd
from tabulate import tabulate

def load_data(file_path):
    """
    Read the student records stored in a CSV file.

    Parameters:
    file_path (str): Location of the CSV file to read.

    Returns:
    pd.DataFrame: The parsed student data, or None when the file is missing,
    empty, or malformed (a message describing the problem is printed).
    """
    try:
        students = pd.read_csv(file_path)
    except FileNotFoundError:
        problem = f"Error: The file '{file_path}' was not found. Please check the file path and try again."
    except pd.errors.EmptyDataError:
        problem = "Error: The CSV file is empty."
    except pd.errors.ParserError:
        problem = "Error: The CSV file is not correctly formatted."
    else:
        # Happy path: report success and hand the frame back to the caller.
        print("Data loaded successfully.")
        return students

    # Only reached when one of the handled read errors occurred.
    print(problem)
    return None


def top_mother_education_by_race(df, race, parental_involvement_level, top_n=3):
    """
    Rank the most frequent mother's education levels among students of one race
    and one parental involvement level.

    Both filters are compared case-insensitively against the 'Race' and
    'Parental_involvement' columns.

    Parameters:
    df (pd.DataFrame): The DataFrame containing the student data.
    race (str): The race to keep.
    parental_involvement_level (str): The parental involvement level to keep.
    top_n (int): How many of the most frequent education levels to return.

    Returns:
    pd.Series: Counts indexed by 'Mother_education_level', most frequent first,
    truncated to the first top_n entries (empty when nothing matches).
    """
    # Build one boolean mask per criterion, then keep rows satisfying both.
    race_matches = df['Race'].str.lower().eq(race.lower())
    involvement_matches = df['Parental_involvement'].str.lower().eq(parental_involvement_level.lower())
    matching_rows = df.loc[race_matches & involvement_matches]

    # value_counts() already orders by descending frequency.
    return matching_rows['Mother_education_level'].value_counts().head(top_n)


def calculate_average_absences(df, parental_involvement_level):
    """
    Compute the mean of the 'Absences' column for students whose
    'Parental_involvement' matches the requested level (case-insensitive).

    Parameters:
    df (pd.DataFrame): The DataFrame containing the student data.
    parental_involvement_level (str): The parental involvement level to keep.

    Returns:
    float: The mean number of absences, or None when no row matches.
    """
    level_matches = df['Parental_involvement'].str.lower() == parental_involvement_level.lower()
    matching_rows = df.loc[level_matches]

    # Nothing to average: signal "no data" to the caller with None.
    if matching_rows.empty:
        return None

    return matching_rows['Absences'].mean()


def analyze_math_scores_by_race(df):
    """
    Ask the user for a race and print the mean 'Math_score' of students of that
    race whose 'Attendance_rate' is above 80.

    The race is matched case-insensitively. When no matching score is
    available, a "No data available" message is printed instead.

    Parameters:
    df (pd.DataFrame): The DataFrame containing the student data.
    """
    input_race = input("Enter the race to analyze (as it appears in the data): ").strip()

    # Row selection: good attendance AND the requested race.
    good_attendance = df['Attendance_rate'] > 80
    race_matches = df['Race'].str.lower() == input_race.lower()
    selected_scores = df.loc[good_attendance & race_matches, 'Math_score']

    # mean() of an empty (or all-missing) selection is NaN, handled below.
    mean_score = selected_scores.mean()

    print("===================================================================================================================")
    if pd.isna(mean_score):
        print(f"No data available for race '{input_race}' with attendance rate > 80%")
    else:
        print(f"Average Math Score for students with attendance rate > 80% in race '{input_race}': {mean_score:.2f}")
    print("===================================================================================================================")


def analyze_studytime_by_sex(df, study_time_threshold=2):
    """
    Analyze the relationship between study time and math scores by sex.

    Prompts the user for a sex, keeps the students of that sex
    (case-insensitive match on 'Sex') whose 'Studytime' is strictly greater
    than the threshold, and prints the mean 'Math_score' together with the
    correlation between 'Studytime' and 'Math_score' for that subset.

    When a statistic cannot be computed (for example the correlation of a
    single student, or of constant values), a readable message is printed
    instead of a literal "nan".

    Parameters:
    df (pd.DataFrame): The DataFrame containing the student data.
    study_time_threshold (int or float): Only students with 'Studytime' above
        this value are analyzed. Defaults to 2.
    """
    # Prompt the user to input the sex
    input_sex = input("Enter the sex to analyze (as it appears in the data): ").strip()

    # Keep students above the study time threshold and of the requested sex
    filtered_data = df[
        (df['Studytime'] > study_time_threshold) &
        (df['Sex'].str.lower() == input_sex.lower())
    ]

    print("===================================================================================================================")
    if filtered_data.empty:
        print(f"No data available for sex '{input_sex}' with study time > {study_time_threshold} hours")
    else:
        average_math_score = filtered_data['Math_score'].mean()
        # min_periods=2: a correlation needs at least two paired observations;
        # with fewer, pandas returns NaN, which is reported explicitly below.
        correlation = filtered_data['Studytime'].corr(filtered_data['Math_score'], min_periods=2)

        if pd.notna(average_math_score):
            print(f"Average Math Score for {input_sex} students who study more than {study_time_threshold} hours: {average_math_score:.2f}")
        else:
            # Every matching student has a missing math score.
            print(f"Average Math Score for {input_sex} students who study more than {study_time_threshold} hours: no math scores recorded")

        if pd.notna(correlation):
            print(f"Correlation between Studytime and Math Score for {input_sex} students: {correlation:.2f}")
        else:
            # Too few observations, or no variation in one of the two columns.
            print(f"Correlation between Studytime and Math Score for {input_sex} students: not enough data to compute")
    print("===================================================================================================================")


def main(df):
    """
    Run the interactive text menu over the student data until the user exits.

    Each pass prints the menu, reads a choice, and runs the matching query:
    1 - top mother's education levels for a race and parental involvement level,
    2 - average absences for a parental involvement level,
    3 - average math score by race for attendance rate > 80%,
    4 - study time versus math score by sex,
    5 - leave the loop.
    Any other input prints an error and shows the menu again.

    Parameters:
    df (pd.DataFrame): The DataFrame containing the student data.
    """
    menu_lines = (
        "\nMenu:",
        "1. Identify the top 3 levels of mother’s education for a specific race of students based on parental involvement levels.",
        "2. Analyze the average number of absences among students with a particular level of parental involvement.",
        "3. Analyze average math scores for students with attendance rate > 80% based on race.",
        "4. Analyze study time and math scores by sex.",
        "5. Exit",
    )
    involvement_prompt = "Enter the parental involvement level (e.g., high, medium, low): "

    while True:
        for menu_line in menu_lines:
            print(menu_line)

        choice = input("Enter your choice (1, 2, 3, 4, or 5): ").strip()

        if choice == '5':
            print("Exiting the application.")
            return

        if choice == '1':
            # Query 1: most common mother's education levels
            race = input("Enter the race you're interested in: ").strip().lower()
            parental_involvement_level = input(involvement_prompt).strip()

            # Reject races that do not occur in the data before querying.
            known_races = df['Race'].str.lower().unique()
            if race not in known_races:
                print(f"'{race}' is not a valid race. Please enter a valid race.")
                continue

            ranking = top_mother_education_by_race(df, race, parental_involvement_level)

            print("===================================================================================================================")
            if ranking.empty:
                print(f"No data available for race '{race}' with '{parental_involvement_level}' parental involvement.")
            else:
                print(f"Top 3 Mother's Education Levels for race '{race}' with '{parental_involvement_level}' parental involvement:")
                print(tabulate(ranking.reset_index(), headers=["Mother's Education Level", "Count"], tablefmt="grid"))
            print("===================================================================================================================")
            continue

        if choice == '2':
            # Query 2: average absences for one involvement level
            parental_involvement_level = input(involvement_prompt).strip()
            mean_absences = calculate_average_absences(df, parental_involvement_level)

            print("===================================================================================================================")
            if mean_absences is None:
                print(f"No data available for parental involvement level '{parental_involvement_level}'.")
            else:
                print(f"The average number of absences for students with '{parental_involvement_level}' parental involvement is: {mean_absences:.2f}")
            print("===================================================================================================================")
            continue

        if choice == '3':
            # Query 3: the helper prompts for the race and prints its own report
            analyze_math_scores_by_race(df)
            continue

        if choice == '4':
            # Query 4: the helper prompts for the sex and prints its own report
            analyze_studytime_by_sex(df)
            continue

        print("Invalid choice. Please select a valid option.")

# Entry point: load the CSV and start the interactive menu when this code runs
# as the main module.
if __name__ == "__main__":
    file_path = 'students_data.csv'  # replace with your actual file path
    df = load_data(file_path)
    # load_data returns None (after printing an error message) when the file
    # cannot be read, so the menu is only started when a DataFrame was loaded.
    if df is not None:
        main(df)


Data loaded successfully.

Menu:
1. Identify the top 3 levels of mother’s education for a specific race of students based on parental involvement levels.
2. Analyze the average number of absences among students with a particular level of parental involvement.
3. Analyze average math scores for students with attendance rate > 80% based on race.
4. Analyze study time and math scores by sex.
5. Exit


Enter your choice (1, 2, 3, 4, or 5):  2
Enter the parental involvement level (e.g., high, medium, low):  low


The average number of absences for students with 'low' parental involvement is: 47.30

Menu:
1. Identify the top 3 levels of mother’s education for a specific race of students based on parental involvement levels.
2. Analyze the average number of absences among students with a particular level of parental involvement.
3. Analyze average math scores for students with attendance rate > 80% based on race.
4. Analyze study time and math scores by sex.
5. Exit


Enter your choice (1, 2, 3, 4, or 5):  3
Enter the race to analyze (as it appears in the data):  Other


Average Math Score for students with attendance rate > 80% in race 'Other': 50.77

Menu:
1. Identify the top 3 levels of mother’s education for a specific race of students based on parental involvement levels.
2. Analyze the average number of absences among students with a particular level of parental involvement.
3. Analyze average math scores for students with attendance rate > 80% based on race.
4. Analyze study time and math scores by sex.
5. Exit


Enter your choice (1, 2, 3, 4, or 5):  5


Exiting the application.
