In [325]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import os

In [326]:
dataframes = {
    '2018': pd.read_csv('Data KP - 2018.csv'),
    '2019': pd.read_csv('Data KP - 2019.csv'),
    '2020': pd.read_csv('Data KP - 2020.csv'),
    '2021': pd.read_csv('Data KP - 2021.csv'),
    '2022': pd.read_csv('Data KP - 2022.csv'),
    '2023': pd.read_csv('Data KP - 2023.csv')
}

In [327]:
thresholds = {
    '2018': {
        'OWeek': 6, 
        'Upacara': 4, 
        'L101': 4, 
        'OK': 30, 
        'Mentoring': 12, 
        'Workshop/Seminar': 16, 
        'Lainnya': 28, 
        'Total': 100
    },
    '2019': {
        'OWeek': 6, 
        'Upacara': 4, 
        'Pra-LDK': 4, 
        'OK': 25, 
        'Mentoring': 12, 
        'Talkshow Wajib': 3, 
        'Kompetisi': 9, 
        'Kepanitiaan': 8, 
        'Pengmas': 9, 
        'Lainnya': 20, 
        'Total': 100
    },
    '2020': {
        'OWeek': 6, 
        'Upacara': 4, 
        'Pra-LDK': 4, 
        'OK': 25, 
        'Mentoring': 12, 
        'Talkshow Wajib': 3, 
        'Kompetisi': 9, 
        'Kepanitiaan': 8, 
        'Pengmas': 9, 
        'Lainnya': 20, 
        'Total': 100
    },
    '2021': {
        'OWeek': 6, 
        'Upacara': 2, 
        'Pra-LDK': 4, 
        'OK': 20, 
        'Mentoring': 12, 
        'Talkshow Wajib': 3, 
        'Kompetisi': 9, 
        'Kepanitiaan': 8, 
        'Pengmas': 9, 
        'Career Center': 9, 
        'International Office': 6, 
        'Lainnya': 12, 
        'Total': 100
    },
    '2022': {
        'OWeek': 6, 
        'Upacara': 2, 
        'Pra-LDK': 4, 
        'OK': 20, 
        'Mentoring': 12, 
        'Talkshow Wajib': 3, 
        'Kompetisi': 9, 
        'Kepanitiaan': 8, 
        'Pengmas': 9, 
        'Career Center': 9, 
        'International Office': 6, 
        'Lainnya': 12, 
        'Total': 100
    },
    '2023': {
        'OWeek': 6, 
        'Upacara': 2, 
        'Pra-LDK': 4, 
        'OK': 20, 
        'Mentoring': 12, 
        'Talkshow Wajib': 4, 
        'Kompetisi': 9, 
        'Kepanitiaan': 6, 
        'Pengmas': 4, 
        'Career Center': 9, 
        'International Office': 6, 
        'Lainnya': 18, 
        'Total': 100
    }
}

In [328]:
column_mappings = {
    '2018': {
        'Oweek': 'OWeek',
        'Upacara': 'Upacara',
        'Leadership 101': 'L101',
        'Organisasi Kemahasiswaan': 'OK',
        'Mentoring': 'Mentoring',
        'Workshop/Seminar': 'Workshop/Seminar',
        'Lainnya': 'Lainnya',
        'Total Required': 'Total'
    },
    '2019': {
        'Oweek': 'OWeek',
        'Upacara': 'Upacara',
        'Pra Latihan Dasar Kepemimpinan': 'Pra-LDK',
        'Organisasi Kemahasiswaan': 'OK',
        'Mentoring': 'Mentoring',
        'Talkshow': 'Talkshow Wajib',
        'Kompetisi': 'Kompetisi',
        'Kepanitiaan': 'Kepanitiaan',
        'Pengabdian Masyarakat': 'Pengmas',
        'Kegiatan Career Center': 'Career Center',
        'Kegiatan International Office': 'International Office',
        'Lainnya': 'Lainnya',
        'Total Required': 'Total'
    },
    '2020': {
        'Oweek': 'OWeek',
        'Upacara': 'Upacara',
        'Pra Latihan Dasar Kepemimpinan': 'Pra-LDK',
        'Organisasi Kemahasiswaan': 'OK',
        'Mentoring': 'Mentoring',
        'Talkshow': 'Talkshow Wajib',
        'Kompetisi': 'Kompetisi',
        'Kepanitiaan': 'Kepanitiaan',
        'Pengabdian Masyarakat': 'Pengmas',
        'Kegiatan Career Center': 'Career Center',
        'Kegiatan International Office': 'International Office',
        'Lainnya': 'Lainnya',
        'Total Required': 'Total'
    },
    '2021': {
        'Oweek': 'OWeek',
        'Upacara': 'Upacara',
        'Pra Latihan Dasar Kepemimpinan': 'Pra-LDK',
        'Organisasi Kemahasiswaan': 'OK',
        'Mentoring': 'Mentoring',
        'Talkshow': 'Talkshow Wajib',
        'Kompetisi': 'Kompetisi',
        'Kepanitiaan': 'Kepanitiaan',
        'Pengabdian Masyarakat': 'Pengmas',
        'Kegiatan Career Center': 'Career Center',
        'Kegiatan International Office': 'International Office',
        'Lainnya': 'Lainnya',
        'Total Required': 'Total'
    },
    '2022': {
        'Oweek': 'OWeek',
        'Upacara': 'Upacara',
        'Pra Latihan Dasar Kepemimpinan': 'Pra-LDK',
        'Organisasi Kemahasiswaan': 'OK',
        'Mentoring': 'Mentoring',
        'Talkshow': 'Talkshow Wajib',
        'Kompetisi': 'Kompetisi',
        'Kepanitiaan': 'Kepanitiaan',
        'Pengabdian Masyarakat': 'Pengmas',
        'Kegiatan Career Center': 'Career Center',
        'Kegiatan International Office': 'International Office',
        'Lainnya': 'Lainnya',
        'Total Required': 'Total'
    },
    '2023': {
        'Oweek': 'OWeek',
        'Upacara': 'Upacara',
        'Pra Latihan Dasar Kepemimpinan': 'Pra-LDK',
        'Organisasi Kemahasiswaan': 'OK',
        'Mentoring': 'Mentoring',
        'Talkshow': 'Talkshow Wajib',
        'Kompetisi': 'Kompetisi',
        'Kepanitiaan': 'Kepanitiaan',
        'Pengabdian Masyarakat': 'Pengmas',
        'Kegiatan Career Center': 'Career Center',
        'Kegiatan International Office': 'International Office',
        'LainnyaMin': 'Lainnya',
        'Total Required': 'Total'
    }
}

In [329]:
import pandas as pd
import os
import re

# Define the base directory for saving the files
output_dir = 'output'

# Create the directory if it does not exist
os.makedirs(output_dir, exist_ok=True)

# Concatenate all DataFrames into one
combined_df = pd.DataFrame()

# Iterate through all years to process and concatenate
for year, df in dataframes.items():
    # Print columns for debugging
    print(f"Columns for {year}: {df.columns.tolist()}")
    
    # Rename columns
    df.rename(columns=column_mappings[year], inplace=True)
    
    # Add year column for tracking
    df['Year'] = year
    
    df.fillna("-", inplace=True)
    
    # Convert relevant columns to numeric
    threshold_cols = list(thresholds[year].keys())
    for col in threshold_cols:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')
    
    # Drop rows with NaN in threshold columns
    df.dropna(subset=threshold_cols, inplace=True)
    
    # Concatenate the current DataFrame to the combined DataFrame
    combined_df = pd.concat([combined_df, df])

# Ensure the combined DataFrame has no duplicates
combined_df = combined_df.drop_duplicates()

# Sort by 'Major', 'NIS', and 'Name'
combined_df.sort_values(by=['Major', 'NIS', 'Name'], inplace=True)

# Now create a list for students who didn't meet each threshold criterion
threshold_issues = {}

# Iterate through all threshold criteria
for year, threshold in thresholds.items():
    for col, value in threshold.items():
        if col in combined_df.columns:
            # Filter students who didn't meet the threshold for this criterion
            failed_students = combined_df[combined_df[col] < value]
            if not failed_students.empty:
                if col not in threshold_issues:
                    threshold_issues[col] = pd.DataFrame()  # Create an empty DataFrame if not yet created
                # Append the failed students to the respective criterion list
                threshold_issues[col] = pd.concat([threshold_issues[col], failed_students])

# Function to sanitize column names for valid filenames
def sanitize_filename(name):
    return re.sub(r'[^\w\-_\. ]', '_', name)

# Output each list of students who didn't meet a criterion
for col, df in threshold_issues.items():
    # Remove duplicates (in case students appear multiple times for the same issue)
    df = df.drop_duplicates()
    
    # Sort by 'Major', 'NIS', and 'Name'
    df.sort_values(by=['Major', 'NIS', 'Name'], inplace=True)
    
    # Sanitize the column name for use in the file name
    sanitized_col = sanitize_filename(col)
    
    # Save the filtered DataFrame for this criterion
    output_filename = os.path.join(output_dir, f"students_below_threshold_{sanitized_col}.xlsx")
    df.to_excel(output_filename, index=False)

print("Files saved in the 'output' folder for each threshold criterion.")


Columns for 2018: ['Major', 'NIS', 'Name', 'Phone', 'Mobile', 'Email Student UC', 'Email Student Pribadi', 'Email Dosen UC', 'Email Dosen Pribadi', 'Status', 'Student Year', 'Oweek', 'Upacara', 'Leadership 101', 'Organisasi Kemahasiswaan', 'Mentoring', 'Workshop/Seminar', 'Lainnya', 'Total Required']
Columns for 2019: ['Major', 'NIS', 'Name', 'Phone', 'Mobile', 'Email Student UC', 'Email Student Pribadi', 'Email Dosen UC', 'Email Dosen Pribadi', 'Status', 'Student Year', 'Oweek', 'Upacara', 'Pra Latihan Dasar Kepemimpinan', 'Organisasi Kemahasiswaan', 'Mentoring', 'Talkshow', 'Kompetisi', 'Kepanitiaan', 'Pengabdian Masyarakat', 'Lainnya', 'Total Required']
Columns for 2020: ['Major', 'NIS', 'Name', 'Phone', 'Mobile', 'Email Student UC', 'Email Student Pribadi', 'Email Dosen UC', 'Email Dosen Pribadi', 'Status', 'Student Year', 'Oweek', 'Upacara', 'Pra Latihan Dasar Kepemimpinan', 'Organisasi Kemahasiswaan', 'Mentoring', 'Talkshow', 'Kompetisi', 'Kepanitiaan', 'Pengabdian Masyarakat', '

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_values(by=['Major', 'NIS', 'Name'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_values(by=['Major', 'NIS', 'Name'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_values(by=['Major', 'NIS', 'Name'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-c

Files saved in the 'output' folder for each threshold criterion.
