In [18]:
import pandas as pd
import numpy as np

class dslr:
    """Hand-rolled summary statistics for the numeric columns of a CSV file.

    The file is read once in the constructor and kept in ``self.df``;
    ``describe`` then builds a per-column statistics table from it.
    """

    def __init__(self, path) -> None:
        """Load the CSV located at *path* into ``self.df`` via ``pd.read_csv``."""
        self.df = pd.read_csv(path)

    def describe(self, ddof: int = 0) -> pd.DataFrame:
        """Return count, mean, std, min, quartiles and max per numeric column.

        Only numeric columns are considered. A column named ``Index`` is
        excluded when present, and columns containing nothing but NaN are
        skipped. NaN entries inside a column are ignored for that column.

        Args:
            ddof: Delta degrees of freedom for the standard deviation; the
                divisor is ``count - ddof``. The default ``0`` gives the
                population standard deviation; pass ``1`` for the sample
                estimate (Bessel's correction).

        Returns:
            A DataFrame with one column per analysed input column and the
            rows ``Count``, ``Mean``, ``Std``, ``Min``, ``25%``, ``50%``,
            ``75%`` and ``Max``. ``Std`` is NaN when ``count - ddof`` is
            not positive.
        """
        df = self.df.select_dtypes(include='number')
        # errors='ignore': the 'Index' column may be absent, or non-numeric
        # and therefore already removed by select_dtypes; neither case
        # should raise KeyError.
        df = df.drop(['Index'], axis=1, errors='ignore')
        df = df.dropna(axis=1, how='all')
        results = {}

        for column_name, column_data in df.items():
            sorted_values = np.sort(column_data.dropna().values)
            # count >= 1 here: all-NaN columns were dropped above.
            count = len(sorted_values)
            mean = np.sum(sorted_values) / count

            divisor = count - ddof
            if divisor > 0:
                std = np.sqrt(np.sum((sorted_values - mean) ** 2) / divisor)
            else:
                # e.g. a single observation with ddof=1: std is undefined.
                std = np.nan

            results[column_name] = {
                "Count": count,
                "Mean": mean,
                "Std": std,
                "Min": sorted_values[0],
                "25%": np.percentile(sorted_values, 25),
                "50%": np.percentile(sorted_values, 50),
                "75%": np.percentile(sorted_values, 75),
                "Max": sorted_values[-1],
            }

        return pd.DataFrame(results)
    
# Load the test dataset and compute its summary table; as the last
# expression of the cell, the returned DataFrame is what gets displayed.
test = dslr(path="datasets/dataset_test.csv")
test.describe()

Unnamed: 0,Arithmancy,Astronomy,Herbology,Defense Against the Dark Arts,Divination,Muggle Studies,Ancient Runes,History of Magic,Transfiguration,Potions,Care of Magical Creatures,Charms,Flying
Count,387.0,387.0,389.0,392.0,394.0,390.0,392.0,389.0,389.0,390.0,392.0,400.0,400.0
Mean,50088.971576,48.155326,1.385517,-0.537843,3.411071,-220.169594,495.937543,2.829816,1030.885777,5.77686,0.022985,-243.181109,23.36745
Std,15452.744803,511.875159,5.048793,5.112229,3.88625,497.030335,101.52767,4.311657,45.288575,3.213497,1.017705,8.716274,95.066887
Min,4536.0,-802.72518,-9.687662,-8.700635,-8.183,-1041.323658,319.36025,-7.18909,906.93205,-1.16206,-2.871119,-259.86678,-143.52
25%,40167.5,-483.71969,-4.183731,-5.209348,3.52875,-580.049325,400.719831,2.227669,1028.499974,3.230372,-0.646285,-250.150987,-38.475
50%,49238.0,292.108738,3.63049,-2.915654,4.703,-407.549022,484.102477,4.290164,1047.648405,5.73133,0.059416,-244.75871,-2.555
75%,60157.0,508.713093,5.405685,4.785055,5.571,221.387925,591.372101,5.662488,1060.367924,8.136907,0.711248,-232.79816,45.3325
Max,99744.0,870.063498,9.678462,8.027252,8.604,939.317135,667.674165,10.366995,1099.966073,13.390013,3.205525,-226.00382,282.43
