# Import and Load Data

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Location of the athletes CSV; later cells pass this same URL to Summary.
url = 'https://raw.githubusercontent.com/jonathan-data-analysis/colab_test_data/main/athletes.csv'
df = pd.read_csv(url) # read the CSV at `url` into a DataFrame

## Let's Add Some Stats 💯
We can add summary statistics like count, mean, standard deviation, and more. The good news is that we won't have to create the logic from scratch. Instead, we'll use `describe()` to gather the summary statistics we need.

In [None]:
class Summary:
    """Load a CSV into a DataFrame and print quick overviews of it."""

    def __init__(self, data):
        """Read ``data`` with ``pd.read_csv`` and keep the result on ``self.df``."""
        frame = pd.read_csv(data)
        self.df = frame

    def print_head(self):
        """Print the result of ``DataFrame.head()`` for the loaded data."""
        preview = self.df.head()
        print(preview)

    def get_columns(self):
        """Print the column labels as a plain Python list."""
        labels = [label for label in self.df.columns]
        print(labels)

    def get_dim(self):
        """Print the number of rows, then the number of columns."""
        n_rows, n_cols = self.df.shape
        print('Rows:', n_rows)
        print('Columns:', n_cols)

    def get_stats(self):
        """Print the table produced by ``DataFrame.describe()``."""
        stats = self.df.describe()
        print(stats)

In [None]:
# Build a Summary from the CSV URL; Summary.__init__ loads it with pd.read_csv.
data = Summary(url)
# Print the describe() summary statistics for the loaded DataFrame.
data.get_stats()

              rcc         wcc          hc          hg        ferr         bmi  \
count  202.000000  202.000000  202.000000  202.000000  202.000000  202.000000   
mean     4.718614    7.108911   43.091584   14.566337   76.876238   22.955891   
std      0.457976    1.800337    3.662989    1.362451   47.501239    2.863933   
min      3.800000    3.300000   35.900000   11.600000    8.000000   16.750000   
25%      4.372500    5.900000   40.600000   13.500000   41.250000   21.082500   
50%      4.755000    6.850000   43.500000   14.700000   65.500000   22.720000   
75%      5.030000    8.275000   45.575000   15.575000   97.000000   24.465000   
max      6.720000   14.300000   59.700000   19.200000  234.000000   34.420000   

              ssf      pcBfat         lbm          ht          wt  
count  202.000000  202.000000  202.000000  202.000000  202.000000  
mean    69.021782   13.507426   64.873713  180.103960   75.007921  
std     32.565333    6.189826   13.070197    9.734494   13.925199 

## More Summary Statistics
We can also add just the summary statistics we want, using `mean()`.

In [None]:
class Summary:
    """Load a CSV into a DataFrame and print quick overviews of it."""

    def __init__(self, data):
        """Read ``data`` with ``pd.read_csv`` and keep the result on ``self.df``."""
        frame = pd.read_csv(data)
        self.df = frame

    def print_head(self):
        """Print the result of ``DataFrame.head()`` for the loaded data."""
        preview = self.df.head()
        print(preview)

    def get_columns(self):
        """Print the column labels as a plain Python list."""
        labels = [label for label in self.df.columns]
        print(labels)

    def get_dim(self):
        """Print the number of rows, then the number of columns."""
        n_rows, n_cols = self.df.shape
        print('Rows:', n_rows)
        print('Columns:', n_cols)

    def get_stats(self):
        """Print the table produced by ``DataFrame.describe()``."""
        stats = self.df.describe()
        print(stats)

    def get_mean(self, column):
        """Print the mean of the values in ``column``, labelled with its name."""
        average = self.df[column].mean()
        print(f"Mean {column}:", average)


In [None]:
# Re-create the Summary so it uses the class version that defines get_mean.
data = Summary(url)
# Print the mean of the 'wt' column.
data.get_mean('wt')

Mean wt: 75.0079207920792


In [None]:
class Summary:
    """Load a CSV into a DataFrame and print quick overviews of it."""

    def __init__(self, data):
        """Read ``data`` with ``pd.read_csv`` and keep the result on ``self.df``."""
        frame = pd.read_csv(data)
        self.df = frame

    def print_head(self):
        """Print the result of ``DataFrame.head()`` for the loaded data."""
        preview = self.df.head()
        print(preview)

    def get_columns(self):
        """Print the column labels as a plain Python list."""
        labels = [label for label in self.df.columns]
        print(labels)

    def get_dim(self):
        """Print the number of rows, then the number of columns."""
        n_rows, n_cols = self.df.shape
        print('Rows:', n_rows)
        print('Columns:', n_cols)

    def get_stats(self):
        """Print the table produced by ``DataFrame.describe()``."""
        stats = self.df.describe()
        print(stats)

    def get_mean(self, column):
        """Print the mean of the values in ``column``, labelled with its name."""
        average = self.df[column].mean()
        print(f"Mean {column}:", average)

    def get_standard_dev(self, column):
        """Print the result of ``Series.std()`` for ``column``, labelled with its name."""
        spread = self.df[column].std()
        print(f"STD {column}:", spread)

In [None]:
# Re-create the Summary so it uses the class version that defines get_standard_dev.
data = Summary(url)
# Print the standard deviation (Series.std()) of the 'wt' column.
data.get_standard_dev('wt')

STD wt: 13.925199486183791
