# Descriptive Statistics with Python

**df.describe()** Quick statistics by column

In [39]:
import numpy as np
import pandas as pd

# Load the Excel workbook "all_data_M_2021.xlsx" (path is relative to the
# current working directory) into a DataFrame used by the cells below.
dataframe = pd.read_excel("all_data_M_2021.xlsx")

In [None]:
# Preview the first few rows of the loaded data.
dataframe.head()

In [None]:
# The first entry of .shape is the row count of the dataframe.
number_of_rows = dataframe.shape[0]
print(f'Dataframe has {number_of_rows} rows.')

In [None]:
# Quick per-column summary statistics of the data as loaded.
dataframe.describe()

In [40]:
# Every column that gets cleaned and re-typed, in the order it is registered
columns_to_modify = [
    'H_MEAN', 'A_MEAN', 'MEAN_PRSE', 'H_PCT10',
    'H_PCT25', 'H_MEDIAN', 'H_PCT75', 'H_PCT90',
    'A_PCT10', 'A_PCT25', 'A_MEDIAN', 'A_PCT75',
    'A_PCT90',
]

# The same columns, partitioned by the number type wanted
int_columns = [
    'A_MEAN', 'A_PCT10', 'A_PCT25', 'A_MEDIAN', 'A_PCT75', 'A_PCT90',
]
float_columns = [
    'H_MEAN', 'MEAN_PRSE', 'H_PCT10', 'H_PCT25', 'H_MEDIAN', 'H_PCT75',
    'H_PCT90',
]

# Map each column name to its target dtype.
# 'Int64' is needed when mixing ints with pandas.NA, and 'Float64' when
# mixing floats with pandas.NA; anything not listed in int_columns is
# treated as a float column.
dict_column_and_type = {
    column_name: 'Int64' if column_name in int_columns else 'Float64'
    for column_name in columns_to_modify
}
        
# DataFrame.apply hands each column to the function as a Series (not one cell
# at a time), so the function has to cope with a whole column as well as a
# single string value.
def remove_commas(text):
    """Remove every comma from a string, or from the strings in a pandas object.

    Parameters
    ----------
    text : str, pandas.Series, pandas.DataFrame or any other scalar
        A single cell value, or a whole column/table of values.

    Returns
    -------
    Same kind of object as the input. Strings have all "," characters
    removed; non-string cells (numbers, NaN, ...) are returned unchanged.
    """
    if isinstance(text, str):
        return text.replace(",", "")

    if isinstance(text, (pd.Series, pd.DataFrame)):
        # Without regex=True, pandas' replace only matches cells whose entire
        # value equals ",", so "1,234" would be left untouched. The regex
        # form substitutes inside each string cell and skips non-strings.
        return text.replace(",", "", regex=True)

    # Any other scalar (int, float, NaN, ...) has no commas to strip.
    return text

def replace_symbols(text):
    """Swap the placeholder markers "*", "#" and NaN for pandas.NA.

    `text` is expected to be a pandas object (e.g. the column Series that
    DataFrame.apply passes in); its .replace method is called once per
    marker, in that order, and the final result is returned. pandas.NA is
    pandas' missing-value marker, which the nullable 'Int64'/'Float64'
    dtypes can hold.
    """
    cleaned = text
    for marker in ("*", "#", np.nan):
        cleaned = cleaned.replace(marker, pd.NA)
    return cleaned

In [48]:
# Initialize our sanitized dataframe as an independent copy. A plain
# `sanitized_df = dataframe` would only bind a second name to the same
# object, so the column assignments below would also overwrite `dataframe`.
sanitized_df = dataframe.copy()

# Apply our two functions. DataFrame.apply calls each function once per
# selected column, passing that column as a Series.
sanitized_df[columns_to_modify] = sanitized_df[columns_to_modify].apply(remove_commas)
sanitized_df[columns_to_modify] = sanitized_df[columns_to_modify].apply(replace_symbols)

# Using the dictionary, we cast each column as its associated data type
sanitized_df = sanitized_df.astype(dict_column_and_type)

In [None]:
# Quick per-column summary statistics of the sanitized dataframe.
sanitized_df.describe()