In [None]:
import numpy as np

In [None]:
# Read the whole CSV, header row included, with an object dtype so the
# column names and the numeric fields can share a single array.
data_array = np.loadtxt('/content/advertising.csv', dtype=object, delimiter=',')

In [None]:
def print_header(data_array):
  """Print the first row of ``data_array`` (the row holding the column names)."""
  header_row = data_array[0]
  print(header_row)

# Show the column names found in the first row of the loaded file.
print_header(data_array)

['TV' 'Radio' 'Newspaper' 'Sales']


In [None]:
def head(data_array, n=5):
  """Print the header row followed by the first ``n`` data rows.

  Args:
    data_array: array whose row 0 is the header row.
    n: number of data rows to print after the header. Defaults to 5,
      which reproduces the previous fixed six-row preview.
  """
  # Clamp so a negative ``n`` cannot turn into a negative slice stop,
  # which would silently print almost the whole array.
  stop = max(int(n), 0) + 1
  print(data_array[0:stop, ])

# Preview the header plus the first few data rows.
head(data_array)

[['TV' 'Radio' 'Newspaper' 'Sales']
 ['230.1' '37.8' '69.2' '22.1']
 ['44.5' '39.3' '45.1' '10.4']
 ['17.2' '45.9' '69.3' '12']
 ['151.5' '41.3' '58.5' '16.5']
 ['180.8' '10.8' '58.4' '17.9']]


In [None]:
def check_null(data):
  """Report whether any data cell (every row after the header) is NaN.

  Args:
    data: 2-D array whose first row is the header; the remaining rows
      must be convertible to float.

  Returns:
    A truthy value if at least one cell is NaN, a falsy one if none is.
    ``None`` when the rows cannot be sliced or converted to float; the
    error is printed instead of raised so the notebook keeps running.
  """
  try:
    values = data[1:, :].astype('float')
  except Exception as e:
    # Best-effort contract kept from the original: report and carry on.
    print(e)
    return None

  # np.any, not np.all: one missing value is enough to flag the data set,
  # whereas np.all would only fire when *every* cell is NaN.
  return np.any(np.isnan(values))

# Run the NaN check over the numeric rows of the loaded data.
check_null(data_array)

False

In [None]:
def index_header(data_frame):
  """Print each column name from the first row next to its positional index."""
  names = data_frame[0]
  for position in range(len(names)):
    print(position, names[position])

# List the column positions; later cells refer to columns by these indexes.
index_header(data_array)

0 TV
1 Radio
2 Newspaper
3 Sales


In [None]:
def outlier_pct(data_frame, col_index, lower_pct=1, upper_pct=99):
  '''
  Calculate the percentage of values in one column that are outliers.

  A value counts as an outlier when it is not inside the closed interval
  between the ``lower_pct`` and ``upper_pct`` percentiles of the column.

  Args:
    data_frame: 2-D array whose first row is the header.
    col_index: position of the column to inspect.
    lower_pct: lower percentile bound (default 1).
    upper_pct: upper percentile bound (default 99).

  Returns:
    A string of the form ``'Outlier Percent <value> : '``, or ``None``
    (after printing the reason) when the column cannot be read, cannot be
    converted to float, or has no data rows.

  Raises:
    ValueError: propagated from ``np.percentile`` if a percentile bound
      is outside the range 0-100.
  '''
  try:
    col = data_frame[1:, col_index].astype('float')
  except (ValueError, TypeError):
    print("cannot convert the column values to float")
    return None
  except IndexError as e:
    # Previously reported as a conversion failure, which was misleading.
    print("cannot read column {}: {}".format(col_index, e))
    return None

  if col.size == 0:
    # No data rows: avoid the percentile/divide-by-zero failure path.
    print("column {} has no data rows".format(col_index))
    return None

  min_threshold = np.percentile(col, lower_pct)
  max_threshold = np.percentile(col, upper_pct)

  # Vectorised membership test; NaN compares False on both sides and is
  # therefore counted as an outlier, same as the element-wise filter did.
  within_bounds = (col >= min_threshold) & (col <= max_threshold)
  n_outliers = len(col) - int(np.count_nonzero(within_bounds))

  out_perct = (n_outliers / len(col)) * 100
  # NOTE(review): the value is placed before the colon; kept byte-identical
  # for existing callers - confirm whether that ordering is intended.
  return "Outlier Percent {} : ".format(out_perct)

In [None]:
# Outlier share for column 3, which the header listing shows is 'Sales'.
outlier_pct(data_array, 3)

'Outlier Percent 2.0 : '

In [None]:
def sep_header(data_array):
  '''
  Split the array into its header row and its numeric body.

  Returns a ``(header, data_frame)`` pair: ``header`` is row 0 as-is and
  ``data_frame`` is every following row converted to float.
  '''
  return data_array[0], data_array[1:, :].astype('float')

In [None]:
def drop_cols(data, indexes = []):
  """Return a new array equal to ``data`` without the columns at ``indexes``.

  ``indexes`` is only handed to ``np.delete``; with the default empty
  list no column is removed.
  """
  trimmed = np.delete(arr=data, obj=indexes, axis=1)
  return trimmed

# Remove column 3 ('Sales' in the header listing) so only the advertising
# channel columns remain for the summary statistics.
# NOTE(review): this rebinds data_array, so the cell is not idempotent -
# running it a second time would ask np.delete for index 3 on the
# already-trimmed array. Consider a new name for the trimmed array.
data_array = drop_cols(data_array,[3])

In [None]:
# Split the trimmed array into column names and a float matrix of values.
header, corr_values = sep_header(data_array)

In [None]:
# One placeholder record per column name in header; every statistic starts
# at 0 and is overwritten once the real values are computed.
stat = {}
for col_name in header:
    stat[col_name] = dict.fromkeys(['Mean', 'Std', 'Q3', 'Q1', 'Minimum', 'Maximum'], 0)

stat.keys()

dict_keys(['TV', 'Radio', 'Newspaper'])

In [None]:
# Fill in each column's summary statistics, formatted to two decimals.
# stat is iterated in insertion order and column `index` of corr_values is
# paired with the index-th key; enumerate keeps the two in lockstep without
# a hand-maintained counter.
two_decimals = '{0:.2f}'.format

for index, col_name in enumerate(stat):
  data = corr_values[:, index]

  stat[col_name].update({
    'Minimum': two_decimals(np.min(data)),
    'Maximum': two_decimals(np.max(data)),
    'Q1': two_decimals(np.percentile(data, 25)),
    'Q3': two_decimals(np.percentile(data, 75)),
    'Mean': two_decimals(np.mean(data)),
    # np.std with default arguments, as in the original computation.
    'Std': two_decimals(np.std(data)),
  })

In [None]:
# Blank 7x4 object grid that will hold the labelled statistics table;
# object-dtype cells start out as None.
stat_array = np.empty(shape=(7, 4), dtype=object)
stat_array

array([[None, None, None, None],
       [None, None, None, None],
       [None, None, None, None],
       [None, None, None, None],
       [None, None, None, None],
       [None, None, None, None],
       [None, None, None, None]], dtype=object)

In [None]:
# Label the grid: statistic names go down the first column, column names
# across the first row. Cell [0, 0] is left as None.
stat_array[1:, 0] = ['Mean', 'Std', 'Q3', 'Q1', 'Minimum', 'Maximum']
stat_array[0, 1:] = header

In [None]:
# Fill every body cell by looking up stat[<column name>][<statistic name>],
# using the labels already written into row 0 and column 0.
# Bounds come from the array itself rather than repeating 6 and 3, so the
# loop stays correct if the grid is resized.
n_rows, n_cols = stat_array.shape
for i in range(1, n_rows):
    for j in range(1, n_cols):
        stat_array[i, j] = stat[stat_array[0, j]][stat_array[i, 0]]

In [None]:
# Display the filled grid: statistics as rows, columns as headers.
stat_array

array([[None, 'TV', 'Radio', 'Newspaper'],
       ['Mean', '147.04', '23.26', '30.55'],
       ['Std', '85.64', '14.81', '21.72'],
       ['Q3', '218.82', '36.52', '45.10'],
       ['Q1', '74.38', '9.97', '12.75'],
       ['Minimum', '0.70', '0.00', '0.30'],
       ['Maximum', '296.40', '49.60', '114.00']], dtype=object)

In [None]:
from tabulate import tabulate

# Transpose so each data column becomes one table row and each statistic
# one table column, then render the grid as text.
transposed = stat_array.T
table = tabulate(transposed, tablefmt="fancy_grid")

print(table)

╒═══════════╤════════╤═══════╤════════╤═══════╤═════════╤═════════╕
│           │ Mean   │ Std   │ Q3     │ Q1    │ Minimum │ Maximum │
├───────────┼────────┼───────┼────────┼───────┼─────────┼─────────┤
│ TV        │ 147.04 │ 85.64 │ 218.82 │ 74.38 │ 0.70    │ 296.40  │
├───────────┼────────┼───────┼────────┼───────┼─────────┼─────────┤
│ Radio     │ 23.26  │ 14.81 │ 36.52  │ 9.97  │ 0.00    │ 49.60   │
├───────────┼────────┼───────┼────────┼───────┼─────────┼─────────┤
│ Newspaper │ 30.55  │ 21.72 │ 45.10  │ 12.75 │ 0.30    │ 114.00  │
╘═══════════╧════════╧═══════╧════════╧═══════╧═════════╧═════════╛
