In [1]:
import requests
import pandas as pd
import numpy as np
import math as m
import statistics as s
import scipy.stats as ss
from tabulate import tabulate

# Download the Salaries dataset next to the notebook and load it into a DataFrame.
url = 'https://raw.githubusercontent.com/Apress/data-analysis-and-visualization-using-python/master/Ch07/Salaries.csv'
csv_path = 'Salaries.csv'

# The timeout keeps the cell from hanging forever on a stalled connection, and a
# network error is reported like a non-200 reply instead of as a raw traceback.
try:
    response = requests.get(url, timeout=30)
    downloaded = response.status_code == 200
except requests.RequestException:
    downloaded = False

if downloaded:
    with open(csv_path, 'wb') as f:
        f.write(response.content)
    # Reported only after the `with` block has closed (and so flushed) the file.
    print('Dataset saved to file.')
else:
    print('Failed to download dataset.')

# When the download failed this reads a copy left by an earlier run if one exists,
# and raises FileNotFoundError otherwise.
df = pd.read_csv(csv_path)
df


Dataset saved to file.


Unnamed: 0,rank,discipline,phd,service,sex,salary
0,Prof,B,56,49,Male,186960
1,Prof,A,12,6,Male,93000
2,Prof,A,23,20,Male,110515
3,Prof,A,40,31,Male,131205
4,Prof,B,20,18,Male,104800
...,...,...,...,...,...,...
73,Prof,B,18,10,Female,105450
74,AssocProf,B,19,6,Female,104542
75,Prof,B,17,17,Female,124312
76,Prof,A,28,14,Female,109954


In [13]:
# One sub-frame per academic rank; each keeps every column of `df`.
rank_column = df['rank']
assocprof = df.loc[rank_column == 'AssocProf']
asstprof = df.loc[rank_column == 'AsstProf']
prof = df.loc[rank_column == 'Prof']



In [15]:
# Descriptive statistics of salary for each academic rank, printed as one grid table.

RANKS = ['AssocProf', 'AsstProf', 'Prof']
RANK_FRAMES = {'AssocProf': assocprof, 'AsstProf': asstprof, 'Prof': prof}

# Percentile points, in the order their rows appear in the table.
PERCENTILE_POINTS = [25, 50, 75, 10, 20, 30, 40, 60, 70, 80, 90]

# (row label, key in the dict returned by describe_salary) for the "%.3f" rows.
STAT_ROWS = [
    ('Median', 'median'),
    ('Mean', 'mean'),
    ('Standard Error of Mean', 'se_mean'),
    ('Standard Deviation', 'stdev'),
    ('Coefficient of Variation', 'cv'),
    ('Skewness', 'skew'),
    ('Standard Error of Skewness', 'se_skew'),
    ('Kurtosis', 'kurtosis'),
    ('Standard Error of Kurtosis', 'se_kurtosis'),
    ('Minimum', 'min'),
    ('Maximum', 'max'),
]


def se_skewness(n):
    """Standard error of the bias-corrected sample skewness for n observations.

    Returns NaN for n < 3, where the expression is undefined.
    """
    if n < 3:
        return float('nan')
    return m.sqrt(6 * n * (n - 1) / ((n - 2) * (n + 1) * (n + 3)))


def se_kurtosis(n):
    """Standard error of the bias-corrected sample excess kurtosis for n observations.

    Equivalent to 2 * se_skewness(n) * sqrt((n**2 - 1) / ((n - 3) * (n + 5))).
    Returns NaN for n < 4, where the expression is undefined.
    """
    if n < 4:
        return float('nan')
    return m.sqrt(24 * n * (n - 1) ** 2 / ((n - 3) * (n - 2) * (n + 3) * (n + 5)))


def describe_salary(group):
    """Summarise the 'salary' column of one rank's sub-frame.

    Parameters
    ----------
    group : pandas.DataFrame
        Frame with a numeric 'salary' column.

    Returns
    -------
    dict
        Keys: 'valid', 'missing', every key listed in STAT_ROWS, and
        'percentiles' (array ordered like PERCENTILE_POINTS).

    Every statistic is computed on the non-missing salaries only, so the n used
    in the standard errors is the n the mean and deviation were computed from.
    """
    salary = group['salary']
    observed = salary.dropna()
    n = int(observed.size)  # plain int: no fixed-width overflow in the SE products
    mean = observed.mean()
    stdev = observed.std(ddof=1)  # sample standard deviation (n - 1 denominator)
    return {
        'valid': n,
        'missing': int(salary.isna().sum()),
        'median': observed.median(),
        'mean': mean,
        'stdev': stdev,
        'se_mean': stdev / m.sqrt(n) if n else float('nan'),
        'cv': stdev / mean,
        # bias=False selects the bias-corrected estimators, which are the ones the
        # two standard-error formulas above are derived for.
        'skew': ss.skew(observed.to_numpy(), bias=False),
        'se_skew': se_skewness(n),
        'kurtosis': ss.kurtosis(observed.to_numpy(), bias=False),
        'se_kurtosis': se_kurtosis(n),
        'min': observed.min(),
        'max': observed.max(),
        'percentiles': observed.quantile([p / 100 for p in PERCENTILE_POINTS]).to_numpy(),
    }


def per_rank(key):
    """Return the values stored under `key` for each rank, in RANKS order."""
    return [salary_stats[rank][key] for rank in RANKS]


def fmt(value):
    """Format a number with three decimals, as every statistic row is shown."""
    return "%.3f" % value


salary_stats = {rank: describe_salary(RANK_FRAMES[rank]) for rank in RANKS}

# Per-rank names from the earlier, unrolled version of this cell, re-bound in case
# other cells read them.
va, vb, vc = per_rank('valid')
ma, mb, mc = per_rank('missing')
assocprof_median, asstprof_median, prof_median = per_rank('median')
assocprof_mean, asstprof_mean, prof_mean = per_rank('mean')
assocprof_stdev, asstprof_stdev, prof_stdev = per_rank('stdev')
assocprof_se_mean, asstprof_se_mean, prof_se_mean = per_rank('se_mean')
assocprof_cv, asstprof_cv, prof_cv = (fmt(cv) for cv in per_rank('cv'))  # kept as formatted strings
assocprof_skew, asstprof_skew, prof_skew = per_rank('skew')
assocprof_se_skew, asstprof_se_skew, prof_se_skew = per_rank('se_skew')
assocprof_k, asstprof_k, prof_k = per_rank('kurtosis')
assocprof_se_k, asstprof_se_k, prof_se_k = per_rank('se_kurtosis')
assocprof_min, asstprof_min, prof_min = per_rank('min')
assocprof_max, asstprof_max, prof_max = per_rank('max')
assocprof_desc, asstprof_desc, prof_desc = per_rank('percentiles')

# Organize the data into a list of lists: the rank names first, then one row per statistic
data = [
    [' '] + RANKS,
    ['Valid'] + per_rank('valid'),
    ['Missing'] + per_rank('missing'),
]
data += [[label] + [fmt(value) for value in per_rank(key)] for label, key in STAT_ROWS]
data += [
    ['%dth percentile' % point] + [fmt(values[position]) for values in per_rank('percentiles')]
    for position, point in enumerate(PERCENTILE_POINTS)
]

# Define the headers for the table
headers = ['', '', 'Salaries', '']

# Print the tabulated data
print(tabulate(data, headers=headers, tablefmt='grid'))

+----------------------------+------------+------------+------------+
|                            |            | Salaries   |            |
|                            | AssocProf  | AsstProf   | Prof       |
+----------------------------+------------+------------+------------+
| Valid                      | 13         | 19         | 46         |
+----------------------------+------------+------------+------------+
| Missing                    | 0          | 0          | 0          |
+----------------------------+------------+------------+------------+
| Median                     | 103613.000 | 78500.000  | 123321.500 |
+----------------------------+------------+------------+------------+
| Mean                       | 91786.231  | 81362.789  | 123624.804 |
+----------------------------+------------+------------+------------+
| Standard Error of Mean     | 5150.720   | 2152.205   | 3663.975   |
+----------------------------+------------+------------+------------+
| Standard Deviation