In [None]:
# Import the pandas library so that we can use it in the rest of the notebook.
# "as pd" gives the library the short alias pd, so we can type pd instead of pandas.
import pandas as pd

In [None]:
# pd.read_csv loads a CSV file (our dataset) into a DataFrame.
# The file is referenced by name only, so "Salaries.csv" has to sit in the
# same folder as this notebook; otherwise pass the full path to the file.
# low_memory=False asks pandas not to parse the file in chunks, which avoids
# mixed-type inference inside a column (see the pandas read_csv documentation).
salaries = pd.read_csv(
    "Salaries.csv",
    low_memory=False,
)

In [None]:
# Build a smaller dataset that keeps only some of the rows (instances).
# The comparison salaries["Year"] == 2014 produces a True/False mask, and
# .loc keeps the rows where the mask is True, i.e. the salaries whose
# Year column equals 2014.
Salaries_2014 = salaries.loc[salaries["Year"] == 2014]

In [None]:
# The filtered dataset is still a DataFrame, so .describe() works on it
# exactly as it does on the full dataset.
(
    Salaries_2014
    .describe()
)

In [None]:
# Print the average and the largest TotalPay found in the Salaries_2014 subset.
# %f formats each number as a float with six decimal places.
print("Mean: %f" % Salaries_2014.TotalPay.mean())
print("Max: %f" % Salaries_2014.TotalPay.max())

In [None]:
# .sort_values returns the values arranged in a specific order.
# Here we select the TotalPay column and sort it from smallest to largest
# (ascending=True is the default, written out to make the order explicit).
Salaries_2014["TotalPay"].sort_values(ascending=True)

In [None]:
# Passing ascending=False sorts from largest to smallest instead.
# More information on .sort_values can be found at the following link:
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sort_values.html
Salaries_2014["TotalPay"].sort_values(ascending=False)

In [None]:
# We can also group the data by a specific attribute (here: JobTitle).
# More information on .groupby can be found at the following link:
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.groupby.html
role = salaries.groupby("JobTitle")
# Then we can display the mean of each group.
# numeric_only=True restricts the mean to numeric columns. Without it,
# pandas 2.0+ raises a TypeError if the dataset contains any non-numeric
# (e.g. text) columns, because a mean cannot be computed for them.
role.mean(numeric_only=True)

In [None]:
# Again, if needed, we can describe the newly created grouped dataset as a whole.
# Calling .describe() on the groupby object (rather than on a single selected
# column) produces the summary statistics per JobTitle group for the dataset's
# columns, not just for one attribute.
role.describe()

In [None]:
# We can also restrict the description to a single attribute:
# select the TotalPay column of the grouped data first, then describe it.
role["TotalPay"].describe()

In [None]:
# When .describe() gives more (or less) than we need, .agg lets us pick the
# exact set of summary functions to apply. Here we group the 2014 salaries by
# JobTitle and compute four statistics of TotalPay for every group.
(
    Salaries_2014
    .groupby("JobTitle")["TotalPay"]
    .agg(["mean", "min", "max", "median"])
)