# **Pandas Intermediate**

### **Import pandas**

In [1]:
import pandas as pd


In [6]:
adult_df = pd.read_csv('adult.csv')
adult_df

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country,income
0,25,Private,226802,11th,7,Never-married,Machine-op-inspct,Own-child,Black,Male,0,0,40,United-States,<=50K
1,38,Private,89814,HS-grad,9,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,0,50,United-States,<=50K
2,28,Local-gov,336951,Assoc-acdm,12,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K
3,44,Private,160323,Some-college,10,Married-civ-spouse,Machine-op-inspct,Husband,Black,Male,7688,0,40,United-States,>50K
4,18,?,103497,Some-college,10,Never-married,?,Own-child,White,Female,0,0,30,United-States,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48837,27,Private,257302,Assoc-acdm,12,Married-civ-spouse,Tech-support,Wife,White,Female,0,0,38,United-States,<=50K
48838,40,Private,154374,HS-grad,9,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,0,0,40,United-States,>50K
48839,58,Private,151910,HS-grad,9,Widowed,Adm-clerical,Unmarried,White,Female,0,0,40,United-States,<=50K
48840,22,Private,201490,HS-grad,9,Never-married,Adm-clerical,Own-child,White,Male,0,0,20,United-States,<=50K


In [7]:
# Get all the columns
adult_df.columns

Index(['age', 'workclass', 'fnlwgt', 'education', 'educational-num',
       'marital-status', 'occupation', 'relationship', 'race', 'gender',
       'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
       'income'],
      dtype='object')

## **Data Inspection** 

Data inspection is the initial review of a dataset to find missing values, 
incorrect data types, and gather basic statistics, providing insights into its quality and structure.

In [15]:
# 1: Check missing values

adult_df.isnull() # here T = 1 and F = 0, so we can add them to see if there is any null in the DataFrame

adult_df.isnull().sum() # this returns 0 for all the columns, means all cells are not null

age                0
workclass          0
fnlwgt             0
education          0
educational-num    0
marital-status     0
occupation         0
relationship       0
race               0
gender             0
capital-gain       0
capital-loss       0
hours-per-week     0
native-country     0
income             0
dtype: int64

In [17]:
# 1-2: Check missing values

(adult_df == "?") # this retuns if the cell has "?" (True) or not (False)

(adult_df == "?").sum() # now we know some data has null value


age                   0
workclass          2799
fnlwgt                0
education             0
educational-num       0
marital-status        0
occupation         2809
relationship          0
race                  0
gender                0
capital-gain          0
capital-loss          0
hours-per-week        0
native-country      857
income                0
dtype: int64

In [20]:
# 2: Check incorrect data types

adult_df.dtypes

age                 int64
workclass          object
fnlwgt              int64
education          object
educational-num     int64
marital-status     object
occupation         object
relationship       object
race               object
gender             object
capital-gain        int64
capital-loss        int64
hours-per-week      int64
native-country     object
income             object
dtype: object

In [21]:
adult_df.describe()

Unnamed: 0,age,fnlwgt,educational-num,capital-gain,capital-loss,hours-per-week
count,48842.0,48842.0,48842.0,48842.0,48842.0,48842.0
mean,38.643585,189664.1,10.078089,1079.067626,87.502314,40.422382
std,13.71051,105604.0,2.570973,7452.019058,403.004552,12.391444
min,17.0,12285.0,1.0,0.0,0.0,1.0
25%,28.0,117550.5,9.0,0.0,0.0,40.0
50%,37.0,178144.5,10.0,0.0,0.0,40.0
75%,48.0,237642.0,12.0,0.0,0.0,45.0
max,90.0,1490400.0,16.0,99999.0,4356.0,99.0


In [22]:
adult_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48842 entries, 0 to 48841
Data columns (total 15 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   age              48842 non-null  int64 
 1   workclass        48842 non-null  object
 2   fnlwgt           48842 non-null  int64 
 3   education        48842 non-null  object
 4   educational-num  48842 non-null  int64 
 5   marital-status   48842 non-null  object
 6   occupation       48842 non-null  object
 7   relationship     48842 non-null  object
 8   race             48842 non-null  object
 9   gender           48842 non-null  object
 10  capital-gain     48842 non-null  int64 
 11  capital-loss     48842 non-null  int64 
 12  hours-per-week   48842 non-null  int64 
 13  native-country   48842 non-null  object
 14  income           48842 non-null  object
dtypes: int64(6), object(9)
memory usage: 5.6+ MB


## **Cleaning Data**

Cleaning data involves eliminating or rectifying inaccuracies, inconsistencies, 
and missing values within your dataset, utilizing techniques such as handling 
missing values via deletion or imputation, rectifying data types, and detecting 
and eliminating duplicate entries, ultimately resulting in more precise and dependable analysis.

In [23]:
#.replace(the value to replace, a new value, update the original or create a copy)
adult_df.replace("?", pd.NA, inplace=True)

In [28]:
adult_df  # now it replaced all "?" with "<NA>"
adult_df.isnull().sum()  # and this works

age                   0
workclass          2799
fnlwgt                0
education             0
educational-num       0
marital-status        0
occupation         2809
relationship          0
race                  0
gender                0
capital-gain          0
capital-loss          0
hours-per-week        0
native-country      857
income                0
dtype: int64

In [29]:
# .fillna("New Value") to replace the NAs IN A COPY (therefore needs to replace it with the original)
adult_df["occupation"] = adult_df["occupation"].fillna("Unemployed")

In [30]:
adult_df

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country,income
0,25,Private,226802,11th,7,Never-married,Machine-op-inspct,Own-child,Black,Male,0,0,40,United-States,<=50K
1,38,Private,89814,HS-grad,9,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,0,50,United-States,<=50K
2,28,Local-gov,336951,Assoc-acdm,12,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K
3,44,Private,160323,Some-college,10,Married-civ-spouse,Machine-op-inspct,Husband,Black,Male,7688,0,40,United-States,>50K
4,18,,103497,Some-college,10,Never-married,Unemployed,Own-child,White,Female,0,0,30,United-States,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48837,27,Private,257302,Assoc-acdm,12,Married-civ-spouse,Tech-support,Wife,White,Female,0,0,38,United-States,<=50K
48838,40,Private,154374,HS-grad,9,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,0,0,40,United-States,>50K
48839,58,Private,151910,HS-grad,9,Widowed,Adm-clerical,Unmarried,White,Female,0,0,40,United-States,<=50K
48840,22,Private,201490,HS-grad,9,Never-married,Adm-clerical,Own-child,White,Male,0,0,20,United-States,<=50K


In [31]:
# .dropna = drop all the rows that contain <NA> in the DataFrame
# it will create a copy to save the original unless we enter "inplace=True" as a paremeter

adult_df.dropna(inplace=True)

In [32]:
adult_df

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country,income
0,25,Private,226802,11th,7,Never-married,Machine-op-inspct,Own-child,Black,Male,0,0,40,United-States,<=50K
1,38,Private,89814,HS-grad,9,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,0,50,United-States,<=50K
2,28,Local-gov,336951,Assoc-acdm,12,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K
3,44,Private,160323,Some-college,10,Married-civ-spouse,Machine-op-inspct,Husband,Black,Male,7688,0,40,United-States,>50K
5,34,Private,198693,10th,6,Never-married,Other-service,Not-in-family,White,Male,0,0,30,United-States,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48837,27,Private,257302,Assoc-acdm,12,Married-civ-spouse,Tech-support,Wife,White,Female,0,0,38,United-States,<=50K
48838,40,Private,154374,HS-grad,9,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,0,0,40,United-States,>50K
48839,58,Private,151910,HS-grad,9,Widowed,Adm-clerical,Unmarried,White,Female,0,0,40,United-States,<=50K
48840,22,Private,201490,HS-grad,9,Never-married,Adm-clerical,Own-child,White,Male,0,0,20,United-States,<=50K


In [33]:
adult_df.dtypes

age                 int64
workclass          object
fnlwgt              int64
education          object
educational-num     int64
marital-status     object
occupation         object
relationship       object
race               object
gender             object
capital-gain        int64
capital-loss        int64
hours-per-week      int64
native-country     object
income             object
dtype: object

In [34]:
adult_df["income"] = adult_df["income"].astype('category')
adult_df.dtypes

age                   int64
workclass            object
fnlwgt                int64
education            object
educational-num       int64
marital-status       object
occupation           object
relationship         object
race                 object
gender               object
capital-gain          int64
capital-loss          int64
hours-per-week        int64
native-country       object
income             category
dtype: object

### **Trimming and Cleaning Text Data**

In [36]:
# .strip() will create a copy, so replace the original column with the copied one.
adult_df["workclass"] = adult_df["workclass"].str.strip()
adult_df["workclass"]

0             Private
1             Private
2           Local-gov
3             Private
5             Private
             ...     
48837         Private
48838         Private
48839         Private
48840         Private
48841    Self-emp-inc
Name: workclass, Length: 45232, dtype: object

In [37]:
adult_df["occupation"]

0        Machine-op-inspct
1          Farming-fishing
2          Protective-serv
3        Machine-op-inspct
5            Other-service
               ...        
48837         Tech-support
48838    Machine-op-inspct
48839         Adm-clerical
48840         Adm-clerical
48841      Exec-managerial
Name: occupation, Length: 45232, dtype: object

In [38]:
occupation_mapping_dictionary = {
    "Machine-op-inspct": "Machine-operator",
    "Farming-fishing": "Farming-and-fishing",
    "Proactive-serv" : "Proactive-services"
}

# update the original with the newly created shallow copy. but fill NAs with the original values
adult_df["occupation"] = adult_df["occupation"].map(occupation_mapping_dictionary).fillna(adult_df["occupation"])

In [39]:
adult_df  # now the values are updated in the original DataFrame 

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country,income
0,25,Private,226802,11th,7,Never-married,Machine-operator,Own-child,Black,Male,0,0,40,United-States,<=50K
1,38,Private,89814,HS-grad,9,Married-civ-spouse,Farming-and-fishing,Husband,White,Male,0,0,50,United-States,<=50K
2,28,Local-gov,336951,Assoc-acdm,12,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K
3,44,Private,160323,Some-college,10,Married-civ-spouse,Machine-operator,Husband,Black,Male,7688,0,40,United-States,>50K
5,34,Private,198693,10th,6,Never-married,Other-service,Not-in-family,White,Male,0,0,30,United-States,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48837,27,Private,257302,Assoc-acdm,12,Married-civ-spouse,Tech-support,Wife,White,Female,0,0,38,United-States,<=50K
48838,40,Private,154374,HS-grad,9,Married-civ-spouse,Machine-operator,Husband,White,Male,0,0,40,United-States,>50K
48839,58,Private,151910,HS-grad,9,Widowed,Adm-clerical,Unmarried,White,Female,0,0,40,United-States,<=50K
48840,22,Private,201490,HS-grad,9,Never-married,Adm-clerical,Own-child,White,Male,0,0,20,United-States,<=50K


In [49]:
# Replace the original with updated info
adult_df["marital-status"] = adult_df["marital-status"].replace('-', ' ', regex=True)
adult_df

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country,income
0,25,Private,226802,11th,7,Never married,Machine-operator,Own-child,Black,Male,0,0,40,United-States,<=50K
1,38,Private,89814,HS-grad,9,Married civ spouse,Farming-and-fishing,Husband,White,Male,0,0,50,United-States,<=50K
2,28,Local-gov,336951,Assoc-acdm,12,Married civ spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K
3,44,Private,160323,Some-college,10,Married civ spouse,Machine-operator,Husband,Black,Male,7688,0,40,United-States,>50K
5,34,Private,198693,10th,6,Never married,Other-service,Not-in-family,White,Male,0,0,30,United-States,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48837,27,Private,257302,Assoc-acdm,12,Married civ spouse,Tech-support,Wife,White,Female,0,0,38,United-States,<=50K
48838,40,Private,154374,HS-grad,9,Married civ spouse,Machine-operator,Husband,White,Male,0,0,40,United-States,>50K
48839,58,Private,151910,HS-grad,9,Widowed,Adm-clerical,Unmarried,White,Female,0,0,40,United-States,<=50K
48840,22,Private,201490,HS-grad,9,Never married,Adm-clerical,Own-child,White,Male,0,0,20,United-States,<=50K


In [51]:
# Replace the original with updated info 2
adult_df["marital-status"] = adult_df["marital-status"].replace(' ', '-', regex=True)
adult_df

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country,income
0,25,Private,226802,11th,7,Never-married,Machine-operator,Own-child,Black,Male,0,0,40,United-States,<=50K
1,38,Private,89814,HS-grad,9,Married-civ-spouse,Farming-and-fishing,Husband,White,Male,0,0,50,United-States,<=50K
2,28,Local-gov,336951,Assoc-acdm,12,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K
3,44,Private,160323,Some-college,10,Married-civ-spouse,Machine-operator,Husband,Black,Male,7688,0,40,United-States,>50K
5,34,Private,198693,10th,6,Never-married,Other-service,Not-in-family,White,Male,0,0,30,United-States,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48837,27,Private,257302,Assoc-acdm,12,Married-civ-spouse,Tech-support,Wife,White,Female,0,0,38,United-States,<=50K
48838,40,Private,154374,HS-grad,9,Married-civ-spouse,Machine-operator,Husband,White,Male,0,0,40,United-States,>50K
48839,58,Private,151910,HS-grad,9,Widowed,Adm-clerical,Unmarried,White,Female,0,0,40,United-States,<=50K
48840,22,Private,201490,HS-grad,9,Never-married,Adm-clerical,Own-child,White,Male,0,0,20,United-States,<=50K


### **Renaming columns and Reindexing**

In [53]:
# Rename columns and update the original directly (inplace=True)

adult_df.rename(columns={"native-country": "country", "hours-per-week": "working-hours"}, inplace=True)
adult_df.columns

Index(['age', 'workclass', 'fnlwgt', 'education', 'educational-num',
       'marital-status', 'occupation', 'relationship', 'race', 'gender',
       'capital-gain', 'capital-loss', 'working-hours', 'country', 'income'],
      dtype='object')

In [56]:
# Reindexing (for reporting etc.) = reindex and display specific columns
adult_df.reindex(columns=["gender", "martital-status", "country"])

# it is nothing to do with the original

Unnamed: 0,gender,martital-status,country
0,Male,,United-States
1,Male,,United-States
2,Male,,United-States
3,Male,,United-States
5,Male,,United-States
...,...,...,...
48837,Female,,United-States
48838,Male,,United-States
48839,Female,,United-States
48840,Male,,United-States


### **Removing Columns and Rows**

In [58]:
# Delete a single row
adult_df.drop(0)

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,working-hours,country,income
1,38,Private,89814,HS-grad,9,Married-civ-spouse,Farming-and-fishing,Husband,White,Male,0,0,50,United-States,<=50K
2,28,Local-gov,336951,Assoc-acdm,12,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K
3,44,Private,160323,Some-college,10,Married-civ-spouse,Machine-operator,Husband,Black,Male,7688,0,40,United-States,>50K
5,34,Private,198693,10th,6,Never-married,Other-service,Not-in-family,White,Male,0,0,30,United-States,<=50K
7,63,Self-emp-not-inc,104626,Prof-school,15,Married-civ-spouse,Prof-specialty,Husband,White,Male,3103,0,32,United-States,>50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48837,27,Private,257302,Assoc-acdm,12,Married-civ-spouse,Tech-support,Wife,White,Female,0,0,38,United-States,<=50K
48838,40,Private,154374,HS-grad,9,Married-civ-spouse,Machine-operator,Husband,White,Male,0,0,40,United-States,>50K
48839,58,Private,151910,HS-grad,9,Widowed,Adm-clerical,Unmarried,White,Female,0,0,40,United-States,<=50K
48840,22,Private,201490,HS-grad,9,Never-married,Adm-clerical,Own-child,White,Male,0,0,20,United-States,<=50K


In [59]:
# Delete Multiple Row

adult_df.drop([2, 5, 48837])

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,working-hours,country,income
0,25,Private,226802,11th,7,Never-married,Machine-operator,Own-child,Black,Male,0,0,40,United-States,<=50K
1,38,Private,89814,HS-grad,9,Married-civ-spouse,Farming-and-fishing,Husband,White,Male,0,0,50,United-States,<=50K
3,44,Private,160323,Some-college,10,Married-civ-spouse,Machine-operator,Husband,Black,Male,7688,0,40,United-States,>50K
7,63,Self-emp-not-inc,104626,Prof-school,15,Married-civ-spouse,Prof-specialty,Husband,White,Male,3103,0,32,United-States,>50K
8,24,Private,369667,Some-college,10,Never-married,Other-service,Unmarried,White,Female,0,0,40,United-States,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48836,22,Private,310152,Some-college,10,Never-married,Protective-serv,Not-in-family,White,Male,0,0,40,United-States,<=50K
48838,40,Private,154374,HS-grad,9,Married-civ-spouse,Machine-operator,Husband,White,Male,0,0,40,United-States,>50K
48839,58,Private,151910,HS-grad,9,Widowed,Adm-clerical,Unmarried,White,Female,0,0,40,United-States,<=50K
48840,22,Private,201490,HS-grad,9,Never-married,Adm-clerical,Own-child,White,Male,0,0,20,United-States,<=50K


In [None]:
# Delete a Single Column

adult_df.drop('fnlwgt', axis=1)

In [61]:
# This will display the axes

adult_df.axes

[Index([    0,     1,     2,     3,     5,     7,     8,     9,    10,    11,
        ...
        48832, 48833, 48834, 48835, 48836, 48837, 48838, 48839, 48840, 48841],
       dtype='int64', length=45232),
 Index(['age', 'workclass', 'fnlwgt', 'education', 'educational-num',
        'marital-status', 'occupation', 'relationship', 'race', 'gender',
        'capital-gain', 'capital-loss', 'working-hours', 'country', 'income'],
       dtype='object')]

In [65]:
# Deleting multiple columns

shortened_df = adult_df.drop(['age', 'workclass', 'fnlwgt'], axis=1)
shortened_df

Unnamed: 0,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,working-hours,country,income
0,11th,7,Never-married,Machine-operator,Own-child,Black,Male,0,0,40,United-States,<=50K
1,HS-grad,9,Married-civ-spouse,Farming-and-fishing,Husband,White,Male,0,0,50,United-States,<=50K
2,Assoc-acdm,12,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K
3,Some-college,10,Married-civ-spouse,Machine-operator,Husband,Black,Male,7688,0,40,United-States,>50K
5,10th,6,Never-married,Other-service,Not-in-family,White,Male,0,0,30,United-States,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...
48837,Assoc-acdm,12,Married-civ-spouse,Tech-support,Wife,White,Female,0,0,38,United-States,<=50K
48838,HS-grad,9,Married-civ-spouse,Machine-operator,Husband,White,Male,0,0,40,United-States,>50K
48839,HS-grad,9,Widowed,Adm-clerical,Unmarried,White,Female,0,0,40,United-States,<=50K
48840,HS-grad,9,Never-married,Adm-clerical,Own-child,White,Male,0,0,20,United-States,<=50K


## **Handling Duplicates**

Identifying and removing duplicate records are crucial for maintaining data quality. 
Pandas provides .duplicated() and .drop_duplicates() for finding and removing duplicates, 
ensuring each data point is unique for accurate analysis.

In [68]:
# Check the number of duplicated rows and sum the total
print("Duplicated Rows: ", adult_df.duplicated().sum())

Duplicated Rows:  47


In [69]:
# delete all duplicated rows in the original DataFrame directly
adult_df.drop_duplicates(inplace=True)
print("Duplicated Rows: ", adult_df.duplicated().sum())

Duplicated Rows:  0


### **Aggregating Data** (.groupby)

Aggregating data involves summarizing data points into meaningful statistics, 
such as averages, sums, or counts, which can be achieved using GroupBy operations or pivot tables. 
This helps in understanding the dataset at a higher level.

In [73]:
adult_df["occupation"].unique()

array(['Machine-operator', 'Farming-and-fishing', 'Protective-serv',
       'Other-service', 'Prof-specialty', 'Craft-repair', 'Adm-clerical',
       'Exec-managerial', 'Tech-support', 'Sales', 'Priv-house-serv',
       'Transport-moving', 'Handlers-cleaners', 'Armed-Forces',
       'Unemployed'], dtype=object)

In [74]:
min_age_series = adult_df.groupby("occupation")["age"].min()
min_age_series

occupation
Adm-clerical           17
Armed-Forces           23
Craft-repair           17
Exec-managerial        17
Farming-and-fishing    17
Handlers-cleaners      17
Machine-operator       17
Other-service          17
Priv-house-serv        17
Prof-specialty         17
Protective-serv        17
Sales                  17
Tech-support           17
Transport-moving       17
Unemployed             17
Name: age, dtype: int64

In [75]:
max_age_series = adult_df.groupby("occupation")["age"].max()
max_age_series

occupation
Adm-clerical           90
Armed-Forces           52
Craft-repair           90
Exec-managerial        90
Farming-and-fishing    90
Handlers-cleaners      90
Machine-operator       90
Other-service          90
Priv-house-serv        84
Prof-specialty         90
Protective-serv        90
Sales                  90
Tech-support           81
Transport-moving       90
Unemployed             30
Name: age, dtype: int64

In [76]:
mean_age_series = adult_df.groupby("occupation")["age"].mean()
mean_age_series

occupation
Adm-clerical           37.218609
Armed-Forces           31.785714
Craft-repair           38.970882
Exec-managerial        42.211371
Farming-and-fishing    41.400000
Handlers-cleaners      32.606846
Machine-operator       37.711298
Other-service          35.051613
Priv-house-serv        43.682609
Prof-specialty         40.531578
Protective-serv        38.890256
Sales                  37.446253
Tech-support           37.208598
Transport-moving       40.721934
Unemployed             19.900000
Name: age, dtype: float64