### Problem Statement
Hello!

You have been hired by **'CACT' (Census Analysis and Collection Team)** to help them with your NumPy programming skills.
Your major work for today involves census record management and data analysis.

#### About the Dataset
A snapshot of the data you will be working on:

<img src="census_sample.png">

The dataset has details of 1000 people with the following 8 features:

<img src="census_desc.png" width='400' height='500' >

#### Why solve this project
After completing this project, you will have a better grip on working with numpy. In this project, you will apply the following concepts:

- Array Appending
- Array Slicing
- Array Filtering
- Array Aggregation

In [1]:
# Importing important libraries
import numpy as np

In [2]:
# Path of the comma-separated census file that np.genfromtxt reads below
path = "file.csv"

In [3]:
# Load the comma-separated file into a NumPy array of floats.
# The first line of the file is a header row and is skipped.
data = np.genfromtxt(
    fname=path,
    skip_header=1,
    delimiter=",",
)

In [4]:
# Display the array that was just loaded
data

array([[39., 13.,  4., ...,  0., 40.,  0.],
       [50., 13.,  4., ...,  0., 13.,  0.],
       [38.,  9.,  4., ...,  0., 40.,  0.],
       ...,
       [48., 13.,  4., ...,  0., 58.,  1.],
       [40., 10.,  4., ...,  0., 40.,  0.],
       [39., 13.,  4., ...,  0., 50.,  1.]])

In [5]:
# New record: one extra row (8 values, one per feature) to add to the dataset
new_record = [[50, 9, 4, 1, 0, 0, 40, 0]]

In [6]:
# Turn the nested list into a 2-D array, then stack it under the loaded
# rows (axis 0) so 'census' is 'data' plus the new record as its last row.
new_record_arr = np.array(new_record)
census = np.concatenate([data, new_record_arr], axis=0)

In [7]:
# Compare the shapes before and after appending, then show the combined array
data.shape, census.shape, census

((1000, 8),
 (1001, 8),
 array([[39., 13.,  4., ...,  0., 40.,  0.],
        [50., 13.,  4., ...,  0., 13.,  0.],
        [38.,  9.,  4., ...,  0., 40.,  0.],
        ...,
        [40., 10.,  4., ...,  0., 40.,  0.],
        [39., 13.,  4., ...,  0., 50.,  1.],
        [50.,  9.,  4., ...,  0., 40.,  0.]]))

In [8]:
# 'age' is the first column of census: every row, column index 0
age = census[:, 0]
age

array([39., 50., 38., ..., 40., 39., 50.])

In [9]:
# Largest value in the 'age' array, i.e. the age of the oldest person
max_age = np.amax(age)
max_age

90.0

In [10]:
# Smallest value in the 'age' array, i.e. the age of the youngest person
min_age = np.amin(age)
min_age

17.0

In [11]:
# Mean of the 'age' array (the standard deviation is computed separately)
age_mean = np.mean(age)
age_mean

38.06293706293706

In [12]:
# Standard deviation of the 'age' array. np.std uses ddof=0 by default,
# so this is the population (not sample) standard deviation.
age_std = np.std(age)
age_std

13.341478176165857

In [19]:
# Subsetting the different races into separate arrays.
# Column 2 of census is compared against the race codes 0-4; each race_k
# array collects the entries of that column equal to k, in row order.
# A boolean mask does this in one vectorized step per race, whereas calling
# np.append inside a Python loop re-copies the growing array on every
# iteration.
race_col = census[:, 2]

race_0 = race_col[race_col == 0]
race_1 = race_col[race_col == 1]
race_2 = race_col[race_col == 2]
race_3 = race_col[race_col == 3]
race_4 = race_col[race_col == 4]

In [25]:
# Inspect the entries collected for race code 1
race_1

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [30]:
# Number of entries collected for each race code (len_k belongs to race_k)
len_0, len_1, len_2, len_3, len_4 = map(
    len, (race_0, race_1, race_2, race_3, race_4)
)

In [32]:
# Show how many entries were collected for race code 3
len_3

6

In [42]:
# Finding the minority race (the race with the fewest citizens).
# Collect the per-race counts in a list whose index is the race code.
race_count = [len_0, len_1, len_2, len_3, len_4]
race_count

[10, 27, 110, 6, 848]

In [44]:
# Index (race code) of the smallest count; if several races tie for the
# minimum, the lowest index wins.
minority_race = min(range(len(race_count)), key=race_count.__getitem__)
minority_race

3

In [56]:
# STEP 4
# Senior citizens: keep every census row whose age (column 0) is above 60.
senior_citizens = census[census[:, 0] > 60, :]
senior_citizens

array([[7.9000e+01, 1.0000e+01, 4.0000e+00, 1.0000e+00, 0.0000e+00,
        0.0000e+00, 2.0000e+01, 0.0000e+00],
       [6.7000e+01, 6.0000e+00, 4.0000e+00, 1.0000e+00, 0.0000e+00,
        0.0000e+00, 2.0000e+00, 0.0000e+00],
       [7.6000e+01, 1.4000e+01, 4.0000e+00, 1.0000e+00, 0.0000e+00,
        0.0000e+00, 4.0000e+01, 1.0000e+00],
       [6.1000e+01, 9.0000e+00, 4.0000e+00, 1.0000e+00, 0.0000e+00,
        0.0000e+00, 4.0000e+01, 0.0000e+00],
       [7.0000e+01, 1.0000e+01, 4.0000e+00, 1.0000e+00, 0.0000e+00,
        0.0000e+00, 4.0000e+01, 0.0000e+00],
       [6.4000e+01, 7.0000e+00, 4.0000e+00, 1.0000e+00, 0.0000e+00,
        2.1790e+03, 4.0000e+01, 0.0000e+00],
       [7.1000e+01, 1.0000e+01, 2.0000e+00, 1.0000e+00, 0.0000e+00,
        1.8160e+03, 2.0000e+00, 0.0000e+00],
       [6.8000e+01, 2.0000e+00, 4.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 2.0000e+01, 0.0000e+00],
       [6.6000e+01, 1.1000e+01, 4.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 2.0000e+

In [58]:
# Total of column 6 (working hours) over all senior citizens.
# ndarray.sum() aggregates inside NumPy instead of iterating the array
# element by element with the builtin sum().
working_hours_sum = senior_citizens[:, 6].sum()
working_hours_sum

1917.0

In [59]:
# Average working hours per senior citizen: total hours divided by the
# number of senior-citizen rows.
avg_working_hours = working_hours_sum / senior_citizens.shape[0]
avg_working_hours

31.42622950819672

In [60]:
# Number of rows (people) in the senior_citizens subset
len(senior_citizens)

61

In [61]:
# STEP 5
# Split census on education-num (column 1):
#   'high' -> rows with education-num greater than 10
#   'low'  -> rows with education-num less than or equal to 10
high = census[census[:, 1] > 10]
low = census[census[:, 1] <= 10]

In [64]:
# Mean of the income column (column 7) for both subsets, rounded to two
# decimals. np.mean aggregates inside NumPy instead of summing the array
# element by element with the builtin sum() and dividing by len().
avg_pay_high = round(np.mean(high[:, 7]), 2)
avg_pay_low = round(np.mean(low[:, 7]), 2)
avg_pay_low

0.14