In [1]:
import pandas as pd




In [2]:
df = pd.read_csv("census.csv")

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Age                2000 non-null   int64  
 1   Eduation           2000 non-null   object 
 2   Marital_Status     2000 non-null   object 
 3   Gender             2000 non-null   object 
 4   Tax_Filing_Status  2000 non-null   object 
 5   Income             2000 non-null   float64
 6   Parents_Status     2000 non-null   object 
 7   Country_of_Birth   2000 non-null   object 
 8    Nativity status   2000 non-null   object 
 9   Weeks_worked       2000 non-null   int64  
dtypes: float64(1), int64(2), object(7)
memory usage: 156.4+ KB


### Check missing values

In [4]:
df.isnull().sum()

Age                  0
Eduation             0
Marital_Status       0
Gender               0
Tax_Filing_Status    0
Income               0
Parents_Status       0
Country_of_Birth     0
 Nativity status     0
Weeks_worked         0
dtype: int64

### Filter Working Age Population (Age >= 18)

In [7]:
df_working = df[df["Age"] >= 18]

print("Total Working Age Population:", len(df_working))


Total Working Age Population: 1465


### Education Count (How Many Educated People)

In [8]:
education_count = df_working["Eduation"].value_counts()

education_count

Eduation
Highschoolgraduate                   483
Somecollegebutnodegree               295
Bachelorsdegree(BAABBS)              206
Mastersdegree(MAMSMEngMEdMSWMBA)      68
7thand8thgrade                        60
11thgrade                             57
10thgrade                             56
Associatesdegree-occup/vocational     56
Associatesdegree-academicprogram      50
9thgrade                              31
5thor6thgrade                         21
Profschooldegree(MDDDSDVMLLBJD)       20
12thgradenodiploma                    17
1st2nd3rdor4thgrade                   16
Doctoratedegree(PhDEdD)               15
Lessthan1stgrade                      14
Name: count, dtype: int64

### Gender-wise Count

In [10]:
gender_count = df_working["Gender"].value_counts()

gender_count

Gender
Female    798
Male      667
Name: count, dtype: int64

### Marital Status Count

In [11]:
marital_count = df_working["Marital_Status"].value_counts()

marital_count

Marital_Status
Married-civilianspousepresent    869
Nevermarried                     334
Divorced                         122
Widowed                           89
Separated                         26
Married-spouseabsent              17
Married-AFspousepresent            8
Name: count, dtype: int64

### Parents Status Count

In [12]:
parents_count = df_working["Parents_Status"].value_counts()

parents_count

Parents_Status
Notinuniverse    1465
Name: count, dtype: int64

### Weeks Worked Analysis

In [13]:
df_working["Weeks_worked"].mean()

np.float64(33.63686006825939)

### People Working Less Than 20 Weeks

In [14]:
low_weeks = df_working[df_working["Weeks_worked"] < 20]

len(low_weeks)

460

### Average Income Based on Education

In [15]:
income_by_education = df_working.groupby("Eduation")["Income"].mean().sort_values(ascending=False)

income_by_education

Eduation
Profschooldegree(MDDDSDVMLLBJD)      2053.327000
Doctoratedegree(PhDEdD)              1954.134667
Somecollegebutnodegree               1845.371966
Highschoolgraduate                   1837.028385
1st2nd3rdor4thgrade                  1787.718750
Associatesdegree-occup/vocational    1777.209821
11thgrade                            1751.062807
9thgrade                             1653.922258
Associatesdegree-academicprogram     1640.910000
Mastersdegree(MAMSMEngMEdMSWMBA)     1601.586912
10thgrade                            1598.416250
Bachelorsdegree(BAABBS)              1596.193204
5thor6thgrade                        1587.560476
7thand8thgrade                       1580.914333
12thgradenodiploma                   1525.017647
Lessthan1stgrade                     1509.856429
Name: Income, dtype: float64

### Age Group Categorization

In [16]:
df_working["Age_Group"] = pd.cut(
    df_working["Age"],
    bins=[18, 25, 35, 45, 60, 100],
    labels=["18-25", "26-35", "36-45", "46-60", "60+"]
)

df_working["Age_Group"].value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_working["Age_Group"] = pd.cut(


Age_Group
36-45    344
26-35    325
46-60    308
60+      264
18-25    197
Name: count, dtype: int64

### Employment Assignment

In [18]:
df_working["Employment_Status"] = df_working["Weeks_worked"].apply(
    lambda x: "Needs Employment Support" if x < 20 else "Employed"
)

df_working["Employment_Status"].value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_working["Employment_Status"] = df_working["Weeks_worked"].apply(


Employment_Status
Employed                    1005
Needs Employment Support     460
Name: count, dtype: int64

### (Education vs Employment)

In [19]:
pd.crosstab(df_working["Eduation"], df_working["Employment_Status"])

Employment_Status,Employed,Needs Employment Support
Eduation,Unnamed: 1_level_1,Unnamed: 2_level_1
10thgrade,23,33
11thgrade,31,26
12thgradenodiploma,12,5
1st2nd3rdor4thgrade,5,11
5thor6thgrade,11,10
7thand8thgrade,15,45
9thgrade,14,17
Associatesdegree-academicprogram,39,11
Associatesdegree-occup/vocational,47,9
Bachelorsdegree(BAABBS),178,28


### Gender + Marital + Employment Combined

In [20]:
pd.crosstab(
    [df_working["Gender"], df_working["Marital_Status"]],
    df_working["Employment_Status"]
)

Unnamed: 0_level_0,Employment_Status,Employed,Needs Employment Support
Gender,Marital_Status,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,Divorced,49,21
Female,Married-AFspousepresent,4,4
Female,Married-civilianspousepresent,300,162
Female,Married-spouseabsent,7,2
Female,Nevermarried,102,57
Female,Separated,10,5
Female,Widowed,18,57
Male,Divorced,39,13
Male,Married-civilianspousepresent,328,79
Male,Married-spouseabsent,7,1
