# Importing Libraries

In [4]:
import pandas as pd
import numpy as np

# Data Preparation

## Making a list of column names

In [5]:
# Column labels, listed in the order the fields appear in each row of the data file.
columns = [
    'age',
    'workclass',
    'fnlwgt',
    'education',
    'education-num',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'capital-gain',
    'capital-loss',
    'hours-per-week',
    'native-country',
    'income',
]

## Reading the dataset

In [6]:
# Load the raw records and label the fields with `columns`; because explicit
# names are supplied, pandas treats the file as having no header row.
#
# skipinitialspace=True drops the blank that follows each comma in the UCI
# `adult.data` layout ("39, State-gov, 77516, ..."). Without it every text field
# keeps a leading space, so comparisons such as df['sex'] == 'Male' silently
# match nothing. The flag is a no-op if the file carries no such padding.
# NOTE(review): confirm against the actual file that fields are ", "-separated.
df = pd.read_csv('adult.data', sep=",", names=columns, skipinitialspace=True)
df

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8596,42,Private,211253,Bachelors,13,Divorced,Exec-managerial,Unmarried,White,Female,0,0,40,United-States,<=50K
8597,32,Federal-gov,191385,Assoc-acdm,12,Divorced,Protective-serv,Not-in-family,White,Male,2174,0,40,United-States,<=50K
8598,20,Private,137895,HS-grad,9,Married-civ-spouse,Sales,Husband,White,Male,0,0,45,United-States,<=50K
8599,62,State-gov,159699,Bachelors,13,Married-civ-spouse,Adm-clerical,Husband,White,Male,0,0,38,United-States,<=50K


## Information About Data Features

In [16]:
# Human-readable description of every feature, paired positionally with `columns`:
# the n-th description documents the n-th column name, so the two lists must stay
# the same length and order (pd.DataFrame raises if the lengths differ).
info = {
    'columns' : columns,
    'information' : [
        '`Age` of People in the survey',
        # Describes the employer class (values in the data look like State-gov,
        # Private, Self-emp-not-inc), not whether the person is employed.
        "In U.S. Census data, `workclass` signifies the class of an individual's employer, such as private sector, self-employment, or state or federal government.",
        "`fnlwgt` in the U.S. Census context means final weight, a statistical measure adjusting for sampling biases to ensure representativeness.",
        "In U.S. Census data, `education` typically refers to an individual's highest level of formal education completed, providing demographic insights.",
        "`education-num` in the U.S. Census context refers to the number of educational years completed, indicating an individual's educational attainment level.",
        "`Marital-status` in U.S. Census data signifies an individual's current relationship status, providing insights into their marital condition or partnership.",
        "In U.S. Census data, `occupation` represents an individual's employment or profession, categorizing their work role or job responsibilities for analysis.",
        "In U.S. Census data, `relationship` indicates the familial or non-familial connection between individuals within a household, defining their roles.",
        "`Race` in American census data categorizes individuals based on socially constructed racial groups, including White, Black, Asian, Native American, and more.",
        "American census data includes demographic information on `sex`, categorizing individuals as male or female for statistical analysis and representation.",
        "`Capital gain` in American census data refers to profits from the sale of assets like stocks or real estate.",
        "`Capital loss` in American census data refers to the decrease in the value of assets and investments reported for taxation.",
        "The American census collects data on the number of `hours per week` individuals work, providing insights into employment patterns.",
        "In American census data, `native country` refers to the country of birth or origin of an individual or household.",
        # The column stores a bracket label (e.g. `<=50K`), not an earnings amount.
        "In this dataset, `income` is recorded as a bracket rather than an amount: whether an individual's yearly income exceeds 50K (`>50K`) or not (`<=50K`).",
    ]
}

# One row per feature; the explicit column list pins the display order.
Dataset_info_information = pd.DataFrame(info, columns = ['columns','information'])
Dataset_info_information

Unnamed: 0,columns,information
0,age,`Age` of People in the survey
1,workclass,"In U.S. Census data, `employment` signifies an..."
2,fnlwgt,`fnlwgt` in the U.S. Census context means fina...
3,education,"In U.S. Census data, `education` typically ref..."
4,education-num,`education-num` in the U.S. Census context ref...
5,marital-status,`Marital-status` in U.S. Census data signifies...
6,occupation,"In U.S. Census data, `occupation` represents a..."
7,relationship,"In U.S. Census data, `relationship` indicates ..."
8,race,`Race` in American census data categorizes ind...
9,sex,American census data includes demographic info...


# Saving the data to a `csv` file

In [8]:
# Persist the loaded frame to disk. The row index is written as well (pandas'
# default, spelled out here), so the file's first column is the unnamed row number.
df.to_csv('data.csv', index=True)