# Loading a CSV file

In [1]:
import pandas as pd

# Read the sample dataset from disk; the cells below all work from this `df`.
df = pd.read_csv(filepath_or_buffer='Sample.csv')

In [2]:
# Preview the first five rows to sanity-check the load.
df.head(n=5)

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native,income_>50K
0,67,Private,366425,Doctorate,16,Divorced,Exec-managerial,Not-in-family,White,Male,99999,0,60,United-States,1
1,17,Private,244602,12th,8,Never-married,Other-service,Own-child,White,Male,0,0,15,United-States,0
2,31,Private,174201,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,40,United-States,1
3,58,State-gov,110199,7th-8th,4,Married-civ-spouse,Transport-moving,Husband,White,Male,0,0,40,United-States,0
4,25,State-gov,149248,Some-college,10,Never-married,Other-service,Not-in-family,Black,Male,0,0,40,United-States,0


# Filtering Data

In [5]:
# Keep only the rows whose age is strictly greater than 40.
over_40_mask = df['age'].gt(40)
filtered_df = df.loc[over_40_mask]
filtered_df

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native,income_>50K
0,67,Private,366425,Doctorate,16,Divorced,Exec-managerial,Not-in-family,White,Male,99999,0,60,United-States,1
3,58,State-gov,110199,7th-8th,4,Married-civ-spouse,Transport-moving,Husband,White,Male,0,0,40,United-States,0
5,59,State-gov,105363,HS-grad,9,Never-married,Adm-clerical,Own-child,White,Male,0,0,40,United-States,0
6,70,Private,216390,9th,5,Married-civ-spouse,Machine-op-inspct,Wife,White,Female,2653,0,40,United-States,0
12,59,Self-emp-not-inc,241297,Some-college,10,Widowed,Farming-fishing,Not-in-family,White,Female,6849,0,40,United-States,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43945,46,Private,177114,Assoc-acdm,12,Widowed,Prof-specialty,Unmarried,White,Female,0,0,27,United-States,0
43946,47,Private,420986,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,40,United-States,1
43948,44,Private,247880,Assoc-voc,11,Divorced,Exec-managerial,Not-in-family,White,Male,8614,0,40,United-States,1
43952,52,Private,68982,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,50,United-States,1


# Handling Missing Values

In [7]:
# Count the missing (NaN) entries in each column.
missing_values = df.isna().sum()
print(missing_values)

age                   0
workclass          2498
fnlwgt                0
education             0
educational-num       0
marital               0
occupation         2506
relationship          0
race                  0
gender                0
capital-gain          0
capital-loss          0
hours-per-week        0
native              763
income_>50K           0
dtype: int64


In [9]:
# Method 1: replace every missing value with 0. The result is a new frame;
# `df` itself keeps its missing values.
df_filled = df.fillna(value=0)
df_filled

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native,income_>50K
0,67,Private,366425,Doctorate,16,Divorced,Exec-managerial,Not-in-family,White,Male,99999,0,60,United-States,1
1,17,Private,244602,12th,8,Never-married,Other-service,Own-child,White,Male,0,0,15,United-States,0
2,31,Private,174201,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,40,United-States,1
3,58,State-gov,110199,7th-8th,4,Married-civ-spouse,Transport-moving,Husband,White,Male,0,0,40,United-States,0
4,25,State-gov,149248,Some-college,10,Never-married,Other-service,Not-in-family,Black,Male,0,0,40,United-States,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43952,52,Private,68982,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,50,United-States,1
43953,19,Private,116562,HS-grad,9,Never-married,Other-service,Own-child,White,Female,0,0,40,United-States,0
43954,30,Private,197947,Some-college,10,Divorced,Sales,Not-in-family,White,Male,0,0,58,United-States,0
43955,46,Private,97883,Bachelors,13,Never-married,Sales,Not-in-family,White,Female,0,0,35,United-States,0


In [11]:
# Re-count missing entries per column, this time on the zero-filled copy.
missing_values = df_filled.isna().sum()
print(missing_values)

age                0
workclass          0
fnlwgt             0
education          0
educational-num    0
marital            0
occupation         0
relationship       0
race               0
gender             0
capital-gain       0
capital-loss       0
hours-per-week     0
native             0
income_>50K        0
dtype: int64


In [13]:
# Count missing entries per column in the original frame `df` once more.
missing_values = df.isna().sum()
print(missing_values)

age                   0
workclass          2498
fnlwgt                0
education             0
educational-num       0
marital               0
occupation         2506
relationship          0
race                  0
gender                0
capital-gain          0
capital-loss          0
hours-per-week        0
native              763
income_>50K           0
dtype: int64


In [15]:
# Method 2: fill each column's missing values with a column-specific default
# ('workclass' -> 'Private', 'occupation' -> 'Other-service',
# 'native' -> 'United-States'). For numeric data the mean, median, or mode
# could be used instead.
#
# A single DataFrame-level fillna replaces the per-column
# `df[col].fillna(..., inplace=True)` calls: that chained form operates on a
# column selected from `df`, which recent pandas releases warn about and which
# does not update `df` when Copy-on-Write is enabled.
fill_defaults = {
    'workclass': 'Private',
    'occupation': 'Other-service',
    'native': 'United-States',
}
df = df.fillna(value=fill_defaults)

In [17]:
# Display the DataFrame (the notebook shows a truncated view of large frames).
df

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native,income_>50K
0,67,Private,366425,Doctorate,16,Divorced,Exec-managerial,Not-in-family,White,Male,99999,0,60,United-States,1
1,17,Private,244602,12th,8,Never-married,Other-service,Own-child,White,Male,0,0,15,United-States,0
2,31,Private,174201,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,40,United-States,1
3,58,State-gov,110199,7th-8th,4,Married-civ-spouse,Transport-moving,Husband,White,Male,0,0,40,United-States,0
4,25,State-gov,149248,Some-college,10,Never-married,Other-service,Not-in-family,Black,Male,0,0,40,United-States,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43952,52,Private,68982,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,50,United-States,1
43953,19,Private,116562,HS-grad,9,Never-married,Other-service,Own-child,White,Female,0,0,40,United-States,0
43954,30,Private,197947,Some-college,10,Divorced,Sales,Not-in-family,White,Male,0,0,58,United-States,0
43955,46,Private,97883,Bachelors,13,Never-married,Sales,Not-in-family,White,Female,0,0,35,United-States,0


In [19]:
# Tally the missing entries per column of `df` and print the counts.
missing_values = df.isna().sum()
print(missing_values)

age                0
workclass          0
fnlwgt             0
education          0
educational-num    0
marital            0
occupation         0
relationship       0
race               0
gender             0
capital-gain       0
capital-loss       0
hours-per-week     0
native             0
income_>50K        0
dtype: int64


# Calculating Summary Statistics

In [21]:
# Compute descriptive statistics (count, mean, std, min, quartiles, max)
# for the DataFrame via describe().
summary_stats = df.describe()
# print() emits the plain-text table, which wraps wide frames across blocks.
print(summary_stats)

                age        fnlwgt  educational-num  capital-gain  \
count  43957.000000  4.395700e+04     43957.000000  43957.000000   
mean      38.617149  1.896730e+05        10.074118   1093.559797   
std       13.734401  1.058215e+05         2.575092   7570.536063   
min       17.000000  1.349200e+04         1.000000      0.000000   
25%       28.000000  1.174960e+05         9.000000      0.000000   
50%       37.000000  1.781000e+05        10.000000      0.000000   
75%       48.000000  2.376710e+05        12.000000      0.000000   
max       90.000000  1.490400e+06        16.000000  99999.000000   

       capital-loss  hours-per-week   income_>50K  
count  43957.000000    43957.000000  43957.000000  
mean      88.246491       40.407694      0.239279  
std      404.588410       12.400303      0.426648  
min        0.000000        1.000000      0.000000  
25%        0.000000       40.000000      0.000000  
50%        0.000000       40.000000      0.000000  
75%        0.000000    

In [23]:
# Central tendency and spread of the 'age' column.
mean_value = df['age'].mean()
print('Mean Age:', mean_value)

median_value = df['age'].median()
print('Median:', median_value)

# Series.std() uses ddof=1 by default, i.e. the sample standard deviation.
std_dev = df['age'].std()
# Label spelling corrected from 'Daviation' to 'Deviation'.
print('Standard Deviation:', std_dev)

Mean Age: 38.61714857701845
Median: 37.0
Standard Daviation: 13.734400969233622
