In [136]:
import pandas as pd
import numpy as np

### 1. Create a Pandas Series from a Python list, numpy array, and a dictionary.

In [137]:
list_series = pd.Series([38.93, 67.97, 83.8, 58.94, 125.1, 66.97, 333.3], name = "Population in Millions")

np_arr_series = pd.Series(np.array([3130.014, 4591.100, 2242.182, 2328.028, 3495.261, 28781.083, 4110.452]), name = "GDP in Billions")
                                   
dict_series = pd.Series(
    {"Canada": "North America",
     "France": "Europe",
     "Germany": "Europe",
     "Italy": "Europe",
     "Japan": "Asia",
     "UK": "Europe",
     "US": "North America"
    },
    name = "Continent of the Country"
)

print(list_series,'\n')
print(np_arr_series,'\n')
print(dict_series)

0     38.93
1     67.97
2     83.80
3     58.94
4    125.10
5     66.97
6    333.30
Name: Population in Millions, dtype: float64 

0     3130.014
1     4591.100
2     2242.182
3     2328.028
4     3495.261
5    28781.083
6     4110.452
Name: GDP in Billions, dtype: float64 

Canada     North America
France            Europe
Germany           Europe
Italy             Europe
Japan               Asia
UK                Europe
US         North America
Name: Continent of the Country, dtype: object


### 2. Assign a custom index to the Series.

In [138]:
list_series.index = ["Canada", "France", "Germany", "Italy", "Japan", "UK", "US"]
np_arr_series.index = ["France", "Germany", "Canada", "Italy", "UK", "US", "Japan"]

print(list_series,'\n')
print(np_arr_series,'\n')
print(dict_series)


Canada      38.93
France      67.97
Germany     83.80
Italy       58.94
Japan      125.10
UK          66.97
US         333.30
Name: Population in Millions, dtype: float64 

France      3130.014
Germany     4591.100
Canada      2242.182
Italy       2328.028
UK          3495.261
US         28781.083
Japan       4110.452
Name: GDP in Billions, dtype: float64 

Canada     North America
France            Europe
Germany           Europe
Italy             Europe
Japan               Asia
UK                Europe
US         North America
Name: Continent of the Country, dtype: object


### 3. Perform basic arithmetic operations on Series.

In [139]:
list_add = list_series * 10
print(list_add, '\n')

list_mul = list_series * 20
print(list_mul, '\n')

arr_sub = np_arr_series - 1000
print(arr_sub, '\n')

arr_div = np_arr_series / 50
print(arr_div, '\n')

list_mean = list_series.mean()
print('Mean: ',list_mean, '\n')

list_median = list_series.median()
print('Median: ',list_median, '\n')

arr_mode = np_arr_series.std()
print('Standard Deviation: ',arr_mode, '\n')

Canada      389.3
France      679.7
Germany     838.0
Italy       589.4
Japan      1251.0
UK          669.7
US         3333.0
Name: Population in Millions, dtype: float64 

Canada      778.6
France     1359.4
Germany    1676.0
Italy      1178.8
Japan      2502.0
UK         1339.4
US         6666.0
Name: Population in Millions, dtype: float64 

France      2130.014
Germany     3591.100
Canada      1242.182
Italy       1328.028
UK          2495.261
US         27781.083
Japan       3110.452
Name: GDP in Billions, dtype: float64 

France      62.60028
Germany     91.82200
Canada      44.84364
Italy       46.56056
UK          69.90522
US         575.62166
Japan       82.20904
Name: GDP in Billions, dtype: float64 

Mean:  110.71571428571428 

Median:  67.97 

Standard Deviation:  9663.308647778296 



### 4. Access elements using index labels and positions.

In [140]:
print('Population of 3rd element in the List: ', list_series.iloc[2])
print('Population of Japan: ', list_series.loc['Japan'], '\n')

print('GDP of 5th element in the List: ', np_arr_series.iloc[4])
print('GDP of Italy: ', np_arr_series.loc['Italy'], '\n')

print('Continent of 7th element in the List: ', dict_series.iloc[6])
print('Continent of Germany: ', dict_series.loc['Germany'])

Population of 3rd element in the List:  83.8
Population of Japan:  125.1 

GDP of 5th element in the List:  3495.261
GDP of Italy:  2328.028 

Continent of 7th element in the List:  North America
Continent of Germany:  Europe


### 5. Filter the Series to include only values greater than a specific threshold.

In [141]:
print("Countries having GDP greater than 3300 billion")
gdp_greater = np_arr_series[np_arr_series > 3300]
print(gdp_greater, '\n')

print("Countries having Population greater than 80 million")
population_greater = list_series[list_series > 80]
print(population_greater)

Countries having GDP greater than 3300 billion
Germany     4591.100
UK          3495.261
US         28781.083
Japan       4110.452
Name: GDP in Billions, dtype: float64 

Countries having Population greater than 80 million
Germany     83.8
Japan      125.1
US         333.3
Name: Population in Millions, dtype: float64


### 6. Create a DataFrame from a dictionary of lists.

In [142]:
books = {
    "titles": ["Atomic Habits", "Freedom at Midnight", "Mindset: The New Psychology of Success", 
               "The Second Sex", "Men Are from Mars, Women Are from Venus", "The Laws of Human Nature", 
               "The 48 Laws of Power", "Linchpin: Are You Indispensable?", 
               "Think Again: The Power of Knowing What You Don't Know"],
    "genres": ["Self-Help", "History", "Self-Help", "Philosophy", "Relationships", "Psychology", 
               "Strategy", "Business", "Self-Help"],
    "authors": ["James Clear", "Dominique Lapierre and Larry Collins", "Carol Dweck", "Simone de Beauvoir", 
                "John Gray", "Robert Greene", "Robert Greene", "Seth Godin", "Adam Grant"],
    "published_years": [2018, 1975, 2006, 1949, 1992, 1998, 1998, 2011, 2013]
}

df_dict = pd.DataFrame(books)
df_dict

Unnamed: 0,titles,genres,authors,published_years
0,Atomic Habits,Self-Help,James Clear,2018
1,Freedom at Midnight,History,Dominique Lapierre and Larry Collins,1975
2,Mindset: The New Psychology of Success,Self-Help,Carol Dweck,2006
3,The Second Sex,Philosophy,Simone de Beauvoir,1949
4,"Men Are from Mars, Women Are from Venus",Relationships,John Gray,1992
5,The Laws of Human Nature,Psychology,Robert Greene,1998
6,The 48 Laws of Power,Strategy,Robert Greene,1998
7,Linchpin: Are You Indispensable?,Business,Seth Godin,2011
8,Think Again: The Power of Knowing What You Don...,Self-Help,Adam Grant,2013


### 7. Create a DataFrame from a numpy array, specifying column and index names.

In [143]:
np_arr_df = np.array([
    [38.93, 2242.182, 9984670, 0.936, 'Amercia'],
    [67.97, 3130.014, 551695, 0.932, "Europe"],
    [83.8, 4591.100, 357022, 0.932, "Europe"],
    [58.94, 2328.028, 301340, 0.934, "Europe"],
    [125.1, 4110.452, 377975, 0.943, "Asia"],
    [66.97, 3495.261, 243610, 0.939, "Europe"],
    [333.3, 28781.083, 9833520, 0.944, "America"]
])

columns = ['Population', 'GDP', 'Surface Area', 'HDI', 'Continent']
index= ["Canada", "France", "Germany", "Italy", "Japan", "UK", "US"]

df_arr = pd.DataFrame(np_arr_df, columns=columns, index=index)
df_arr

Unnamed: 0,Population,GDP,Surface Area,HDI,Continent
Canada,38.93,2242.182,9984670,0.936,Amercia
France,67.97,3130.014,551695,0.932,Europe
Germany,83.8,4591.1,357022,0.932,Europe
Italy,58.94,2328.028,301340,0.934,Europe
Japan,125.1,4110.452,377975,0.943,Asia
UK,66.97,3495.261,243610,0.939,Europe
US,333.3,28781.083,9833520,0.944,America


### 8. Load a DataFrame from a CSV file.

In [144]:
df = pd.read_csv('HR_Attrition.csv')
df

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,Yes,Travel_Rarely,1102,Sales,1,Bachelor,Life Sciences,1,1,...,1,80,0,8,0,1,6,4,0,5
1,49,No,Travel_Frequently,279,Research & Development,8,High School,Life Sciences,1,2,...,4,80,1,10,3,3,10,7,1,7
2,37,Yes,Travel_Rarely,1373,Research & Development,2,Bachelor,Other,1,4,...,2,80,0,7,3,3,0,0,0,0
3,33,No,Travel_Frequently,1392,Research & Development,3,PhD,Life Sciences,1,5,...,3,80,0,8,3,3,8,7,3,0
4,27,No,Travel_Rarely,591,Research & Development,2,High School,Medical,1,7,...,4,80,1,6,3,3,2,2,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1465,36,No,Travel_Frequently,884,Research & Development,23,Bachelor,Medical,1,2061,...,3,80,1,17,3,3,5,2,0,3
1466,39,No,Travel_Rarely,613,Research & Development,6,High School,Medical,1,2062,...,1,80,1,9,5,3,7,7,1,7
1467,27,No,Travel_Rarely,155,Research & Development,4,Master,Life Sciences,1,2064,...,2,80,1,6,0,3,6,2,0,3
1468,49,No,Travel_Frequently,1023,Sales,2,Master,Medical,1,2065,...,4,80,0,17,3,2,9,6,0,8


### 9. Display the first and last five rows of the DataFrame.

In [145]:
df.head()

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,Yes,Travel_Rarely,1102,Sales,1,Bachelor,Life Sciences,1,1,...,1,80,0,8,0,1,6,4,0,5
1,49,No,Travel_Frequently,279,Research & Development,8,High School,Life Sciences,1,2,...,4,80,1,10,3,3,10,7,1,7
2,37,Yes,Travel_Rarely,1373,Research & Development,2,Bachelor,Other,1,4,...,2,80,0,7,3,3,0,0,0,0
3,33,No,Travel_Frequently,1392,Research & Development,3,PhD,Life Sciences,1,5,...,3,80,0,8,3,3,8,7,3,0
4,27,No,Travel_Rarely,591,Research & Development,2,High School,Medical,1,7,...,4,80,1,6,3,3,2,2,2,2


In [146]:
df.tail()

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
1465,36,No,Travel_Frequently,884,Research & Development,23,Bachelor,Medical,1,2061,...,3,80,1,17,3,3,5,2,0,3
1466,39,No,Travel_Rarely,613,Research & Development,6,High School,Medical,1,2062,...,1,80,1,9,5,3,7,7,1,7
1467,27,No,Travel_Rarely,155,Research & Development,4,Master,Life Sciences,1,2064,...,2,80,1,6,0,3,6,2,0,3
1468,49,No,Travel_Frequently,1023,Sales,2,Master,Medical,1,2065,...,4,80,0,17,3,2,9,6,0,8
1469,34,No,Travel_Rarely,628,Research & Development,8,Master,Medical,1,2068,...,1,80,0,6,3,4,4,3,1,2


### 10. Get a summary of the DataFrame including the mean, median, and standard deviation of numeric columns.

In [147]:
df.describe()

Unnamed: 0,Age,DailyRate,DistanceFromHome,EmployeeCount,EmployeeNumber,EnvironmentSatisfaction,HourlyRate,JobInvolvement,JobLevel,JobSatisfaction,...,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
count,1470.0,1470.0,1470.0,1470.0,1470.0,1470.0,1470.0,1470.0,1470.0,1470.0,...,1470.0,1470.0,1470.0,1470.0,1470.0,1470.0,1470.0,1470.0,1470.0,1470.0
mean,36.92381,802.485714,9.192517,1.0,1024.865306,2.721769,65.891156,2.729932,2.063946,2.728571,...,2.712245,80.0,0.793878,11.279592,2.79932,2.761224,7.008163,4.229252,2.187755,4.123129
std,9.135373,403.5091,8.106864,0.0,602.024335,1.093082,20.329428,0.711561,1.10694,1.102846,...,1.081209,0.0,0.852077,7.780782,1.289271,0.706476,6.126525,3.623137,3.22243,3.568136
min,18.0,102.0,1.0,1.0,1.0,1.0,30.0,1.0,1.0,1.0,...,1.0,80.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
25%,30.0,465.0,2.0,1.0,491.25,2.0,48.0,2.0,1.0,2.0,...,2.0,80.0,0.0,6.0,2.0,2.0,3.0,2.0,0.0,2.0
50%,36.0,802.0,7.0,1.0,1020.5,3.0,66.0,3.0,2.0,3.0,...,3.0,80.0,1.0,10.0,3.0,3.0,5.0,3.0,1.0,3.0
75%,43.0,1157.0,14.0,1.0,1555.75,4.0,83.75,3.0,3.0,4.0,...,4.0,80.0,1.0,15.0,3.0,3.0,9.0,7.0,3.0,7.0
max,60.0,1499.0,29.0,1.0,2068.0,4.0,100.0,4.0,5.0,4.0,...,4.0,80.0,3.0,40.0,6.0,4.0,40.0,18.0,15.0,17.0


In [148]:
df.mean(numeric_only=True)

Age                            36.923810
DailyRate                     802.485714
DistanceFromHome                9.192517
EmployeeCount                   1.000000
EmployeeNumber               1024.865306
EnvironmentSatisfaction         2.721769
HourlyRate                     65.891156
JobInvolvement                  2.729932
JobLevel                        2.063946
JobSatisfaction                 2.728571
MonthlyIncome                6502.931293
MonthlyRate                 14313.103401
NumCompaniesWorked              2.693197
PercentSalaryHike              15.209524
PerformanceRating               3.153741
RelationshipSatisfaction        2.712245
StandardHours                  80.000000
StockOptionLevel                0.793878
TotalWorkingYears              11.279592
TrainingTimesLastYear           2.799320
WorkLifeBalance                 2.761224
YearsAtCompany                  7.008163
YearsInCurrentRole              4.229252
YearsSinceLastPromotion         2.187755
YearsWithCurrMan

In [149]:
df.median(numeric_only=True)

Age                            36.0
DailyRate                     802.0
DistanceFromHome                7.0
EmployeeCount                   1.0
EmployeeNumber               1020.5
EnvironmentSatisfaction         3.0
HourlyRate                     66.0
JobInvolvement                  3.0
JobLevel                        2.0
JobSatisfaction                 3.0
MonthlyIncome                4919.0
MonthlyRate                 14235.5
NumCompaniesWorked              2.0
PercentSalaryHike              14.0
PerformanceRating               3.0
RelationshipSatisfaction        3.0
StandardHours                  80.0
StockOptionLevel                1.0
TotalWorkingYears              10.0
TrainingTimesLastYear           3.0
WorkLifeBalance                 3.0
YearsAtCompany                  5.0
YearsInCurrentRole              3.0
YearsSinceLastPromotion         1.0
YearsWithCurrManager            3.0
dtype: float64

In [150]:
df.std(numeric_only=True)

Age                         9.135373e+00
DailyRate                   4.035091e+02
DistanceFromHome            8.106864e+00
EmployeeCount               1.110601e-16
EmployeeNumber              6.020243e+02
EnvironmentSatisfaction     1.093082e+00
HourlyRate                  2.032943e+01
JobInvolvement              7.115611e-01
JobLevel                    1.106940e+00
JobSatisfaction             1.102846e+00
MonthlyIncome               4.707957e+03
MonthlyRate                 7.117786e+03
NumCompaniesWorked          2.498009e+00
PercentSalaryHike           3.659938e+00
PerformanceRating           3.608235e-01
RelationshipSatisfaction    1.081209e+00
StandardHours               0.000000e+00
StockOptionLevel            8.520767e-01
TotalWorkingYears           7.780782e+00
TrainingTimesLastYear       1.289271e+00
WorkLifeBalance             7.064758e-01
YearsAtCompany              6.126525e+00
YearsInCurrentRole          3.623137e+00
YearsSinceLastPromotion     3.222430e+00
YearsWithCurrMan

### 11. Extract a specific column as a Series.

In [151]:
extracted_series = df['Age']
extracted_series

0       41
1       49
2       37
3       33
4       27
        ..
1465    36
1466    39
1467    27
1468    49
1469    34
Name: Age, Length: 1470, dtype: int64

### 12. Filter rows based on column values.

In [152]:
filter_column = df[df['Age'] >= 60]
filter_column

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
411,60,No,Travel_Rarely,422,Research & Development,7,Master,Life Sciences,1,549,...,4,80,0,33,5,1,29,8,11,10
427,60,No,Travel_Frequently,1499,Sales,28,Master,Marketing,1,573,...,4,80,0,22,5,4,18,13,13,11
536,60,No,Travel_Rarely,1179,Sales,16,PhD,Marketing,1,732,...,4,80,0,10,1,3,2,2,2,2
879,60,No,Travel_Rarely,696,Sales,7,PhD,Marketing,1,1233,...,2,80,1,12,3,3,11,7,1,9
1209,60,No,Travel_Rarely,370,Research & Development,1,PhD,Medical,1,1697,...,3,80,1,19,2,4,1,0,0,0


### 13. Select rows based on multiple conditions.

In [153]:
filter_condition = df[(df['Age'] > 35) & (df['Department'] == 'Sales') & (df['Education'] == 'PhD') & (df['EducationField'] == 'Marketing') & (df['DistanceFromHome'] > 10)]
filter_condition

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
91,51,No,Travel_Rarely,632,Sales,21,PhD,Marketing,1,120,...,4,80,0,11,2,1,10,7,1,0
532,47,No,Travel_Rarely,703,Sales,14,PhD,Marketing,1,728,...,4,80,0,20,2,3,7,7,1,7
536,60,No,Travel_Rarely,1179,Sales,16,PhD,Marketing,1,732,...,4,80,0,10,1,3,2,2,2,2
616,51,No,Travel_Rarely,1318,Sales,26,PhD,Marketing,1,851,...,3,80,1,29,2,2,20,6,4,17
628,37,No,Travel_Rarely,342,Sales,16,PhD,Marketing,1,868,...,4,80,2,9,2,3,1,0,0,0
946,40,Yes,Travel_Rarely,299,Sales,25,PhD,Marketing,1,1318,...,3,80,0,9,2,3,5,4,1,0
975,55,Yes,Travel_Rarely,267,Sales,13,PhD,Marketing,1,1372,...,3,80,0,24,2,2,19,7,3,8
1019,36,No,Travel_Rarely,329,Sales,16,PhD,Marketing,1,1436,...,1,80,2,11,3,2,3,2,0,2
1254,49,No,Travel_Rarely,1313,Sales,11,PhD,Marketing,1,1757,...,3,80,0,8,1,4,5,1,0,4
1378,42,No,Travel_Rarely,419,Sales,12,PhD,Marketing,1,1943,...,3,80,2,14,4,3,0,0,0,0


### 14. Add a new column to the DataFrame.

In [154]:
df['new_column'] = "Yes"
df

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager,new_column
0,41,Yes,Travel_Rarely,1102,Sales,1,Bachelor,Life Sciences,1,1,...,80,0,8,0,1,6,4,0,5,Yes
1,49,No,Travel_Frequently,279,Research & Development,8,High School,Life Sciences,1,2,...,80,1,10,3,3,10,7,1,7,Yes
2,37,Yes,Travel_Rarely,1373,Research & Development,2,Bachelor,Other,1,4,...,80,0,7,3,3,0,0,0,0,Yes
3,33,No,Travel_Frequently,1392,Research & Development,3,PhD,Life Sciences,1,5,...,80,0,8,3,3,8,7,3,0,Yes
4,27,No,Travel_Rarely,591,Research & Development,2,High School,Medical,1,7,...,80,1,6,3,3,2,2,2,2,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1465,36,No,Travel_Frequently,884,Research & Development,23,Bachelor,Medical,1,2061,...,80,1,17,3,3,5,2,0,3,Yes
1466,39,No,Travel_Rarely,613,Research & Development,6,High School,Medical,1,2062,...,80,1,9,5,3,7,7,1,7,Yes
1467,27,No,Travel_Rarely,155,Research & Development,4,Master,Life Sciences,1,2064,...,80,1,6,0,3,6,2,0,3,Yes
1468,49,No,Travel_Frequently,1023,Sales,2,Master,Medical,1,2065,...,80,0,17,3,2,9,6,0,8,Yes


### 15. Delete a column from the DataFrame.

In [155]:
df.drop('new_column', axis=1, inplace=True)
df

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,Yes,Travel_Rarely,1102,Sales,1,Bachelor,Life Sciences,1,1,...,1,80,0,8,0,1,6,4,0,5
1,49,No,Travel_Frequently,279,Research & Development,8,High School,Life Sciences,1,2,...,4,80,1,10,3,3,10,7,1,7
2,37,Yes,Travel_Rarely,1373,Research & Development,2,Bachelor,Other,1,4,...,2,80,0,7,3,3,0,0,0,0
3,33,No,Travel_Frequently,1392,Research & Development,3,PhD,Life Sciences,1,5,...,3,80,0,8,3,3,8,7,3,0
4,27,No,Travel_Rarely,591,Research & Development,2,High School,Medical,1,7,...,4,80,1,6,3,3,2,2,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1465,36,No,Travel_Frequently,884,Research & Development,23,Bachelor,Medical,1,2061,...,3,80,1,17,3,3,5,2,0,3
1466,39,No,Travel_Rarely,613,Research & Development,6,High School,Medical,1,2062,...,1,80,1,9,5,3,7,7,1,7
1467,27,No,Travel_Rarely,155,Research & Development,4,Master,Life Sciences,1,2064,...,2,80,1,6,0,3,6,2,0,3
1468,49,No,Travel_Frequently,1023,Sales,2,Master,Medical,1,2065,...,4,80,0,17,3,2,9,6,0,8


### 16. Rename columns in the DataFrame.

In [156]:
df.rename(columns={'BusinessTravel': 'Travel', 'DistanceFromHome': 'Distance'}, inplace=True)
df.head()

Unnamed: 0,Age,Attrition,Travel,DailyRate,Department,Distance,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,Yes,Travel_Rarely,1102,Sales,1,Bachelor,Life Sciences,1,1,...,1,80,0,8,0,1,6,4,0,5
1,49,No,Travel_Frequently,279,Research & Development,8,High School,Life Sciences,1,2,...,4,80,1,10,3,3,10,7,1,7
2,37,Yes,Travel_Rarely,1373,Research & Development,2,Bachelor,Other,1,4,...,2,80,0,7,3,3,0,0,0,0
3,33,No,Travel_Frequently,1392,Research & Development,3,PhD,Life Sciences,1,5,...,3,80,0,8,3,3,8,7,3,0
4,27,No,Travel_Rarely,591,Research & Development,2,High School,Medical,1,7,...,4,80,1,6,3,3,2,2,2,2
