# 3.0 Introduction 

In [5]:
import pandas as pd
# Location of the Titanic passenger CSV
url = 'https://tinyurl.com/titanic-csv'
# Read the CSV into a DataFrame
dataframe = pd.read_csv(filepath_or_buffer=url)
# Display the first five rows
dataframe.head(n=5)

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,29.0,female,1,1
1,"Allison, Miss Helen Loraine",1st,2.0,female,0,1
2,"Allison, Mr Hudson Joshua Creighton",1st,30.0,male,0,0
3,"Allison, Mrs Hudson JC (Bessie Waldo Daniels)",1st,25.0,female,0,1
4,"Allison, Master Hudson Trevor",1st,0.92,male,1,0


# 3.1 Creating a Data Frame

In [6]:
import pandas as pd
# Start from an empty DataFrame
dataframe = pd.DataFrame()
# Column contents, keyed by column name (insertion order is the column order)
column_values = {
    'Name': ['Jacky Jackson', 'Steve Stevenson'],
    'Age': [38, 25],
    'Driver': [True, False],
}
# Add the columns one at a time
for column_name, values in column_values.items():
    dataframe[column_name] = values
# Display the resulting DataFrame
dataframe

Unnamed: 0,Name,Age,Driver
0,Jacky Jackson,38,True
1,Steve Stevenson,25,False


In [7]:
# Create row: a Series whose index labels match the DataFrame's column names
new_person = pd.Series(['Molly Mooney', 40, True],
                       index=['Name', 'Age', 'Driver'])
# Append row. DataFrame.append was removed in pandas 2.0, so wrap the Series
# in a one-row DataFrame and concatenate instead; ignore_index=True renumbers
# the rows 0..n-1. The result is a new DataFrame; `dataframe` is not modified.
pd.concat([dataframe, pd.DataFrame([new_person])], ignore_index=True)

Unnamed: 0,Name,Age,Driver
0,Jacky Jackson,38,True
1,Steve Stevenson,25,False
2,Molly Mooney,40,True


# 3.2 Describing the Data

In [8]:
import pandas as pd
# Location of the Titanic passenger CSV
url = 'https://tinyurl.com/titanic-csv'
# Read the CSV into a DataFrame
dataframe = pd.read_csv(filepath_or_buffer=url)
# Display the first two rows
dataframe.head(n=2)

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,29.0,female,1,1
1,"Allison, Miss Helen Loraine",1st,2.0,female,0,1


In [19]:
# Report the (rows, columns) shape of the DataFrame
print('Show dimensions', dataframe.shape)
print()
# Compute descriptive statistics, then print the summary table
summary_statistics = dataframe.describe()
print(summary_statistics)

Show dimensions (1313, 6)

              Age     Survived      SexCode
count  756.000000  1313.000000  1313.000000
mean    30.397989     0.342727     0.351866
std     14.259049     0.474802     0.477734
min      0.170000     0.000000     0.000000
25%     21.000000     0.000000     0.000000
50%     28.000000     0.000000     0.000000
75%     39.000000     1.000000     1.000000
max     71.000000     1.000000     1.000000


# 3.3 Navigating DataFrames

In [20]:
import pandas as pd
# Location of the Titanic passenger CSV
url = 'https://tinyurl.com/titanic-csv'
# Read the CSV into a DataFrame
dataframe = pd.read_csv(filepath_or_buffer=url)
# Select the first row by position (returned as a Series)
dataframe.iloc[0, :]

Name        Allen, Miss Elisabeth Walton
PClass                               1st
Age                                   29
Sex                               female
Survived                               1
SexCode                                1
Name: 0, dtype: object

In [21]:
# Positions 1, 2 and 3, i.e. the second, third and fourth rows (end is exclusive)
dataframe.iloc[1:4, :]

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
1,"Allison, Miss Helen Loraine",1st,2.0,female,0,1
2,"Allison, Mr Hudson Joshua Creighton",1st,30.0,male,0,0
3,"Allison, Mrs Hudson JC (Bessie Waldo Daniels)",1st,25.0,female,0,1


In [22]:
# Positions 0 through 3, i.e. the first four rows (end is exclusive)
dataframe.iloc[0:4]

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,29.0,female,1,1
1,"Allison, Miss Helen Loraine",1st,2.0,female,0,1
2,"Allison, Mr Hudson Joshua Creighton",1st,30.0,male,0,0
3,"Allison, Mrs Hudson JC (Bessie Waldo Daniels)",1st,25.0,female,0,1


In [23]:
# Index the rows by passenger name; drop=False keeps 'Name' as a regular column too
dataframe = dataframe.set_index('Name', drop=False)
# Show the row whose index label is this passenger's name
dataframe.loc['Allen, Miss Elisabeth Walton']

Name        Allen, Miss Elisabeth Walton
PClass                               1st
Age                                   29
Sex                               female
Survived                               1
SexCode                                1
Name: Allen, Miss Elisabeth Walton, dtype: object

# 3.4 Selecting Rows Based on Conditions

In [24]:
import pandas as pd
# Location of the Titanic passenger CSV
url = 'https://tinyurl.com/titanic-csv'
# Read the CSV into a DataFrame
dataframe = pd.read_csv(filepath_or_buffer=url)
# First two rows whose 'Sex' column equals 'female'
dataframe.loc[dataframe['Sex'].eq('female')].head(n=2)

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,29.0,female,1,1
1,"Allison, Miss Helen Loraine",1st,2.0,female,0,1


In [25]:
# Build one boolean mask per condition, then keep rows satisfying both
is_female = dataframe['Sex'] == 'female'
is_65_or_older = dataframe['Age'] >= 65
dataframe[is_female & is_65_or_older]

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
73,"Crosby, Mrs Edward Gifford (Catherine Elizabet...",1st,69.0,female,1,1


# 3.5 Replacing Values

In [30]:
import pandas as pd
# Location of the Titanic passenger CSV
url = 'https://tinyurl.com/titanic-csv'
# Read the CSV into a DataFrame
dataframe = pd.read_csv(filepath_or_buffer=url)
# Swap 'female' for 'Woman' in the 'Sex' column and show the first two values
dataframe['Sex'].replace({'female': 'Woman'}).head(n=2)

0    Woman
1    Woman
Name: Sex, dtype: object

In [32]:
# Map each old label to its replacement in a single call
dataframe['Sex'].replace({'female': 'Woman', 'male': 'Man'}).head()

0    Woman
1    Woman
2      Man
3    Woman
4      Man
Name: Sex, dtype: object

In [33]:
# Replace the value 1 with 'One' across every column of the DataFrame
dataframe.replace(to_replace=1, value='One').head()

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,29.0,female,One,One
1,"Allison, Miss Helen Loraine",1st,2.0,female,0,One
2,"Allison, Mr Hudson Joshua Creighton",1st,30.0,male,0,0
3,"Allison, Mrs Hudson JC (Bessie Waldo Daniels)",1st,25.0,female,0,One
4,"Allison, Master Hudson Trevor",1st,0.92,male,One,0


In [34]:
# Treat the pattern as a regular expression and substitute every match
dataframe.replace(to_replace=r'1st', value='First', regex=True).head()

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",First,29.0,female,1,1
1,"Allison, Miss Helen Loraine",First,2.0,female,0,1
2,"Allison, Mr Hudson Joshua Creighton",First,30.0,male,0,0
3,"Allison, Mrs Hudson JC (Bessie Waldo Daniels)",First,25.0,female,0,1
4,"Allison, Master Hudson Trevor",First,0.92,male,1,0


# 3.6 Renaming Columns

In [35]:
import pandas as pd
# Location of the Titanic passenger CSV
url = 'https://tinyurl.com/titanic-csv'
# Read the CSV into a DataFrame
dataframe = pd.read_csv(filepath_or_buffer=url)
# Rename a single column (returns a new DataFrame; the original is unchanged)
dataframe.rename({'PClass': 'Passenger Class'}, axis='columns').head()

Unnamed: 0,Name,Passenger Class,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,29.0,female,1,1
1,"Allison, Miss Helen Loraine",1st,2.0,female,0,1
2,"Allison, Mr Hudson Joshua Creighton",1st,30.0,male,0,0
3,"Allison, Mrs Hudson JC (Bessie Waldo Daniels)",1st,25.0,female,0,1
4,"Allison, Master Hudson Trevor",1st,0.92,male,1,0


In [36]:
# Rename several columns at once via an {old name: new name} mapping
new_column_names = {'PClass': 'Passenger Class', 'Sex': 'Gender'}
dataframe.rename(columns=new_column_names).head()

Unnamed: 0,Name,Passenger Class,Age,Gender,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,29.0,female,1,1
1,"Allison, Miss Helen Loraine",1st,2.0,female,0,1
2,"Allison, Mr Hudson Joshua Creighton",1st,30.0,male,0,0
3,"Allison, Mrs Hudson JC (Bessie Waldo Daniels)",1st,25.0,female,0,1
4,"Allison, Master Hudson Trevor",1st,0.92,male,1,0


Rename all columns at once.

In [37]:
import collections
# Build a defaultdict with one key per existing column name, each mapped to
# an empty string (the default produced by `str()`)
column_names = collections.defaultdict(
    str, {name: '' for name in dataframe.columns}
)
# Display the mapping
column_names

defaultdict(str,
            {'Age': '',
             'Name': '',
             'PClass': '',
             'Sex': '',
             'SexCode': '',
             'Survived': ''})

# 3.7 Finding the Minimum, Maximum, Sum, Average and Count

In [43]:
import pandas as pd
# Location of the Titanic passenger CSV
url = 'https://tinyurl.com/titanic-csv'
# Read the CSV into a DataFrame
dataframe = pd.read_csv(filepath_or_buffer=url)
# Print summary statistics of the 'Age' column, one labelled value per line
age = dataframe['Age']
for label, statistic in (
    ('Maximum:', age.max()),
    ('Minimum:', age.min()),
    ('Mean:', age.mean()),
    ('Sum:', age.sum()),
    ('Count:', age.count()),
):
    print(label, statistic)


Maximum: 71.0
Minimum: 0.17
Mean: 30.397989417989415
Sum: 22980.88
Count: 756


In [42]:
# Number of non-missing values in each column
dataframe.count(axis=0)

Name        1313
PClass      1313
Age          756
Sex         1313
Survived    1313
SexCode     1313
dtype: int64

# 3.8 Finding Unique Values

In [44]:
import pandas as pd
# Location of the Titanic passenger CSV
url = 'https://tinyurl.com/titanic-csv'
# Read the CSV into a DataFrame
dataframe = pd.read_csv(filepath_or_buffer=url)
# Distinct values of the 'Sex' column, in order of first appearance
pd.unique(dataframe['Sex'])

array(['female', 'male'], dtype=object)

In [45]:
# Occurrences of each distinct 'Sex' value, most frequent first
dataframe['Sex'].value_counts(sort=True, ascending=False)

male      851
female    462
Name: Sex, dtype: int64

In [46]:
# Occurrences of each distinct 'PClass' value, most frequent first
dataframe.loc[:, 'PClass'].value_counts()

3rd    711
1st    322
2nd    279
*        1
Name: PClass, dtype: int64

# 3.9 Handling Missing Values

In [47]:
import pandas as pd
# Location of the Titanic passenger CSV
url = 'https://tinyurl.com/titanic-csv'
# Read the CSV into a DataFrame
dataframe = pd.read_csv(filepath_or_buffer=url)
# Rows whose 'Age' is missing; show the first five
dataframe[dataframe['Age'].isna()].head()

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
12,"Aubert, Mrs Leontine Pauline",1st,,female,1,1
13,"Barkworth, Mr Algernon H",1st,,male,1,0
14,"Baumann, Mr John D",1st,,male,0,0
29,"Borebank, Mr John James",1st,,male,0,0
32,"Bradley, Mr George",1st,,male,1,0


To have full functionality with NaN, we need to import the NumPy library first.

In [48]:
import numpy as np
# Overwrite the 'Sex' column, turning every 'male' entry into NaN
dataframe['Sex'] = dataframe['Sex'].replace({'male': np.nan})

Alternatively, a dataset may use a specific value to denote a missing observation,
such as NONE or -999; these values can be declared as missing when loading the data.

In [49]:
# Values that should be read as missing when the CSV is parsed
missing_value_markers = [np.nan, 'NONE', -999]
# Reload the data, declaring those markers as missing values
dataframe = pd.read_csv(url, na_values=missing_value_markers)

# 3.10 Deleting a Column

In [52]:
import pandas as pd
# Location of the Titanic passenger CSV
url = 'https://tinyurl.com/titanic-csv'
# Read the CSV into a DataFrame
dataframe = pd.read_csv(filepath_or_buffer=url)
# Remove the 'Age' column (returns a new DataFrame; the original is unchanged)
dataframe.drop(columns='Age').head()

Unnamed: 0,Name,PClass,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,female,1,1
1,"Allison, Miss Helen Loraine",1st,female,0,1
2,"Allison, Mr Hudson Joshua Creighton",1st,male,0,0
3,"Allison, Mrs Hudson JC (Bessie Waldo Daniels)",1st,female,0,1
4,"Allison, Master Hudson Trevor",1st,male,1,0


In [53]:
# Remove several columns in one call by passing a list of names
dataframe.drop(columns=['Age', 'Sex']).head()

Unnamed: 0,Name,PClass,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,1,1
1,"Allison, Miss Helen Loraine",1st,0,1
2,"Allison, Mr Hudson Joshua Creighton",1st,0,0
3,"Allison, Mrs Hudson JC (Bessie Waldo Daniels)",1st,0,1
4,"Allison, Master Hudson Trevor",1st,1,0


In [54]:
# Look up the name of the column at position 1, then drop that column
second_column_name = dataframe.columns[1]
dataframe.drop(columns=second_column_name).head()

Unnamed: 0,Name,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",29.0,female,1,1
1,"Allison, Miss Helen Loraine",2.0,female,0,1
2,"Allison, Mr Hudson Joshua Creighton",30.0,male,0,0
3,"Allison, Mrs Hudson JC (Bessie Waldo Daniels)",25.0,female,0,1
4,"Allison, Master Hudson Trevor",0.92,male,1,0


# 3.11 Deleting a Row

In [55]:
import pandas as pd
# Location of the Titanic passenger CSV
url = 'https://tinyurl.com/titanic-csv'
# Read the CSV into a DataFrame
dataframe = pd.read_csv(filepath_or_buffer=url)
# "Delete" rows by keeping only those whose 'Sex' is not 'male'
dataframe.loc[dataframe['Sex'].ne('male')].head()

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,29.0,female,1,1
1,"Allison, Miss Helen Loraine",1st,2.0,female,0,1
3,"Allison, Mrs Hudson JC (Bessie Waldo Daniels)",1st,25.0,female,0,1
6,"Andrews, Miss Kornelia Theodosia",1st,63.0,female,1,1
8,"Appleton, Mrs Edward Dale (Charlotte Lamson)",1st,58.0,female,1,1


In [56]:
# Keep every row except the one for this passenger name
name_to_remove = 'Allison, Miss Helen Loraine'
dataframe[dataframe['Name'] != name_to_remove].head()

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,29.0,female,1,1
2,"Allison, Mr Hudson Joshua Creighton",1st,30.0,male,0,0
3,"Allison, Mrs Hudson JC (Bessie Waldo Daniels)",1st,25.0,female,0,1
4,"Allison, Master Hudson Trevor",1st,0.92,male,1,0
5,"Anderson, Mr Harry",1st,47.0,male,1,0


In [57]:
# Keep every row whose index label is not 0
keep_row = dataframe.index != 0
dataframe[keep_row].head()

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
1,"Allison, Miss Helen Loraine",1st,2.0,female,0,1
2,"Allison, Mr Hudson Joshua Creighton",1st,30.0,male,0,0
3,"Allison, Mrs Hudson JC (Bessie Waldo Daniels)",1st,25.0,female,0,1
4,"Allison, Master Hudson Trevor",1st,0.92,male,1,0
5,"Anderson, Mr Harry",1st,47.0,male,1,0


# 3.12 Dropping Duplicate Rows

In [58]:
import pandas as pd
# Location of the Titanic passenger CSV
url = 'https://tinyurl.com/titanic-csv'
# Read the CSV into a DataFrame
dataframe = pd.read_csv(filepath_or_buffer=url)
# Remove rows that exactly duplicate an earlier row; show the first five
dataframe.drop_duplicates(keep='first').head()

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,29.0,female,1,1
1,"Allison, Miss Helen Loraine",1st,2.0,female,0,1
2,"Allison, Mr Hudson Joshua Creighton",1st,30.0,male,0,0
3,"Allison, Mrs Hudson JC (Bessie Waldo Daniels)",1st,25.0,female,0,1
4,"Allison, Master Hudson Trevor",1st,0.92,male,1,0


In [59]:
# Compare the row count before and after removing duplicate rows
deduplicated = dataframe.drop_duplicates()
print('Number of rows in the original dataframe: ', dataframe.shape[0])
print('Number of rows after Deduping: ', deduplicated.shape[0])


Number of rows in the original dataframe:  1313
Number of rows after Deduping:  1313


In [60]:
# Judge duplicates on 'Sex' only, keeping the first row seen for each value
dataframe.drop_duplicates(subset=['Sex'], keep='first')

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,29.0,female,1,1
2,"Allison, Mr Hudson Joshua Creighton",1st,30.0,male,0,0


In [61]:
# Judge duplicates on 'Sex' only, keeping the last row seen for each value
duplicate_key_columns = ['Sex']
dataframe.drop_duplicates(subset=duplicate_key_columns, keep='last')

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
1307,"Zabour, Miss Tamini",3rd,,female,0,1
1312,"Zimmerman, Leo",3rd,29.0,male,0,0


# 3.13 Grouping Rows by Values

In [62]:
import pandas as pd
# Create url
url = 'https://tinyurl.com/titanic-csv'
# Load as a dataframe
dataframe = pd.read_csv(url)
# Group rows by the values of the column 'Sex', calculate mean of each group.
# numeric_only=True restricts the mean to the numeric columns; without it,
# pandas 2.0+ raises a TypeError when it reaches the text columns
# ('Name', 'PClass') instead of silently dropping them.
dataframe.groupby('Sex').mean(numeric_only=True)

Unnamed: 0_level_0,Age,Survived,SexCode
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,29.396424,0.666667,1.0
male,31.014338,0.166863,0.0


In [63]:
# Grouping alone yields a GroupBy object; nothing is computed until an
# aggregation is applied to it
dataframe.groupby(by='Sex')

<pandas.core.groupby.DataFrameGroupBy object at 0x10c8375f8>

In [64]:
# Group by 'Survived', then count the non-missing names in each group
names_by_survival = dataframe.groupby(by='Survived')['Name']
names_by_survival.count()

Survived
0    863
1    450
Name: Name, dtype: int64

In [65]:
# Group by 'Sex' and then 'Survived'; average the 'Age' within each group
grouping_columns = ['Sex', 'Survived']
dataframe.groupby(by=grouping_columns)['Age'].mean()

Sex     Survived
female  0           24.901408
        1           30.867143
male    0           32.320780
        1           25.951875
Name: Age, dtype: float64

# 3.14 Grouping Rows by Time

In [67]:
import pandas as pd
import numpy as np
# Number of observations, shared by the index and the random column so the
# two lengths cannot drift apart
N_OBSERVATIONS = 100000
# Fixed seed so the random sale amounts are the same on every run
SEED = 0
np.random.seed(SEED)
# Create date range: one timestamp every 30 seconds. The lowercase 's' alias
# is used because the uppercase 'S' spelling is deprecated in pandas 2.2+.
time_index = pd.date_range('06/06/2017', periods=N_OBSERVATIONS, freq='30s')
# Create DataFrame indexed by those timestamps
dataframe = pd.DataFrame(index=time_index)
# Create column of random integer values in the range 1..9 (upper bound exclusive)
dataframe['Sale_Amount'] = np.random.randint(1, 10, N_OBSERVATIONS)
# Group rows by week, calculate sum per week
dataframe.resample('W').sum()

Unnamed: 0,Sale_Amount
2017-06-11,86171
2017-06-18,100798
2017-06-25,100670
2017-07-02,100705
2017-07-09,100851
2017-07-16,10476


In [68]:
# The three earliest rows of the frame
dataframe.iloc[:3]

Unnamed: 0,Sale_Amount
2017-06-06 00:00:00,2
2017-06-06 00:00:30,3
2017-06-06 00:01:00,5


In [70]:
# The three final rows of the frame
dataframe.iloc[-3:]

Unnamed: 0,Sale_Amount
2017-07-10 17:18:30,8
2017-07-10 17:19:00,6
2017-07-10 17:19:30,2


In [71]:
# Bucket the rows into two-week bins and average each bin
two_week_bins = dataframe.resample(rule='2W')
two_week_bins.mean()

Unnamed: 0,Sale_Amount
2017-06-11,4.986748
2017-06-25,4.996726
2017-07-09,4.998909
2017-07-23,5.036538


In [72]:
# Bucket the rows by calendar month and count the rows in each bucket.
# NOTE(review): newer pandas releases deprecate the 'M' alias in favour of
# 'ME' - confirm against the installed version.
monthly_bins = dataframe.resample(rule='M')
monthly_bins.count()

Unnamed: 0,Sale_Amount
2017-06-30,72000
2017-07-31,28000


In [73]:
# Count rows per calendar month, labelling each bin with its left edge
# instead of its right edge.
# NOTE(review): newer pandas releases deprecate the 'M' alias in favour of
# 'ME' - confirm against the installed version.
dataframe.resample(rule='M', label='left').count()

Unnamed: 0,Sale_Amount
2017-05-31,72000
2017-06-30,28000


# 3.15 Looping Over a Column

In [74]:
import pandas as pd
# Location of the Titanic passenger CSV
url = 'https://tinyurl.com/titanic-csv'
# Read the CSV into a DataFrame
dataframe = pd.read_csv(filepath_or_buffer=url)
# Iterate over the first two passenger names and print each in upper case
first_two_names = dataframe['Name'].iloc[:2]
for name in first_two_names:
    print(name.upper())

ALLEN, MISS ELISABETH WALTON
ALLISON, MISS HELEN LORAINE


# 3.16 Applying a Function Over All Elements in a Column

In [75]:
import pandas as pd
# Location of the Titanic passenger CSV
url = 'https://tinyurl.com/titanic-csv'
# Read the CSV into a DataFrame
dataframe = pd.read_csv(filepath_or_buffer=url)
# Helper applied to each element of a column
def uppercase(x):
    """Return ``x`` converted to upper case via its ``upper()`` method."""
    uppercased = x.upper()
    return uppercased
# Run `uppercase` on every name, then show the first two results
uppercased_names = dataframe['Name'].apply(uppercase)
uppercased_names.iloc[:2]

0    ALLEN, MISS ELISABETH WALTON
1     ALLISON, MISS HELEN LORAINE
Name: Name, dtype: object

# 3.17 Applying a Function to Groups

In [76]:
import pandas as pd
# Location of the Titanic passenger CSV
url = 'https://tinyurl.com/titanic-csv'
# Read the CSV into a DataFrame
dataframe = pd.read_csv(filepath_or_buffer=url)


# Function applied to each group's sub-DataFrame
def count_non_missing(group):
    """Return the per-column count of non-missing values in ``group``."""
    return group.count()


# Group rows by 'Sex' and apply the function to every group
dataframe.groupby(by='Sex').apply(count_non_missing)

Unnamed: 0_level_0,Name,PClass,Age,Sex,Survived,SexCode
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
female,462,462,288,462,462,462
male,851,851,468,851,851,851


# 3.18 Concatenating DataFrames

In [77]:
import pandas as pd
# Column order shared by both DataFrames
name_columns = ['id', 'first', 'last']

# First DataFrame: people with ids 1-3
data_a = {
    'id': ['1', '2', '3'],
    'first': ['Alex', 'Amy', 'Allen'],
    'last': ['Anderson', 'Ackerman', 'Ali'],
}
dataframe_a = pd.DataFrame(data=data_a, columns=name_columns)

# Second DataFrame: people with ids 4-6
data_b = {
    'id': ['4', '5', '6'],
    'first': ['Billy', 'Brian', 'Bran'],
    'last': ['Bonder', 'Blace', 'Balwner'],
}
dataframe_b = pd.DataFrame(data=data_b, columns=name_columns)

# Stack the two DataFrames vertically; each keeps its own 0..2 index labels
pd.concat([dataframe_a, dataframe_b], axis='index')

Unnamed: 0,id,first,last
0,1,Alex,Anderson
1,2,Amy,Ackerman
2,3,Allen,Ali
0,4,Billy,Bonder
1,5,Brian,Blace
2,6,Bran,Balwner


In [78]:
# Place the two DataFrames side by side, aligning rows on their index labels
pd.concat([dataframe_a, dataframe_b], axis='columns')

Unnamed: 0,id,first,last,id.1,first.1,last.1
0,1,Alex,Anderson,4,Billy,Bonder
1,2,Amy,Ackerman,5,Brian,Blace
2,3,Allen,Ali,6,Bran,Balwner


A single new row, built as a Series, can also be added to a DataFrame.

In [83]:
# Create row: a Series whose index labels match the DataFrame's column names
row = pd.Series([100, 'Chris', 'Chillon'], index = ['id', 'first', 'last'])
# Append row. DataFrame.append was removed in pandas 2.0, so wrap the Series
# in a one-row DataFrame and concatenate instead; ignore_index=True renumbers
# the rows 0..n-1. The result is a new DataFrame; `dataframe_a` is not modified.
pd.concat([dataframe_a, pd.DataFrame([row])], ignore_index=True)

Unnamed: 0,id,first,last
0,1,Alex,Anderson
1,2,Amy,Ackerman
2,3,Allen,Ali
3,100,Chris,Chillon


# 3.19 Merging DataFrames

In [85]:
import pandas as pd 
# Employees table: one row per employee id
employee_data = {
    'employee_id': ['1', '2', '3', '4'],
    'name': ['Amy Jones', 'Allen Keys', 'Alice Bees', 'Tim Horton'],
}
dataframe_employees = pd.DataFrame(data=employee_data,
                                   columns=['employee_id', 'name'])

# Sales table: one row per employee id that has recorded sales
sales_data = {
    'employee_id': ['3', '4', '5', '6'],
    'total_sales': ['23455', '2341', '3823', '2313'],
}
dataframe_sales = pd.DataFrame(data=sales_data,
                               columns=['employee_id', 'total_sales'])

# Inner join (the default): keep only employee ids present in both tables
dataframe_employees.merge(dataframe_sales, on='employee_id')

Unnamed: 0,employee_id,name,total_sales
0,3,Alice Bees,23455
1,4,Tim Horton,2341


In [86]:
# Outer join: keep every employee id found in either table; values missing
# from one side are filled with NaN
dataframe_employees.merge(dataframe_sales, on='employee_id', how='outer')

Unnamed: 0,employee_id,name,total_sales
0,1,Amy Jones,
1,2,Allen Keys,
2,3,Alice Bees,23455.0
3,4,Tim Horton,2341.0
4,5,,3823.0
5,6,,2313.0


In [88]:
# Left join: keep every row of the employees table, matched to sales where
# an id exists there
dataframe_employees.merge(dataframe_sales, on='employee_id', how='left')

Unnamed: 0,employee_id,name,total_sales
0,1,Amy Jones,
1,2,Allen Keys,
2,3,Alice Bees,23455.0
3,4,Tim Horton,2341.0


In [89]:
# Right join: keep every row of the sales table, matched to employees where
# an id exists there
dataframe_employees.merge(dataframe_sales, on='employee_id', how='right')

Unnamed: 0,employee_id,name,total_sales
0,3,Alice Bees,23455
1,4,Tim Horton,2341
2,5,,3823
3,6,,2313


In [90]:
# Name the join column separately for each side (useful when the two tables
# call the key column by different names)
dataframe_employees.merge(dataframe_sales,
                          left_on='employee_id',
                          right_on='employee_id')

Unnamed: 0,employee_id,name,total_sales
0,3,Alice Bees,23455
1,4,Tim Horton,2341


In [91]:
# Join on the row index of both tables rather than on a column; the two
# 'employee_id' columns are both kept and receive _x / _y suffixes
dataframe_employees.merge(dataframe_sales,
                          left_index=True,
                          right_index=True)

Unnamed: 0,employee_id_x,name,employee_id_y,total_sales
0,1,Amy Jones,3,23455
1,2,Allen Keys,4,2341
2,3,Alice Bees,5,3823
3,4,Tim Horton,6,2313
