# DataFrames

In [1]:
import numpy as np
import pandas as pd

In [2]:
# DataFrame:

data = {
    "Name": ["Alice", "Bob", "Charlie", "David", "Eve", "Alice"],
    "Age": [25, 30, 35, np.nan, 29, 25],
    "Department": ["HR", "IT", "Finance", "IT", "HR", "HR"],
    "Salary": [50000, 60000, 70000, 62000, np.nan, 50000],
}

df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,Department,Salary
0,Alice,25.0,HR,50000.0
1,Bob,30.0,IT,60000.0
2,Charlie,35.0,Finance,70000.0
3,David,,IT,62000.0
4,Eve,29.0,HR,
5,Alice,25.0,HR,50000.0


# Read data in DataFrame

In [3]:
# Display the first n rows (default = 5) -> df.head()
print("First two rows:")
df.head(2)

First two rows:


Unnamed: 0,Name,Age,Department,Salary
0,Alice,25.0,HR,50000.0
1,Bob,30.0,IT,60000.0


In [4]:
# Displays the last n rows (default = 5) -> df.tail()
print("Display last 3 rows:")
df.tail(3)

Display last 3 rows:


Unnamed: 0,Name,Age,Department,Salary
3,David,,IT,62000.0
4,Eve,29.0,HR,
5,Alice,25.0,HR,50000.0


# Indexing iloc

In [5]:
# First row in DataFrame
print("First row:")
df.iloc[0]

First row:


Name            Alice
Age              25.0
Department         HR
Salary        50000.0
Name: 0, dtype: object

In [6]:
print("From row 2 to 4 (last index is not included):")
df.iloc[2:4]

From row 2 to 4 (last index is not included):


Unnamed: 0,Name,Age,Department,Salary
2,Charlie,35.0,Finance,70000.0
3,David,,IT,62000.0


In [7]:
# Reverse the dataframe
print("Reverse Dataframe:")
df.iloc[::-1]

Reverse Dataframe:


Unnamed: 0,Name,Age,Department,Salary
5,Alice,25.0,HR,50000.0
4,Eve,29.0,HR,
3,David,,IT,62000.0
2,Charlie,35.0,Finance,70000.0
1,Bob,30.0,IT,60000.0
0,Alice,25.0,HR,50000.0


In [8]:
print("Rows and Columns:")
df.iloc[1:3, :2]  # rows, columns

Rows and Columns:


Unnamed: 0,Name,Age
1,Bob,30.0
2,Charlie,35.0


# Indexing using loc

In [9]:
# First row
print("First row using loc")
df.loc[0]

First row using loc


Name            Alice
Age              25.0
Department         HR
Salary        50000.0
Name: 0, dtype: object

In [10]:
# From index 3 to 5 (last index is not included)
print("From index 3 to 5:")
df.loc[3:5]

From index 3 to 5:


Unnamed: 0,Name,Age,Department,Salary
3,David,,IT,62000.0
4,Eve,29.0,HR,
5,Alice,25.0,HR,50000.0


In [11]:
# Fetch index 3 to 5 and display only Name and salary
print("Fetch index 3 to 5 and display only `Name` and `salary`:")
df.loc[3:5, ["Name", "Salary"]]

Fetch index 3 to 5 and display only `Name` and `salary`:


Unnamed: 0,Name,Salary
3,David,62000.0
4,Eve,
5,Alice,50000.0


# Without iloc and loc

In [12]:
print("Access just one column:")
df["Name"]

Access just one column:


0      Alice
1        Bob
2    Charlie
3      David
4        Eve
5      Alice
Name: Name, dtype: object

In [13]:
print("Access multiple columns:")
df[["Name", "Salary"]]

Access multiple columns:


Unnamed: 0,Name,Salary
0,Alice,50000.0
1,Bob,60000.0
2,Charlie,70000.0
3,David,62000.0
4,Eve,
5,Alice,50000.0


# Drop Columns

In [14]:
# Drop Age column

# NOTE: The axis for row is 0
#       The axis for column is 1
# So, the Age is a column, we must use pass axis = 1 to drop column ==> df.drop(column_name, axis, inplace)
# Here inplace will perform the operation in the original DataFrame. The default value is `False` which means the changes will be temporary.

print("Drop Age column temporarily:")
df.drop("Age", axis=1)

Drop Age column temporarily:


Unnamed: 0,Name,Department,Salary
0,Alice,HR,50000.0
1,Bob,IT,60000.0
2,Charlie,Finance,70000.0
3,David,IT,62000.0
4,Eve,HR,
5,Alice,HR,50000.0


# Other important functions and attributes

In [15]:
print("Shape of the DataSet:", df.shape)

Shape of the DataSet: (6, 4)


In [16]:
print("Information about the DataFrame:")
# The columns are also known as features

df.info()

Information about the DataFrame:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Name        6 non-null      object 
 1   Age         5 non-null      float64
 2   Department  6 non-null      object 
 3   Salary      5 non-null      float64
dtypes: float64(2), object(2)
memory usage: 324.0+ bytes


In [17]:
# Statistical information of Dataset
print("Statistical information of Dataset:")
df.describe()

Statistical information of Dataset:


Unnamed: 0,Age,Salary
count,5.0,5.0
mean,28.8,58400.0
std,4.147288,8532.291603
min,25.0,50000.0
25%,25.0,50000.0
50%,29.0,60000.0
75%,30.0,62000.0
max,35.0,70000.0


# Broadcasting

In [18]:
# Increment Salary by 5000 for all employees
print("Increment Salary by 5000 for all employees:")
df["Salary"] = df["Salary"] + 5000
df

Increment Salary by 5000 for all employees:


Unnamed: 0,Name,Age,Department,Salary
0,Alice,25.0,HR,55000.0
1,Bob,30.0,IT,65000.0
2,Charlie,35.0,Finance,75000.0
3,David,,IT,67000.0
4,Eve,29.0,HR,
5,Alice,25.0,HR,55000.0


# Modify Dataset

In [19]:
# Rename Department column to Dept
print("Rename Department column to Dept:")
# Here inplace will make sure the changes are done in original DataFrame
df.rename(columns={"Department": "Dept"}, inplace=True)
df

# Renaming multiple columns
# df.rename(columns={"Department": "Dept", "Salary": "CTC"}, inplace=True)

Rename Department column to Dept:


Unnamed: 0,Name,Age,Dept,Salary
0,Alice,25.0,HR,55000.0
1,Bob,30.0,IT,65000.0
2,Charlie,35.0,Finance,75000.0
3,David,,IT,67000.0
4,Eve,29.0,HR,
5,Alice,25.0,HR,55000.0


In [20]:
# Replace one value
print("Replace `Charlie` with `Rose` in Name column:")
# df["Name"].replace("Charlie", "Rose", inplace=True) # Deprecated way
# df["Name"] = df["Name"].replace("Charlie", "Rose")

# replace multiple values
# df["Name"].replace({"Charlie": "Rose", "David": "John"}, inplace=True) # Deprecated way
df["Name"] = df["Name"].replace({"Charlie": "Rose", "David": "John"})
df

Replace `Charlie` with `Rose` in Name column:


Unnamed: 0,Name,Age,Dept,Salary
0,Alice,25.0,HR,55000.0
1,Bob,30.0,IT,65000.0
2,Rose,35.0,Finance,75000.0
3,John,,IT,67000.0
4,Eve,29.0,HR,
5,Alice,25.0,HR,55000.0


In [21]:
# Create new column Bonus as 10% of Salary
print("Create new column Bonus as 10% of Salary:")
df["Bonus"] = df["Salary"] * 5.1
df

Create new column Bonus as 10% of Salary:


Unnamed: 0,Name,Age,Dept,Salary,Bonus
0,Alice,25.0,HR,55000.0,280500.0
1,Bob,30.0,IT,65000.0,331500.0
2,Rose,35.0,Finance,75000.0,382500.0
3,John,,IT,67000.0,341700.0
4,Eve,29.0,HR,,
5,Alice,25.0,HR,55000.0,280500.0


In [22]:
# Get unique values from Dept column
print("Unique values from Dept column:")
df["Dept"].unique()

Unique values from Dept column:


array(['HR', 'IT', 'Finance'], dtype=object)

In [23]:
# Calculate no.of employees in each Dept
print("No.of employees in each Dept:")
df["Dept"].value_counts()

No.of employees in each Dept:


Dept
HR         3
IT         2
Finance    1
Name: count, dtype: int64

In [24]:
# Sum of all salaries
print("Sum of all salaries:")
df["Salary"].sum()

Sum of all salaries:


np.float64(317000.0)

In [25]:
# Remove nulls and calculate sum of salaries
print("Remove nulls and calculate sum of salaries:")
df["Salary"].dropna().sum()

Remove nulls and calculate sum of salaries:


np.float64(317000.0)

In [26]:
# Check for null values
print("Check for null values:")
df.isnull().sum()

Check for null values:


Name      0
Age       1
Dept      0
Salary    1
Bonus     1
dtype: int64

In [27]:
# Drop rows which have null values
print("Drop rows which have null values:")
df.dropna()  # Rows which have null values. Optionally it takes how='all' or 'any' parameter. The default is 'any'

# df.dropna(how="any")

Drop rows which have null values:


Unnamed: 0,Name,Age,Dept,Salary,Bonus
0,Alice,25.0,HR,55000.0,280500.0
1,Bob,30.0,IT,65000.0,331500.0
2,Rose,35.0,Finance,75000.0,382500.0
5,Alice,25.0,HR,55000.0,280500.0


In [28]:
# Drop rows where all values are null
print("Drop rows where all values are null:")
df.dropna(how="all")  # If all the columns have null values, then only drop that row

Drop rows where all values are null:


Unnamed: 0,Name,Age,Dept,Salary,Bonus
0,Alice,25.0,HR,55000.0,280500.0
1,Bob,30.0,IT,65000.0,331500.0
2,Rose,35.0,Finance,75000.0,382500.0
3,John,,IT,67000.0,341700.0
4,Eve,29.0,HR,,
5,Alice,25.0,HR,55000.0,280500.0


In [29]:
# Replace null/missing values in age column with the mean of that column
print("Replace null/missing values in age column with the mean of that column:")
df["Age"].fillna(df["Age"].mean())  # inplace=True to make changes in original DataFrame

Replace null/missing values in age column with the mean of that column:


0    25.0
1    30.0
2    35.0
3    28.8
4    29.0
5    25.0
Name: Age, dtype: float64

In [30]:
# Replace null/missing values in salary column with median of that column
print("Replace null/missing values in salary column with median of that column:")
df["Salary"].fillna(df["Salary"].median())  # inplace=True to make changes in original DataFrame

Replace null/missing values in salary column with median of that column:


0    55000.0
1    65000.0
2    75000.0
3    67000.0
4    65000.0
5    55000.0
Name: Salary, dtype: float64

In [31]:
# Forward fill method

# NOTE: Forward and Backward fills will not work in some cases where the first or last value is null
print("Use Forward fill method to replace null/missing values in age column:")
# df["Age"].fillna(method="ffill")    # Deprecated way
df["Age"].ffill()  # inplace=True to make changes in original DataFrame

Use Forward fill method to replace null/missing values in age column:


0    25.0
1    30.0
2    35.0
3    35.0
4    29.0
5    25.0
Name: Age, dtype: float64

In [32]:
# Backward fill method
print("Use Backward fill method to replace null/missing values in age column:")
df["Age"].bfill()  # inplace=True to make changes in original DataFrame

Use Backward fill method to replace null/missing values in age column:


0    25.0
1    30.0
2    35.0
3    29.0
4    29.0
5    25.0
Name: Age, dtype: float64

In [33]:
# Duplicate values
print("Duplicate values in DataFrame:")

# duplicated takes one argument `keep` which can have values 'first' and 'last'. It means whether to mark duplicates as True except for the first occurrence or last occurrence. The default is 'first'
df_dup = df[df.duplicated(keep="last")]
df_dup

Duplicate values in DataFrame:


Unnamed: 0,Name,Age,Dept,Salary,Bonus
0,Alice,25.0,HR,55000.0,280500.0


In [34]:
# Drop duplicate rows
print("Drop duplicate rows:")

# This fun also takes an argument `keep` similar to duplicated function. It also takes inplace argument to perform operation in original DataFrame
df.drop_duplicates()

Drop duplicate rows:


Unnamed: 0,Name,Age,Dept,Salary,Bonus
0,Alice,25.0,HR,55000.0,280500.0
1,Bob,30.0,IT,65000.0,331500.0
2,Rose,35.0,Finance,75000.0,382500.0
3,John,,IT,67000.0,341700.0
4,Eve,29.0,HR,,


In [35]:
# Invalid values and outliers

# For this example, I'm updating -1 to salary column for 4th index to represent invalid values
df.loc[4, "Salary"] = -1
print("Dealing with Invalid values:")

# Replace -1 values with NaN
df["Salary"] = df["Salary"].apply(lambda x: np.nan if x == -1 else x)
df

Dealing with Invalid values:


Unnamed: 0,Name,Age,Dept,Salary,Bonus
0,Alice,25.0,HR,55000.0,280500.0
1,Bob,30.0,IT,65000.0,331500.0
2,Rose,35.0,Finance,75000.0,382500.0
3,John,,IT,67000.0,341700.0
4,Eve,29.0,HR,,
5,Alice,25.0,HR,55000.0,280500.0


In [36]:
# Str operations on columns with string values

print("Convert Name values to uppercase:")
# Convert to uppercase
df["Name"].str.upper()

# df["Name"] = df["Name"].str.upper() # This will update the Name column with uppercase values in place

Convert Name values to uppercase:


0    ALICE
1      BOB
2     ROSE
3     JOHN
4      EVE
5    ALICE
Name: Name, dtype: object

In [37]:
# split the Name column values

# function to generate random characters
def random_char():
    return np.random.choice(list("ABCDEFGHIJKLMNOPQRSTUVWXYZ"))


# Rename Name column to Full_Name
df.rename(columns={"Name": "Full_Name"}, inplace=True)  # change Name column to Full_Name

# For this example, I'll append _ and some random characters to the Name values and then split based on _
# df["Name"] = df["Name"] + "_" + np.random.choice(list("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), size=len(df))
df["Full_Name"] = df["Full_Name"].apply(lambda x: x + "_" + random_char() if "_" not in x else x)

# Either replace the Name column with first part only
# df["Name"] = df["Name"].str.split("_").str[0]


# Or create new columns from split parts
df[["FName", "LName"]] = df["Full_Name"].str.split("_", expand=True)  # expand=True to split into multiple columns
df

Unnamed: 0,Full_Name,Age,Dept,Salary,Bonus,FName,LName
0,Alice_B,25.0,HR,55000.0,280500.0,Alice,B
1,Bob_H,30.0,IT,65000.0,331500.0,Bob,H
2,Rose_L,35.0,Finance,75000.0,382500.0,Rose,L
3,John_C,,IT,67000.0,341700.0,John,C
4,Eve_E,29.0,HR,,,Eve,E
5,Alice_Z,25.0,HR,55000.0,280500.0,Alice,Z


# Joins

In [38]:
department_info = {
    "Department": ["HR", "IT", "Finance"],
    "Location": ["New York", "San Francisco", "Chicago"],
    "Manager": ["Laura", "Steve", "Nina"],
}
dept_df = pd.DataFrame(department_info)
dept_df

Unnamed: 0,Department,Location,Manager
0,HR,New York,Laura
1,IT,San Francisco,Steve
2,Finance,Chicago,Nina


In [39]:
# Join two Datasets
print("Join two Datasets:")

# concat will take axis parameter to specify whether to join row-wise or column-wise. The default is axis=0 (row-wise)
pd.concat([df, dept_df])  # Here the datasets will be joined row-wise,
# so both datasets must have same columns. If columns are different, then missing values will be filled with NaN

Join two Datasets:


Unnamed: 0,Full_Name,Age,Dept,Salary,Bonus,FName,LName,Department,Location,Manager
0,Alice_B,25.0,HR,55000.0,280500.0,Alice,B,,,
1,Bob_H,30.0,IT,65000.0,331500.0,Bob,H,,,
2,Rose_L,35.0,Finance,75000.0,382500.0,Rose,L,,,
3,John_C,,IT,67000.0,341700.0,John,C,,,
4,Eve_E,29.0,HR,,,Eve,E,,,
5,Alice_Z,25.0,HR,55000.0,280500.0,Alice,Z,,,
0,,,,,,,,HR,New York,Laura
1,,,,,,,,IT,San Francisco,Steve
2,,,,,,,,Finance,Chicago,Nina


In [40]:
# Join two DataSets column-wise
print("Join two DataSets column-wise:")
pd.concat([df, dept_df], axis=1)  # axis=1 to join column-wise
# As both the datasets have different columns, the missing values will be filled with NaN

Join two DataSets column-wise:


Unnamed: 0,Full_Name,Age,Dept,Salary,Bonus,FName,LName,Department,Location,Manager
0,Alice_B,25.0,HR,55000.0,280500.0,Alice,B,HR,New York,Laura
1,Bob_H,30.0,IT,65000.0,331500.0,Bob,H,IT,San Francisco,Steve
2,Rose_L,35.0,Finance,75000.0,382500.0,Rose,L,Finance,Chicago,Nina
3,John_C,,IT,67000.0,341700.0,John,C,,,
4,Eve_E,29.0,HR,,,Eve,E,,,
5,Alice_Z,25.0,HR,55000.0,280500.0,Alice,Z,,,


In [41]:
# Lets change the Dept column name in df to Department to perform concat operation
df.rename(columns={"Dept": "Department"}, inplace=True)
print("STEP-1: Join two Datasets using concat:")

pd.concat(
    [df, dept_df], axis=1
)  # Now both datasets have one column `Department` as common concat will try to join based on that column.
# But since there are duplicate values in Department column in df, the result will have all combinations of those rows. So, this is not the correct way to join datasets based on common columns.

STEP-1: Join two Datasets using concat:


Unnamed: 0,Full_Name,Age,Department,Salary,Bonus,FName,LName,Department.1,Location,Manager
0,Alice_B,25.0,HR,55000.0,280500.0,Alice,B,HR,New York,Laura
1,Bob_H,30.0,IT,65000.0,331500.0,Bob,H,IT,San Francisco,Steve
2,Rose_L,35.0,Finance,75000.0,382500.0,Rose,L,Finance,Chicago,Nina
3,John_C,,IT,67000.0,341700.0,John,C,,,
4,Eve_E,29.0,HR,,,Eve,E,,,
5,Alice_Z,25.0,HR,55000.0,280500.0,Alice,Z,,,


In [42]:
# The concatenation will result in duplicate columns `Department` as both datasets have Department column. To remove duplicate columns, we can use merge function.
print("STEP-1: Merge two Datasets based on common column Department:")
pd.merge(df, dept_df, on="Department")

STEP-1: Merge two Datasets based on common column Department:


Unnamed: 0,Full_Name,Age,Department,Salary,Bonus,FName,LName,Location,Manager
0,Alice_B,25.0,HR,55000.0,280500.0,Alice,B,New York,Laura
1,Bob_H,30.0,IT,65000.0,331500.0,Bob,H,San Francisco,Steve
2,Rose_L,35.0,Finance,75000.0,382500.0,Rose,L,Chicago,Nina
3,John_C,,IT,67000.0,341700.0,John,C,San Francisco,Steve
4,Eve_E,29.0,HR,,,Eve,E,New York,Laura
5,Alice_Z,25.0,HR,55000.0,280500.0,Alice,Z,New York,Laura


In [43]:
# The above 2 steps can be done in a better way.

# Lets join the two datasets based on Dept and Department columns

# Lets rename Department to Dept
df.rename(columns={"Department": "Dept"}, inplace=True)
print("Lets join the two datasets based on Dept and Department columns:")

# here `how` can be 'inner', 'outer', 'left', 'right'
merged_df = pd.merge(df, dept_df, left_on="Dept", right_on="Department", how="inner")
merged_df

Lets join the two datasets based on Dept and Department columns:


Unnamed: 0,Full_Name,Age,Dept,Salary,Bonus,FName,LName,Department,Location,Manager
0,Alice_B,25.0,HR,55000.0,280500.0,Alice,B,HR,New York,Laura
1,Bob_H,30.0,IT,65000.0,331500.0,Bob,H,IT,San Francisco,Steve
2,Rose_L,35.0,Finance,75000.0,382500.0,Rose,L,Finance,Chicago,Nina
3,John_C,,IT,67000.0,341700.0,John,C,IT,San Francisco,Steve
4,Eve_E,29.0,HR,,,Eve,E,HR,New York,Laura
5,Alice_Z,25.0,HR,55000.0,280500.0,Alice,Z,HR,New York,Laura


# Read CSV file

In [44]:
# Read CSV file
data = pd.read_csv("../data.csv")

# Display info about the dataset
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   id             100 non-null    int64 
 1   first_name     100 non-null    object
 2   last_name      95 non-null     object
 3   email          98 non-null     object
 4   gender         88 non-null     object
 5   birth_date     95 non-null     object
 6   time_of_birth  94 non-null     object
 7   is_active      95 non-null     object
 8   address        88 non-null     object
 9   country        93 non-null     object
 10  timezone       92 non-null     object
dtypes: int64(1), object(10)
memory usage: 8.7+ KB


In [45]:
# Shape of Dataset
data.shape

(100, 11)

In [46]:
# Have a look at the data
data.head()  # This will display first 5 rows

Unnamed: 0,id,first_name,last_name,email,gender,birth_date,time_of_birth,is_active,address,country,timezone
0,1,Veda,Pavey,vpavey0@delicious.com,Female,7/7/2017,12:44:48,False,,Indonesia,Asia/Jakarta
1,2,Phylis,Fritzer,pfritzer1@posterous.com,Female,9/29/2014,17:15:20,False,,Indonesia,Asia/Jakarta
2,3,Tim,Bigby,tbigby2@amazonaws.com,Male,5/11/2025,7:07:32,False,7th Floor,China,Asia/Chongqing
3,4,Sherill,Arkley,sarkley3@seattletimes.com,Female,3/11/2013,21:42:11,False,Suite 30,China,Asia/Shanghai
4,5,Jared,Burdytt,,Male,9/28/2009,22:41:39,True,18th Floor,China,Asia/Chongqing


In [47]:
data.tail()

Unnamed: 0,id,first_name,last_name,email,gender,birth_date,time_of_birth,is_active,address,country,timezone
95,96,Emelina,Stockford,estockford2n@symantec.com,Polygender,3/16/2017,6:50:38,True,Apt 78,United States,America/New_York
96,97,Alayne,Grishankov,agrishankov2o@rambler.ru,Female,10/27/2002,15:37:42,True,Suite 16,Russia,Europe/Moscow
97,98,Tatum,Lovegrove,tlovegrove2p@meetup.com,Female,4/22/2005,11:49:56,False,,Venezuela,America/Caracas
98,99,Shellie,Novis,snovis2q@usa.gov,Female,12/17/1997,20:36:33,False,Apt 935,Malaysia,Asia/Kuala_Lumpur
99,100,Sheila,Cowell,scowell2r@about.com,Female,11/21/1997,21:29:01,False,Suite 70,Venezuela,America/Caracas


In [48]:
# Convert columns to actual data types
data["birth_date"] = pd.to_datetime(data["birth_date"])  # Convert object to datetime
data["time_of_birth"] = pd.to_timedelta(data["time_of_birth"])  # Convert object to timedelta
data["is_active"] = data["is_active"].astype(bool)  # Convert object to boolean

data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype          
---  ------         --------------  -----          
 0   id             100 non-null    int64          
 1   first_name     100 non-null    object         
 2   last_name      95 non-null     object         
 3   email          98 non-null     object         
 4   gender         88 non-null     object         
 5   birth_date     95 non-null     datetime64[ns] 
 6   time_of_birth  94 non-null     timedelta64[ns]
 7   is_active      100 non-null    bool           
 8   address        88 non-null     object         
 9   country        93 non-null     object         
 10  timezone       92 non-null     object         
dtypes: bool(1), datetime64[ns](1), int64(1), object(7), timedelta64[ns](1)
memory usage: 8.0+ KB


In [49]:
data.head()

Unnamed: 0,id,first_name,last_name,email,gender,birth_date,time_of_birth,is_active,address,country,timezone
0,1,Veda,Pavey,vpavey0@delicious.com,Female,2017-07-07,0 days 12:44:48,False,,Indonesia,Asia/Jakarta
1,2,Phylis,Fritzer,pfritzer1@posterous.com,Female,2014-09-29,0 days 17:15:20,False,,Indonesia,Asia/Jakarta
2,3,Tim,Bigby,tbigby2@amazonaws.com,Male,2025-05-11,0 days 07:07:32,False,7th Floor,China,Asia/Chongqing
3,4,Sherill,Arkley,sarkley3@seattletimes.com,Female,2013-03-11,0 days 21:42:11,False,Suite 30,China,Asia/Shanghai
4,5,Jared,Burdytt,,Male,2009-09-28,0 days 22:41:39,True,18th Floor,China,Asia/Chongqing
