# Pandas

In [34]:
import pandas as pd
import numpy as np


## Working with Series in Pandas

In [35]:
s = pd.Series([1,2,3,4,5,6,7,8,9,10])
s.name = "Calories"
s

Unnamed: 0,Calories
0,1
1,2
2,3
3,4
4,5
5,6
6,7
7,8
8,9
9,10


In [36]:
s.dtype


dtype('int64')

In [37]:
s.values

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [38]:
s.index

RangeIndex(start=0, stop=10, step=1)

### Using "iloc" & "loc" Functions


In [39]:
#iloc gives the value at the particular index mentioned in the brackets with the datatype.
s.iloc[3] #single value search

np.int64(4)

In [52]:
s.iloc[[0,2,4,7,8]] #multi value search

Unnamed: 0_level_0,Calories
Fruits,Unnamed: 1_level_1
Apple,1
Mango,3
Kiwi,5
Banana,8
Watermelon,9


In [53]:
#loc is used when the indexs are names and not numeric values anymore so it is label based indexing
s.loc["Grapes"]

np.int64(2)

### Labeling the index

In [None]:
index = ['Apple','Grapes','Mango','Pineapple','Kiwi','Orange','Papaya','Banana','Watermelon','Muskmelon']

In [44]:
s.index = index
s.index.name = "Fruits"
s

Unnamed: 0_level_0,Calories
Fruits,Unnamed: 1_level_1
Apple,1
Grapes,2
Mango,3
Pineapple,4
Kiwi,5
Orange,6
Papaya,7
Banana,8
Watermelon,9
Muskmelon,10


In [45]:
s["Papaya":"Banana"] # In label based indexing both start and ending values are inclued in the output

Unnamed: 0_level_0,Calories
Fruits,Unnamed: 1_level_1
Papaya,7
Banana,8


### Using Dictionary as a Series

In [46]:
fruit_protein = {
    "Guava": 2.6,
    "Avocado": 2.0,
    "Jackfruit": 1.7,
    "Dried Figs": 3.6,
    "Raisins": 3.1,
    "Blackberries": 2.0,
    "Banana": 1.1,
    "Orange": 0.9,
    "Apple": 0.3,
    "Watermelon": 0.6
} #Sample Dictionary


In [47]:
s2 = pd.Series(fruit_protein, name="Protein")
s2.index.name = "Fruits"
s2

Unnamed: 0_level_0,Protein
Fruits,Unnamed: 1_level_1
Guava,2.6
Avocado,2.0
Jackfruit,1.7
Dried Figs,3.6
Raisins,3.1
Blackberries,2.0
Banana,1.1
Orange,0.9
Apple,0.3
Watermelon,0.6


In [48]:
s2["Guava":"Orange"]

Unnamed: 0_level_0,Protein
Fruits,Unnamed: 1_level_1
Guava,2.6
Avocado,2.0
Jackfruit,1.7
Dried Figs,3.6
Raisins,3.1
Blackberries,2.0
Banana,1.1
Orange,0.9


In [49]:
s2.loc[["Raisins","Banana"]]

Unnamed: 0_level_0,Protein
Fruits,Unnamed: 1_level_1
Raisins,3.1
Banana,1.1


### Conditional Selection

In [50]:
s2>1

Unnamed: 0_level_0,Protein
Fruits,Unnamed: 1_level_1
Guava,True
Avocado,True
Jackfruit,True
Dried Figs,True
Raisins,True
Blackberries,True
Banana,True
Orange,False
Apple,False
Watermelon,False


In [54]:
s2[s2>1]

Unnamed: 0_level_0,Protein
Fruits,Unnamed: 1_level_1
Guava,2.6
Avocado,2.0
Jackfruit,1.7
Dried Figs,3.6
Raisins,3.1
Blackberries,2.0
Banana,1.1


### Logical Operations

In [55]:
# and operation
s2[(s2>0.5)&(s2<2)]

Unnamed: 0_level_0,Protein
Fruits,Unnamed: 1_level_1
Jackfruit,1.7
Banana,1.1
Orange,0.9
Watermelon,0.6


In [56]:
# or operation
s2[(s2<2)|(s2<5)]

Unnamed: 0_level_0,Protein
Fruits,Unnamed: 1_level_1
Guava,2.6
Avocado,2.0
Jackfruit,1.7
Dried Figs,3.6
Raisins,3.1
Blackberries,2.0
Banana,1.1
Orange,0.9
Apple,0.3
Watermelon,0.6


In [57]:
# not Operation
s2[~(s2>2)]

Unnamed: 0_level_0,Protein
Fruits,Unnamed: 1_level_1
Avocado,2.0
Jackfruit,1.7
Blackberries,2.0
Banana,1.1
Orange,0.9
Apple,0.3
Watermelon,0.6


### Modifying the series

In [58]:
# To change the values using the indexes
s2["Orange"] = 2.6
s2

Unnamed: 0_level_0,Protein
Fruits,Unnamed: 1_level_1
Guava,2.6
Avocado,2.0
Jackfruit,1.7
Dried Figs,3.6
Raisins,3.1
Blackberries,2.0
Banana,1.1
Orange,2.6
Apple,0.3
Watermelon,0.6


In [59]:
ser = pd.Series(['a', np.nan, 1, np.nan ,2])
s.notnull().sum()

np.int64(10)

## Working with DataFrames in Pandas


In [60]:
import numpy as np
import pandas as pd


data = {
    "Name": ["Anya", "Ravi", "John", "Meera", "Ravi", "Tina", "John"],
    "Age": [24, 31, 29, np.nan, 31, 26, 29],
    "Department": ["Marketing", "IT", "HR", "Finance", "IT", "Marketing", "HR"],
    "Salary": [52000, 63000, 58000, 67000, 63000, np.nan, 58000]
}


In [61]:
data


{'Name': ['Anya', 'Ravi', 'John', 'Meera', 'Ravi', 'Tina', 'John'],
 'Age': [24, 31, 29, nan, 31, 26, 29],
 'Department': ['Marketing', 'IT', 'HR', 'Finance', 'IT', 'Marketing', 'HR'],
 'Salary': [52000, 63000, 58000, 67000, 63000, nan, 58000]}

### Using DataFrames to arrange the data in a format

In [62]:
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,Department,Salary
0,Anya,24.0,Marketing,52000.0
1,Ravi,31.0,IT,63000.0
2,John,29.0,HR,58000.0
3,Meera,,Finance,67000.0
4,Ravi,31.0,IT,63000.0
5,Tina,26.0,Marketing,
6,John,29.0,HR,58000.0


### Using the "head" function

In [None]:
df.head(2) #used to get top values and we can choose how many values we want

### Using the "tail" function

In [63]:
df.tail() #has 5 as defualt value and it can be changed
df.tail(3)

Unnamed: 0,Name,Age,Department,Salary
4,Ravi,31.0,IT,63000.0
5,Tina,26.0,Marketing,
6,John,29.0,HR,58000.0


### using the "iloc" & "loc" functions

In [64]:
df.iloc[1:4] #includes only values from 1 to 3

Unnamed: 0,Name,Age,Department,Salary
1,Ravi,31.0,IT,63000.0
2,John,29.0,HR,58000.0
3,Meera,,Finance,67000.0


In [65]:
df.loc[1:4] #includes all the values form specified range (incluing last number)
df.loc[1:4, ["Age","Salary"]]

Unnamed: 0,Age,Salary
1,31.0,63000.0
2,29.0,58000.0
3,,67000.0
4,31.0,63000.0


### Accessing single and multiple columns

In [66]:
df["Age"]

Unnamed: 0,Age
0,24.0
1,31.0
2,29.0
3,
4,31.0
5,26.0
6,29.0


In [67]:
df[['Age',"Department"]]

Unnamed: 0,Age,Department
0,24.0,Marketing
1,31.0,IT
2,29.0,HR
3,,Finance
4,31.0,IT
5,26.0,Marketing
6,29.0,HR


### Droping a Column

In [68]:
df.drop("Salary",axis=1)
df

Unnamed: 0,Name,Age,Department,Salary
0,Anya,24.0,Marketing,52000.0
1,Ravi,31.0,IT,63000.0
2,John,29.0,HR,58000.0
3,Meera,,Finance,67000.0
4,Ravi,31.0,IT,63000.0
5,Tina,26.0,Marketing,
6,John,29.0,HR,58000.0


### Using "inplace" to delete the row or column in the orginal dataframe

In [69]:
# "inplace = True" is used in the "drop" to delete the column or teh row completly from the dataframe
df.drop("Salary", axis = 1, inplace = True)

In [70]:
df

Unnamed: 0,Name,Age,Department
0,Anya,24.0,Marketing
1,Ravi,31.0,IT
2,John,29.0,HR
3,Meera,,Finance
4,Ravi,31.0,IT
5,Tina,26.0,Marketing
6,John,29.0,HR


In [71]:
df.shape

(7, 3)

In [72]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Name        7 non-null      object 
 1   Age         6 non-null      float64
 2   Department  7 non-null      object 
dtypes: float64(1), object(2)
memory usage: 300.0+ bytes


In [73]:
df.describe()

Unnamed: 0,Age
count,6.0
mean,28.333333
std,2.804758
min,24.0
25%,26.75
50%,29.0
75%,30.5
max,31.0


### Broadcasting

In [74]:
df["Age"] = df["Age"] + 1000

In [75]:
df

Unnamed: 0,Name,Age,Department
0,Anya,1024.0,Marketing
1,Ravi,1031.0,IT
2,John,1029.0,HR
3,Meera,,Finance
4,Ravi,1031.0,IT
5,Tina,1026.0,Marketing
6,John,1029.0,HR


### Renaming the columns

In [76]:
df.rename(columns = {"Department" : "Dept"}, inplace = True)
df

Unnamed: 0,Name,Age,Dept
0,Anya,1024.0,Marketing
1,Ravi,1031.0,IT
2,John,1029.0,HR
3,Meera,,Finance
4,Ravi,1031.0,IT
5,Tina,1026.0,Marketing
6,John,1029.0,HR


### Checking all the unique values in a column

In [77]:
df["Name"].unique()
df["Dept"].unique()

array(['Marketing', 'IT', 'HR', 'Finance'], dtype=object)

### using "value_counts()" function

In [78]:
df["Dept"].value_counts()

Unnamed: 0_level_0,count
Dept,Unnamed: 1_level_1
Marketing,2
IT,2
HR,2
Finance,1


### Creating a new column

In [79]:
df["Salary"] = data["Salary"]

In [80]:
df

Unnamed: 0,Name,Age,Dept,Salary
0,Anya,1024.0,Marketing,52000.0
1,Ravi,1031.0,IT,63000.0
2,John,1029.0,HR,58000.0
3,Meera,,Finance,67000.0
4,Ravi,1031.0,IT,63000.0
5,Tina,1026.0,Marketing,
6,John,1029.0,HR,58000.0


In [81]:
df["Promoted Salary"] = df["Salary"] + 5000

In [82]:
df

Unnamed: 0,Name,Age,Dept,Salary,Promoted Salary
0,Anya,1024.0,Marketing,52000.0,57000.0
1,Ravi,1031.0,IT,63000.0,68000.0
2,John,1029.0,HR,58000.0,63000.0
3,Meera,,Finance,67000.0,72000.0
4,Ravi,1031.0,IT,63000.0,68000.0
5,Tina,1026.0,Marketing,,
6,John,1029.0,HR,58000.0,63000.0


### **Data Cleaning**



In [83]:
df.isnull().sum()

Unnamed: 0,0
Name,0
Age,1
Dept,0
Salary,1
Promoted Salary,1


### Using "dropna" function

In [84]:
df.dropna()

Unnamed: 0,Name,Age,Dept,Salary,Promoted Salary
0,Anya,1024.0,Marketing,52000.0,57000.0
1,Ravi,1031.0,IT,63000.0,68000.0
2,John,1029.0,HR,58000.0,63000.0
4,Ravi,1031.0,IT,63000.0,68000.0
6,John,1029.0,HR,58000.0,63000.0


In [85]:
df.dropna(how = "any") #if "how = 'any'" then any row which contains a null value will be delted

Unnamed: 0,Name,Age,Dept,Salary,Promoted Salary
0,Anya,1024.0,Marketing,52000.0,57000.0
1,Ravi,1031.0,IT,63000.0,68000.0
2,John,1029.0,HR,58000.0,63000.0
4,Ravi,1031.0,IT,63000.0,68000.0
6,John,1029.0,HR,58000.0,63000.0


In [86]:
df.dropna(how = "all") # if "how = 'all'" then only if a row contains all the row as null values will be deleted

Unnamed: 0,Name,Age,Dept,Salary,Promoted Salary
0,Anya,1024.0,Marketing,52000.0,57000.0
1,Ravi,1031.0,IT,63000.0,68000.0
2,John,1029.0,HR,58000.0,63000.0
3,Meera,,Finance,67000.0,72000.0
4,Ravi,1031.0,IT,63000.0,68000.0
5,Tina,1026.0,Marketing,,
6,John,1029.0,HR,58000.0,63000.0


### Using "fillna()" function

In [87]:
df.fillna(0) #fills very null value with 0

Unnamed: 0,Name,Age,Dept,Salary,Promoted Salary
0,Anya,1024.0,Marketing,52000.0,57000.0
1,Ravi,1031.0,IT,63000.0,68000.0
2,John,1029.0,HR,58000.0,63000.0
3,Meera,0.0,Finance,67000.0,72000.0
4,Ravi,1031.0,IT,63000.0,68000.0
5,Tina,1026.0,Marketing,0.0,0.0
6,John,1029.0,HR,58000.0,63000.0


In [88]:
df["Age"] = df["Age"].fillna(df["Age"].mean()).round()
df

Unnamed: 0,Name,Age,Dept,Salary,Promoted Salary
0,Anya,1024.0,Marketing,52000.0,57000.0
1,Ravi,1031.0,IT,63000.0,68000.0
2,John,1029.0,HR,58000.0,63000.0
3,Meera,1028.0,Finance,67000.0,72000.0
4,Ravi,1031.0,IT,63000.0,68000.0
5,Tina,1026.0,Marketing,,
6,John,1029.0,HR,58000.0,63000.0


In [89]:
df['Salary'] = pd.to_numeric(df['Salary'],errors = "coerce")

In [90]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Name             7 non-null      object 
 1   Age              7 non-null      float64
 2   Dept             7 non-null      object 
 3   Salary           6 non-null      float64
 4   Promoted Salary  6 non-null      float64
dtypes: float64(3), object(2)
memory usage: 412.0+ bytes


In [91]:
df["Salary"].fillna(df['Salary'].median())

Unnamed: 0,Salary
0,52000.0
1,63000.0
2,58000.0
3,67000.0
4,63000.0
5,60500.0
6,58000.0


In [92]:
df["Salary"].fillna(method = 'ffill') # uses the previous value before the null value to assigne it the null
df.fillna(method = "ffill",inplace=True)

  df["Salary"].fillna(method = 'ffill') # uses the previous value before the null value to assigne it the null
  df.fillna(method = "ffill",inplace=True)


In [93]:
df["Salary"].fillna(method = 'bfill') # uses the next value after the null value to assigne it the null

  df["Salary"].fillna(method = 'bfill') # uses the next value after the null value to assigne it the null


Unnamed: 0,Salary
0,52000.0
1,63000.0
2,58000.0
3,67000.0
4,63000.0
5,63000.0
6,58000.0


### "replace' function

In [94]:
df["Name"] = df["Name"].replace("Anya","Shiva")
df

Unnamed: 0,Name,Age,Dept,Salary,Promoted Salary
0,Shiva,1024.0,Marketing,52000.0,57000.0
1,Ravi,1031.0,IT,63000.0,68000.0
2,John,1029.0,HR,58000.0,63000.0
3,Meera,1028.0,Finance,67000.0,72000.0
4,Ravi,1031.0,IT,63000.0,68000.0
5,Tina,1026.0,Marketing,63000.0,68000.0
6,John,1029.0,HR,58000.0,63000.0


### Dealing With Duplicate Data


In [95]:
df_dup = df[df.duplicated()]
df_dup

Unnamed: 0,Name,Age,Dept,Salary,Promoted Salary
4,Ravi,1031.0,IT,63000.0,68000.0
6,John,1029.0,HR,58000.0,63000.0


In [96]:
df = df.drop_duplicates()
df

Unnamed: 0,Name,Age,Dept,Salary,Promoted Salary
0,Shiva,1024.0,Marketing,52000.0,57000.0
1,Ravi,1031.0,IT,63000.0,68000.0
2,John,1029.0,HR,58000.0,63000.0
3,Meera,1028.0,Finance,67000.0,72000.0
5,Tina,1026.0,Marketing,63000.0,68000.0


In [97]:
df['Orginal Salary'] = df['Salary'] #back up
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Orginal Salary'] = df['Salary'] #back up


Unnamed: 0,Name,Age,Dept,Salary,Promoted Salary,Orginal Salary
0,Shiva,1024.0,Marketing,52000.0,57000.0,52000.0
1,Ravi,1031.0,IT,63000.0,68000.0,63000.0
2,John,1029.0,HR,58000.0,63000.0,58000.0
3,Meera,1028.0,Finance,67000.0,72000.0,67000.0
5,Tina,1026.0,Marketing,63000.0,68000.0,63000.0


### Lambda Function for dealing with Invalid values

In [98]:
df["Promoted Salary"] = df['Promoted Salary'].apply(lambda x: 65000 if x>65000 else x)
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Promoted Salary"] = df['Promoted Salary'].apply(lambda x: 65000 if x>65000 else x)


Unnamed: 0,Name,Age,Dept,Salary,Promoted Salary,Orginal Salary
0,Shiva,1024.0,Marketing,52000.0,57000.0,52000.0
1,Ravi,1031.0,IT,63000.0,65000.0,63000.0
2,John,1029.0,HR,58000.0,63000.0,58000.0
3,Meera,1028.0,Finance,67000.0,65000.0,67000.0
5,Tina,1026.0,Marketing,63000.0,65000.0,63000.0


In [99]:
df["Name"] = df["Name"].replace("Shiva", "Shiva_mani")
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Name"] = df["Name"].replace("Shiva", "Shiva_mani")


Unnamed: 0,Name,Age,Dept,Salary,Promoted Salary,Orginal Salary
0,Shiva_mani,1024.0,Marketing,52000.0,57000.0,52000.0
1,Ravi,1031.0,IT,63000.0,65000.0,63000.0
2,John,1029.0,HR,58000.0,63000.0,58000.0
3,Meera,1028.0,Finance,67000.0,65000.0,67000.0
5,Tina,1026.0,Marketing,63000.0,65000.0,63000.0


In [109]:
df[["First_name","Last_name"]] = df["Name"].str.split("_",expand = True)
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[["First_name","Last_name"]] = df["Name"].str.split("_",expand = True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[["First_name","Last_name"]] = df["Name"].str.split("_",expand = True)


Unnamed: 0,Name,Age,Dept,Salary,Promoted Salary,Orginal Salary,First_name,Last_name
0,Shiva_mani,1024.0,Marketing,52000.0,57000.0,52000.0,Shiva,mani
1,Ravi,1031.0,IT,63000.0,65000.0,63000.0,Ravi,
2,John,1029.0,HR,58000.0,63000.0,58000.0,John,
3,Meera,1028.0,Finance,67000.0,65000.0,67000.0,Meera,
5,Tina,1026.0,Marketing,63000.0,65000.0,63000.0,Tina,


In [108]:
def multiplying_age(x):
      return x*2

df['Age'] = df["Age"].apply(multiplying_age)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Age'] = df["Age"].apply(multiplying_age)


In [107]:
df

Unnamed: 0,Name,Age,Dept,Salary,Promoted Salary,Orginal Salary
0,Shiva_mani,512.0,Marketing,52000.0,57000.0,52000.0
1,Ravi,515.5,IT,63000.0,65000.0,63000.0
2,John,514.5,HR,58000.0,63000.0,58000.0
3,Meera,514.0,Finance,67000.0,65000.0,67000.0
5,Tina,513.0,Marketing,63000.0,65000.0,63000.0


In [106]:
df['Age'] = df['Age'].apply(lambda x: x/2 )
df


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Age'] = df['Age'].apply(lambda x: x/2 )


Unnamed: 0,Name,Age,Dept,Salary,Promoted Salary,Orginal Salary
0,Shiva_mani,512.0,Marketing,52000.0,57000.0,52000.0
1,Ravi,515.5,IT,63000.0,65000.0,63000.0
2,John,514.5,HR,58000.0,63000.0,58000.0
3,Meera,514.0,Finance,67000.0,65000.0,67000.0
5,Tina,513.0,Marketing,63000.0,65000.0,63000.0


### Using Merging functions like "concat" & "merge"

In [105]:
dept_info = pd.DataFrame({
    "Dept": ["Marketing", "IT", "HR", "Finance"],
    "Manager": ["Amit Shah", "Neha Rao", "David Lee", "Sonia Patel"],
    "Location": ["Mumbai", "Hyderabad", "Delhi", "Bangalore"]
})


df2 = pd.DataFrame(dept_info)
df2

Unnamed: 0,Dept,Manager,Location
0,Marketing,Amit Shah,Mumbai
1,IT,Neha Rao,Hyderabad
2,HR,David Lee,Delhi
3,Finance,Sonia Patel,Bangalore


In [110]:
pd.concat([df,df2]) # it is horizontal join and since we have Null values so we use merge() now

Unnamed: 0,Name,Age,Dept,Salary,Promoted Salary,Orginal Salary,First_name,Last_name,Manager,Location
0,Shiva_mani,1024.0,Marketing,52000.0,57000.0,52000.0,Shiva,mani,,
1,Ravi,1031.0,IT,63000.0,65000.0,63000.0,Ravi,,,
2,John,1029.0,HR,58000.0,63000.0,58000.0,John,,,
3,Meera,1028.0,Finance,67000.0,65000.0,67000.0,Meera,,,
5,Tina,1026.0,Marketing,63000.0,65000.0,63000.0,Tina,,,
0,,,Marketing,,,,,,Amit Shah,Mumbai
1,,,IT,,,,,,Neha Rao,Hyderabad
2,,,HR,,,,,,David Lee,Delhi
3,,,Finance,,,,,,Sonia Patel,Bangalore


In [111]:
pd.merge(df,df2,on = "Dept")

Unnamed: 0,Name,Age,Dept,Salary,Promoted Salary,Orginal Salary,First_name,Last_name,Manager,Location
0,Shiva_mani,1024.0,Marketing,52000.0,57000.0,52000.0,Shiva,mani,Amit Shah,Mumbai
1,Ravi,1031.0,IT,63000.0,65000.0,63000.0,Ravi,,Neha Rao,Hyderabad
2,John,1029.0,HR,58000.0,63000.0,58000.0,John,,David Lee,Delhi
3,Meera,1028.0,Finance,67000.0,65000.0,67000.0,Meera,,Sonia Patel,Bangalore
4,Tina,1026.0,Marketing,63000.0,65000.0,63000.0,Tina,,Amit Shah,Mumbai
