# PANDAS in Python

Pandas is a data analysis library that provides a variety of data structures and data manipulation methods that allow you to perform complex tasks with simple one-line commands.

There are two core objects in Pandas. They are:

1. DataFrame
2. Series


In [1]:
#To find out the version of Pandas library installed

import pandas
pandas.__version__

'0.23.0'

In [2]:
#Importing packages with alias name

import numpy as np
import pandas as pd

In [3]:
#Creating series with variable name as data
#Series is a 1 dimensional indexed array

data = pd.Series([0, 1, 2, 3, 4, 5])
data

0    0
1    1
2    2
3    3
4    4
5    5
dtype: int64

In [4]:
print(data)

0    0
1    1
2    2
3    3
4    4
5    5
dtype: int64


In [5]:
#print the values of data

data.values

array([0, 1, 2, 3, 4, 5], dtype=int64)

In [8]:
#print the index of data

data.index

RangeIndex(start=0, stop=6, step=1)

In [9]:
#Accessing Series value

data[4]

4

In [8]:
#Slicing of Series

data[2:5]

2    2
3    3
4    4
dtype: int64

In [10]:
#Assigning the customized index

data = pd.Series([0, 1, 2, 3, 4, 5], index = ["a" , "b" , "c" , "d" ,"e", "f"])
data

a    0
b    1
c    2
d    3
e    4
f    5
dtype: int64

In [11]:
#Converting a Python dict to a Pandas Series -> the keys become the index labels and the values become the values of the Series.

Infant_Details = {"Name": "Akshay Kannan",
                 "Age": 1,
                 "Sex": "M"}
Infant = pd.Series(Infant_Details)
Infant

Name    Akshay Kannan
Age                 1
Sex                 M
dtype: object

In [12]:
#Accessing a Series value using attribute (dot) notation on its index label

Infant.Name

'Akshay Kannan'

In [13]:
#Accessing a Series value using its index label in square brackets

Infant["Name"]

'Akshay Kannan'

In [14]:
#Slicing the Series by index labels -> unlike positional slicing, the end label is included

Infant["Name":"Age"]

Name    Akshay Kannan
Age                 1
dtype: object

In [15]:
#Creating an empty series
#dtype is given explicitly: without it, newer pandas versions warn and default an empty Series to object instead of float64

e_series = pd.Series(dtype="float64")
e_series

Series([], dtype: float64)

In [16]:
#Converting Numpy array to Pandas Series

data11 = np.array(['a','b','c','d'])
np_series = pd.Series(data11)
np_series

0    a
1    b
2    c
3    d
dtype: object

In [17]:
#Create a Series from Scalar

s_series = pd.Series("Aarthi", index = [0,1,2,3,4])
s_series

0    Aarthi
1    Aarthi
2    Aarthi
3    Aarthi
4    Aarthi
dtype: object

In [18]:
#Reading a CSV file

info = pd.read_csv("D:/New folder/CSV Practise .csv")
info

Unnamed: 0,Sl no,Name,Designation,Age,Salary,Department
0,1,Aarthi Kalyanasundaram,Test Engineer,25,40000,Amex
1,2,Aishwarya Ragu,Developer,27,60000,Amex
2,3,Akshaya Dupati,Developer,25,50000,Amex
3,4,Arvind,Developer,24,50000,Amex
4,5,Senthil,Developer,38,80000,Amex
5,6,Jayaprakash,Developer,25,20000,Amex
6,7,Pandi,Developer,26,20000,Amex
7,8,Kandarp Pandya,Product Manager,30,80000,Amex
8,9,Ashish Dhyani,Data Engineer,30,70000,Amex
9,10,Shobana,Test Lead,40,80000,Amex


In [20]:
#Selecting the Salary column -> indexing a DataFrame with a column name already returns a Series, so no pd.Series() wrapper is needed
#NOTE: the values are strings such as "40,000" (dtype object), so min(), max() and sort_values() compare them as text, not as numbers

salary = info["Salary"]
salary

0     40,000
1     60,000
2     50,000
3     50,000
4     80,000
5     20,000
6     20,000
7     80,000
8     70,000
9     80,000
10    70,000
11    60,000
12    90,000
13    80,000
14    70,000
15    20,000
16    20,000
Name: Salary, dtype: object

In [21]:
salary.min()

'20,000'

In [22]:
salary.max()

'90,000'

In [23]:
#Indexing the series using the .loc[] indexer (label-based, used with square brackets) -> returns the values from the start label up to and including the end label

salary.loc[3:5]

3    50,000
4    80,000
5    20,000
Name: Salary, dtype: object

In [24]:
#Indexing the series using the .iloc[] indexer (position-based, used with square brackets) -> returns the values from the start position up to but excluding the end position

salary.iloc[3:5]

3    50,000
4    80,000
Name: Salary, dtype: object

In [27]:
d1 = pd.Series([5, 2, 3, 7], index=['a', 'b', 'c', 'd'])
d2 = pd.Series([1, 6, 4, 9], index=['a', 'b', 'd', 'e'])
print(d1, "\n\n", d2)

a    5
b    2
c    3
d    7
dtype: int64 

 a    1
b    6
d    4
e    9
dtype: int64


In [30]:
d1.add(d2, fill_value=0)

a     6.0
b     8.0
c     3.0
d    11.0
e     9.0
dtype: float64

In [31]:
d1.add(d2)

a     6.0
b     8.0
c     NaN
d    11.0
e     NaN
dtype: float64

In [32]:
#Find out the data type of salary series

salary.dtype

dtype('O')

In [34]:
#Find out the count of elements in the salary series -> count() returns the number of non-null values only; use len(salary) or salary.size to include nulls as well

salary.count()

17

In [37]:
#sort_values() -> sorts all the data points in ascending order by default (the values here are strings, so they are ordered as text)

salary.sort_values()

16    20,000
15    20,000
6     20,000
5     20,000
0     40,000
3     50,000
2     50,000
1     60,000
11    60,000
10    70,000
14    70,000
8     70,000
7     80,000
9     80,000
13    80,000
4     80,000
12    90,000
Name: Salary, dtype: object

In [38]:
#To sort the values in descending order

salary.sort_values(ascending =False)

12    90,000
4     80,000
13    80,000
9     80,000
7     80,000
8     70,000
14    70,000
10    70,000
11    60,000
1     60,000
2     50,000
3     50,000
0     40,000
5     20,000
6     20,000
15    20,000
16    20,000
Name: Salary, dtype: object

In [39]:
#Sort the series in descending order and view only the first 10 values

salary.sort_values(ascending =False).head(10)

12    90,000
4     80,000
13    80,000
9     80,000
7     80,000
8     70,000
14    70,000
10    70,000
11    60,000
1     60,000
Name: Salary, dtype: object

In [40]:
#Sort the series in descending order and view only the last 3 values

salary.sort_values(ascending =False).tail(3)

6     20,000
15    20,000
16    20,000
Name: Salary, dtype: object

In [41]:
#Find out the unique values in the series

salary.unique()

array(['40,000', '60,000', '50,000', '80,000', '20,000', '70,000',
       '90,000'], dtype=object)

In [42]:
#Find out the count of unique values in the series

salary.nunique()

7

In [43]:
#value_counts() -> to count the number of the times each unique value occurs in a Series

salary.value_counts()

80,000    4
20,000    4
70,000    3
50,000    2
60,000    2
40,000    1
90,000    1
Name: Salary, dtype: int64