### Pandas Library  

#### [PandasDocumentation Reference Link](https://pandas.pydata.org/docs/reference/index.html)


In [1]:
import numpy as np
import pandas as pd

#### Pandas Series Object 
#### Series is a matrix like object with index and values 

In [2]:
# Creating series objects from various data types

In [3]:
#From normal python lists
ages = [23, 32, 33, 18]
names = ['Raju', 'Ravi', 'Sita', 'Ram']


In [4]:
students = pd.Series(data = ages, index = names)
students

Raju    23
Ravi    32
Sita    33
Ram     18
dtype: int64

In [5]:
# from numpy array

marks = np.array([23,34,56,78])
students1 = pd.Series(data = marks) # In this case the index is not set
students1

0    23
1    34
2    56
3    78
dtype: int32

In [6]:
students1.index = names # Setting the index to the series

In [7]:
students1

Raju    23
Ravi    34
Sita    56
Ram     78
dtype: int32

In [8]:
# From dictionary 
dic = {'Ravi':35, 'Raju':45, 'Uma':18, 'John': 23}
students2 = pd.Series(data = dic) # In this case, the index is already taken from keys of dictionary
students2

Ravi    35
Raju    45
Uma     18
John    23
dtype: int64

### DataFrame 

In [9]:
#Data frame is similar to excel sheet, where, there are rownames , Column names, data in a table format


In [10]:
#Creating a dataframe 

In [11]:
row_names = ['English', 'Maths', 'Science', 'Geography']
names = ['Raju', 'Ravi', 'John', 'Laxman']
marks = np.random.randint(50,100, 16).reshape(4,4)

In [12]:
marks

array([[90, 91, 50, 88],
       [92, 52, 66, 64],
       [99, 61, 75, 69],
       [75, 50, 92, 93]])

In [13]:
marks_df = pd.DataFrame(data = marks, index = row_names, columns = names )
marks_df

Unnamed: 0,Raju,Ravi,John,Laxman
English,90,91,50,88
Maths,92,52,66,64
Science,99,61,75,69
Geography,75,50,92,93


In [14]:
type(marks_df)

pandas.core.frame.DataFrame

In [15]:
marks_df.index

Index(['English', 'Maths', 'Science', 'Geography'], dtype='object')

#### Note :  In a dataframe, each column, each row when are grabbed separately they are numy series objects, A dataframe is a bunch of series that share the same index. 

In [16]:
# Getting a column 

marks_df['Raju']

English      90
Maths        92
Science      99
Geography    75
Name: Raju, dtype: int32

In [17]:
type(marks_df['Raju'])

pandas.core.series.Series

In [18]:
#if we grab two columns the result is dataframe 
marks_df[['Raju', 'Ravi']]

Unnamed: 0,Raju,Ravi
English,90,91
Maths,92,52
Science,99,61
Geography,75,50


In [19]:
type(marks_df[['Raju', 'Ravi']])

pandas.core.frame.DataFrame

In [20]:
marks_df.loc['English', :]

Raju      90
Ravi      91
John      50
Laxman    88
Name: English, dtype: int32

In [21]:
type(marks_df.loc['English', :])

pandas.core.series.Series

In [22]:
marks_df.loc[['English', 'Maths'], :] # This is a data frame

Unnamed: 0,Raju,Ravi,John,Laxman
English,90,91,50,88
Maths,92,52,66,64


#### Adding a new column to the dataframe

In [23]:
marks_df['New'] = [62, 65, 67, 89]

In [24]:
marks_df

Unnamed: 0,Raju,Ravi,John,Laxman,New
English,90,91,50,88,62
Maths,92,52,66,64,65
Science,99,61,75,69,67
Geography,75,50,92,93,89


#### Removing this newly added column

In [25]:
# this creates a new dataframe without column new
marks_df.drop('New', axis = 1)

Unnamed: 0,Raju,Ravi,John,Laxman
English,90,91,50,88
Maths,92,52,66,64
Science,99,61,75,69
Geography,75,50,92,93


In [26]:
marks_df # You can notice, new column is still there

Unnamed: 0,Raju,Ravi,John,Laxman,New
English,90,91,50,88,62
Maths,92,52,66,64,65
Science,99,61,75,69,67
Geography,75,50,92,93,89


In [27]:
#inplace option
marks_df.drop('New', axis = 1, inplace = True)

In [28]:
marks_df

Unnamed: 0,Raju,Ravi,John,Laxman
English,90,91,50,88
Maths,92,52,66,64
Science,99,61,75,69
Geography,75,50,92,93


### Exploratory commands on the Dataframes

#### Shape - How many rows and How many columns

In [29]:
# Rows and columns 
marks_df.shape 

(4, 4)

#### info()

In [30]:
marks_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, English to Geography
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Raju    4 non-null      int32
 1   Ravi    4 non-null      int32
 2   John    4 non-null      int32
 3   Laxman  4 non-null      int32
dtypes: int32(4)
memory usage: 268.0+ bytes


#### describe -- Statistical information of the Dataframe

In [31]:
marks_df.describe()

Unnamed: 0,Raju,Ravi,John,Laxman
count,4.0,4.0,4.0,4.0
mean,89.0,63.5,70.75,78.5
std,10.099505,18.947295,17.538054,14.153916
min,75.0,50.0,50.0,64.0
25%,86.25,51.5,62.0,67.75
50%,91.0,56.5,70.5,78.5
75%,93.75,68.5,79.25,89.25
max,99.0,91.0,92.0,93.0


In [32]:
# About columns
marks_df.columns

Index(['Raju', 'Ravi', 'John', 'Laxman'], dtype='object')

#### dtypes

In [33]:
marks_df.dtypes

Raju      int32
Ravi      int32
John      int32
Laxman    int32
dtype: object

#### head, tail

In [34]:
marks_df.head(3) # Top 3 rows

Unnamed: 0,Raju,Ravi,John,Laxman
English,90,91,50,88
Maths,92,52,66,64
Science,99,61,75,69


In [35]:
marks_df.tail(3) # bottom 3 rows

Unnamed: 0,Raju,Ravi,John,Laxman
Maths,92,52,66,64
Science,99,61,75,69
Geography,75,50,92,93


In [37]:
# Index of the dataframe
marks_df.index

Index(['English', 'Maths', 'Science', 'Geography'], dtype='object')

### Making a dataframe from dictionary

In [45]:
dic = {'Raju': [35,75,28,38],'Ravi':[72,45,58,62], "John": [56, 76, 84, 45]}

In [47]:
pd.DataFrame(data = dic, index = ['English', 'Maths', 'Science', 'Geography'])

Unnamed: 0,Raju,Ravi,John
English,35,72,56
Maths,75,45,76
Science,28,58,84
Geography,38,62,45
