### Pandas Library  

In [16]:
import numpy as np
import pandas as pd

#### Pandas Series Object 
#### A Series is a one-dimensional, array-like object made up of an index and its values

In [2]:
# Creating series objects from various data types

In [3]:
# From plain Python lists: the student names and, position by position, their ages
names = ['Raju', 'Ravi', 'Sita', 'Ram']
ages = [23, 32, 33, 18]


In [4]:
# Pair each age with a student: the ages become the values, the names the index labels
students = pd.Series(ages, index=names)
students

Raju    23
Ravi    32
Sita    33
Ram     18
dtype: int64

In [5]:
# From a NumPy array
marks = np.array([23, 34, 56, 78])
# No index is supplied here, so the Series gets the default integer index (0, 1, 2, ...)
students1 = pd.Series(marks)
students1

0    23
1    34
2    56
3    78
dtype: int32

In [6]:
# Replace the default integer labels with the student names (modifies students1 in place)
students1.index = names # Setting the index to the series

In [7]:
# Display the Series again to confirm that the index now holds the student names
students1

Raju    23
Ravi    34
Sita    56
Ram     78
dtype: int32

In [8]:
# From a dictionary: the keys become the index labels and the values become the data,
# so no separate index needs to be passed
dic = dict(Ravi=35, Raju=45, Uma=18, John=23)
students2 = pd.Series(dic)
students2

Ravi    35
Raju    45
Uma     18
John    23
dtype: int64

### DataFrame 

In [9]:
# A DataFrame is similar to an Excel sheet: it has row names, column names, and data arranged in a table


In [10]:
#Creating a dataframe 

In [14]:
# Row labels (subjects) and column labels (students) for the marks table.
row_names = ['English', 'Maths', 'Science', 'Geography']
# NOTE: this rebinds `names`, which earlier in the notebook held the Series index labels.
names = ['Raju', 'Ravi', 'John', 'Laxman']
# Draw 16 random integers in [50, 100) and arrange them as 4 subjects x 4 students.
# A seeded, local RandomState makes the table identical on every Restart & Run All
# without touching NumPy's global random state. Outputs saved from an earlier,
# unseeded run will not match these values.
marks = np.random.RandomState(42).randint(50, 100, 16).reshape(4, 4)

In [15]:
# Inspect the 4x4 array of marks generated in the previous cell
marks

array([[51, 94, 63, 84],
       [69, 50, 66, 65],
       [96, 51, 99, 77],
       [87, 88, 94, 96]])

In [20]:
# Assemble the marks table: subjects label the rows, students label the columns
marks_df = pd.DataFrame(marks, index=row_names, columns=names)
marks_df

Unnamed: 0,Raju,Ravi,John,Laxman
English,51,94,63,84
Maths,69,50,66,65
Science,96,51,99,77
Geography,87,88,94,96


In [21]:
# Confirm the type of the object that was just built
type(marks_df)

pandas.core.frame.DataFrame

In [22]:
# The row labels (here, the subjects) are stored in the .index attribute
marks_df.index

Index(['English', 'Maths', 'Science', 'Geography'], dtype='object')

#### Note: In a DataFrame, each column and each row, when grabbed separately, is a pandas Series object. A DataFrame is a collection of Series that share the same index.

In [23]:
# Getting a column: indexing with a single column name returns that column as a Series

marks_df['Raju']

English      51
Maths        69
Science      96
Geography    87
Name: Raju, dtype: int32

In [24]:
# Confirm that a single selected column is a pandas Series
type(marks_df['Raju'])

pandas.core.series.Series

In [28]:
# Passing a list of column names (note the double brackets) returns a DataFrame, not a Series
marks_df[['Raju', 'Ravi']]

Unnamed: 0,Raju,Ravi
English,51,94
Maths,69,50
Science,96,51
Geography,87,88


In [29]:
# Confirm that a multi-column selection is a DataFrame
type(marks_df[['Raju', 'Ravi']])

pandas.core.frame.DataFrame

In [37]:
# Getting a row: .loc with a single row label returns that row as a Series
marks_df.loc['English']

Raju      51
Ravi      94
John      63
Laxman    84
Name: English, dtype: int32

In [38]:
# Confirm that a single selected row is a pandas Series
type(marks_df.loc['English'])

pandas.core.series.Series

In [41]:
# A list of row labels keeps the result two-dimensional, so this is a DataFrame
marks_df.loc[['English', 'Maths']]

Unnamed: 0,Raju,Ravi,John,Laxman
English,51,94,63,84
Maths,69,50,66,65


#### Adding a new column to the dataframe

In [42]:
# Assigning a list to a new column name adds that column; the list supplies one value per row
marks_df['New'] = [62, 65, 67, 89]

In [43]:
# Display the frame; it now includes the 'New' column assigned above
marks_df

Unnamed: 0,Raju,Ravi,John,Laxman,New
English,51,94,63,84,62
Maths,69,50,66,65,65
Science,96,51,99,77,67
Geography,87,88,94,96,89


#### Removing this newly added column

In [45]:
# drop() returns a new DataFrame without the 'New' column; marks_df itself is not changed
marks_df.drop(columns='New')

Unnamed: 0,Raju,Ravi,John,Laxman
English,51,94,63,84
Maths,69,50,66,65
Science,96,51,99,77
Geography,87,88,94,96


In [47]:
marks_df # The 'New' column is still there: the drop above returned a new frame and left marks_df unchanged

Unnamed: 0,Raju,Ravi,John,Laxman,New
English,51,94,63,84,62
Maths,69,50,66,65,65
Science,96,51,99,77,67
Geography,87,88,94,96,89


In [48]:
# inplace=True removes the column from marks_df itself instead of returning a new frame.
# Running this cell a second time raises KeyError, because 'New' has already been removed.
marks_df.drop(columns='New', inplace=True)

In [49]:
# Display the frame; 'New' is gone because the drop was applied in place
marks_df

Unnamed: 0,Raju,Ravi,John,Laxman
English,51,94,63,84
Maths,69,50,66,65
Science,96,51,99,77
Geography,87,88,94,96


### Exploratory commands on the Dataframes

#### Shape - How many rows and How many columns

In [52]:
# .shape is a (number of rows, number of columns) tuple
marks_df.shape 

(4, 4)

#### info()

In [53]:
# Print a summary of the frame: index range, column names, non-null counts, dtypes and memory usage
marks_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, English to Geography
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Raju    4 non-null      int32
 1   Ravi    4 non-null      int32
 2   John    4 non-null      int32
 3   Laxman  4 non-null      int32
dtypes: int32(4)
memory usage: 268.0+ bytes


#### describe -- Statistical information of the Dataframe

In [55]:
# Per-column summary statistics: count, mean, standard deviation, min, quartiles and max
marks_df.describe()

Unnamed: 0,Raju,Ravi,John,Laxman
count,4.0,4.0,4.0,4.0
mean,75.75,70.75,80.5,80.5
std,19.956202,23.51418,18.627936,12.974334
min,51.0,50.0,63.0,65.0
25%,64.5,50.75,65.25,74.0
50%,78.0,69.5,80.0,80.5
75%,89.25,89.5,95.25,87.0
max,96.0,94.0,99.0,96.0
