# Getting Started with Pandas

In [1]:
import numpy as np

In [2]:
import pandas as pd

## Introduction to Pandas Data Structures

### Series

In [3]:
# Build a Series from a plain Python list; no index is given, so pandas
# supplies the default one.
obj = pd.Series(data=[4, -1, 5, 9])

In [4]:
# Display the Series: index labels on the left, values on the right.
obj

0    4
1   -1
2    5
3    9
dtype: int64

In [5]:
# .array returns the Series' values as a pandas array
# (a NumpyExtensionArray for this int64 data).
obj.array

<NumpyExtensionArray>
[4, -1, 5, 9]
Length: 4, dtype: int64

In [6]:
# .index holds the labels; with none supplied it is a default RangeIndex.
obj.index

RangeIndex(start=0, stop=4, step=1)

In [7]:
# NumPy functions apply element-wise to a Series and keep its index labels;
# the result here is float64.
np.exp(obj)

0      54.598150
1       0.367879
2     148.413159
3    8103.083928
dtype: float64

In [8]:
# Map each author to one of their books.
Author_Book = {
    "Toni Morrison": "The Bluest Eyes",
    "Leo Tolstoy": "War and Peace",
    "Fyodor Dostoevsky": "The Idiot",
    "Ayn Rand": "The Fountainhead",
}

In [9]:
# A dict passed to pd.Series becomes a Series whose index is the dict keys
# (authors) and whose values are the dict values (books).
pd.Series(Author_Book)

Toni Morrison         The Bluest Eyes
Leo Tolstoy             War and Peace
Fyodor Dostoevsky           The Idiot
Ayn Rand             The Fountainhead
dtype: object

In [10]:
# Map each book title to a year.
# NOTE(review): "Algoriths" looks like a typo for "Algorithms"; the key is left
# unchanged because the recorded outputs in this notebook show it as-is.
Book_Year = {
    "The Joy of X": 2014,
    "Algoriths to Live By": 2016,
    "Man's Search for Meaning": 1946,
    "The Study in Scarlet": 1887,
}

In [11]:
# Same construction with integer values: the book titles become the index
# and the years become the (int64) values.
obj2 = pd.Series(Book_Year)
obj2

The Joy of X                2014
Algoriths to Live By        2016
Man's Search for Meaning    1946
The Study in Scarlet        1887
dtype: int64

In [13]:
# Show the Series' index (the dict keys). A bare last expression is displayed
# by the notebook itself, the same way obj.index is shown earlier, so no
# print() wrapper is needed.
obj2.index

Index(['The Joy of X', 'Algoriths to Live By', 'Man's Search for Meaning',
       'The Study in Scarlet'],
      dtype='object')


In [14]:
# to_dict() converts the Series back into a plain dict of label -> value.
obj2.to_dict()

{'The Joy of X': 2014,
 'Algoriths to Live By': 2016,
 "Man's Search for Meaning": 1946,
 'The Study in Scarlet': 1887}

### DataFrame

In [19]:
# Column-oriented records: each key is a column name and each list holds
# that column's values (seven entries per list).
students = {
    "name": [
        "lucky", "shubham", "ayan", "harry", "hank", "prince", "vishwas",
    ],
    "subject": [
        "mathematics", "arts", "computer science", "programming",
        "literature", "physics", "youtube",
    ],
    "year": [2021, 2021, 2020, 2015, 2002, 2022, 2017],
}

In [20]:
# Display the raw dict before turning it into pandas objects.
students

{'name': ['lucky', 'shubham', 'ayan', 'harry', 'hank', 'prince', 'vishwas'],
 'subject': ['mathematics',
  'arts',
  'computer science',
  'programming',
  'literature',
  'physics',
  'youtube'],
 'year': [2021, 2021, 2020, 2015, 2002, 2022, 2017]}

In [23]:
# Series view of the dict: one element per key, each holding the whole list
# as a single object (dtype: object).
obj3 = pd.Series(students)
obj3

name       [lucky, shubham, ayan, harry, hank, prince, vi...
subject    [mathematics, arts, computer science, programm...
year              [2021, 2021, 2020, 2015, 2002, 2022, 2017]
dtype: object

In [24]:
# DataFrame view of the same dict: each key becomes a column and each list
# position becomes a row.
frame = pd.DataFrame(students)
frame

Unnamed: 0,name,subject,year
0,lucky,mathematics,2021
1,shubham,arts,2021
2,ayan,computer science,2020
3,harry,programming,2015
4,hank,literature,2002
5,prince,physics,2022
6,vishwas,youtube,2017


In [25]:
# For a large dataset, head() shows just the first five rows.
frame.head()

Unnamed: 0,name,subject,year
0,lucky,mathematics,2021
1,shubham,arts,2021
2,ayan,computer science,2020
3,harry,programming,2015
4,hank,literature,2002


In [26]:
# Similarly, tail() returns the last five rows.
frame.tail()

Unnamed: 0,name,subject,year
2,ayan,computer science,2020
3,harry,programming,2015
4,hank,literature,2002
5,prince,physics,2022
6,vishwas,youtube,2017


In [27]:
# Passing columns= sets the order in which the columns appear.
pd.DataFrame(students, columns=["year", "name", "subject"])

Unnamed: 0,year,name,subject
0,2021,lucky,mathematics
1,2021,shubham,arts
2,2020,ayan,computer science
3,2015,harry,programming
4,2002,hank,literature
5,2022,prince,physics
6,2017,vishwas,youtube


In [28]:
# Indexing with a column name returns that column as a Series.
frame["name"]

0      lucky
1    shubham
2       ayan
3      harry
4       hank
5     prince
6    vishwas
Name: name, dtype: object

In [29]:
# loc selects a row by its index label (here the label 0); the result is a
# Series keyed by column name. iloc, used further below, selects by integer
# position instead.
frame.loc[0]

name             lucky
subject    mathematics
year              2021
Name: 0, dtype: object

In [30]:
# Build a new DataFrame, frame2, from students and request an extra "grade"
# column. The dict has no "grade" key, so that column is filled with missing
# values.
frame2 = pd.DataFrame(students, columns=["year", "name", "subject", "grade"])
frame2

Unnamed: 0,year,name,subject,grade
0,2021,lucky,mathematics,
1,2021,shubham,arts,
2,2020,ayan,computer science,
3,2015,harry,programming,
4,2002,hank,literature,
5,2022,prince,physics,
6,2017,vishwas,youtube,


In [32]:
# Fill the grade column from a NumPy range: np.arange(3., 10.) gives the
# seven floats 3.0 through 9.0, one per row.
frame2["grade"] = np.arange(3., 10.)
frame2

Unnamed: 0,year,name,subject,grade
0,2021,lucky,mathematics,3.0
1,2021,shubham,arts,4.0
2,2020,ayan,computer science,5.0
3,2015,harry,programming,6.0
4,2002,hank,literature,7.0
5,2022,prince,physics,8.0
6,2017,vishwas,youtube,9.0


In [34]:
# hank's row: iloc[4] picks the row at integer position 4, and to_dict()
# turns it into a column -> value dict.
frame2.iloc[4].to_dict()

{'year': 2002, 'name': 'hank', 'subject': 'literature', 'grade': 7.0}

In [35]:
# to_numpy() returns the data as a 2-D NumPy array; with these mixed column
# types the array's dtype is object.
frame2.to_numpy()

array([[2021, 'lucky', 'mathematics', 3.0],
       [2021, 'shubham', 'arts', 4.0],
       [2020, 'ayan', 'computer science', 5.0],
       [2015, 'harry', 'programming', 6.0],
       [2002, 'hank', 'literature', 7.0],
       [2022, 'prince', 'physics', 8.0],
       [2017, 'vishwas', 'youtube', 9.0]], dtype=object)

## Essential Functionality

### Reindexing

In [49]:
# A float Series with string labels deliberately out of alphabetical order,
# used to demonstrate reindexing.
example_obj = pd.Series(
    data=[1, 3.09, 4.98, 5.67, 9.67],
    index=["o", "a", "u", "e", "i"],
)

In [50]:
# Inspect example_obj: note the index order o, a, u, e, i.
example_obj

o    1.00
a    3.09
u    4.98
e    5.67
i    9.67
dtype: float64

In [53]:
# Next, reindex example_obj so its labels are in the order a, e, i, o, u:
# example_obj.reindex(["a", "e", "i", "o", "u"])

In [51]:
# reindex returns a new Series with entries arranged in the requested label
# order; it is stored under a separate name.
example_obj_2 = example_obj.reindex(["a", "e", "i", "o", "u"])

In [52]:
# The values now follow the label order a, e, i, o, u.
example_obj_2

a    3.09
e    5.67
i    9.67
o    1.00
u    4.98
dtype: float64

In [59]:
# 3x3 frame holding the integers 0..8, with row labels F/S/T and
# column labels RJ/Gujarat/MH.
frame3 = pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=["F", "S", "T"],
    columns=["RJ", "Gujarat", "MH"],
)

In [60]:
# Display frame3: rows F, S, T and columns RJ, Gujarat, MH.
frame3

Unnamed: 0,RJ,Gujarat,MH
F,0,1,2
S,3,4,5
T,6,7,8


In [61]:
# Reindexing the rows with a label frame3 lacks ("E") adds a row of missing
# values; the existing values show as floats in the result.
frame4 = frame3.reindex(["F", "S", "T", "E"])
frame4

Unnamed: 0,RJ,Gujarat,MH
F,0.0,1.0,2.0
S,3.0,4.0,5.0
T,6.0,7.0,8.0
E,,,


### Dropping Entries from an Axis