In [1]:
# Importing pandas
import pandas as pd

In [3]:
# Display the version string of the installed pandas package
pd.__version__

'2.0.3'

# i) Data Structures in Pandas
## a) Series 
## b) DataFrame

## a) Series

<img src="https://i.morioh.com/210524/9788b6e2.webp">

## - Creating a Series from a list

In [4]:
# Build a one-dimensional Series from a plain Python list.
# No index is passed, so pandas labels the rows 0, 1, 2, ... automatically.
name = ['saksham', 'Bikas', 'kusum', 'Champa', 'Pranil']
s = pd.Series(data=name)
print(s)

0    saksham
1      Bikas
2      kusum
3     Champa
4     Pranil
dtype: object


In [12]:
s.values   # The Series data as a NumPy array (the index labels are not part of it)

array(['saksham', 'Bikas', 'kusum', 'Champa', 'Pranil'], dtype=object)

In [6]:
# Number of dimensions of the Series: ndim is 1, i.e. a pandas Series is one-dimensional
s.ndim

1

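## - Plain `[]` indexing on a Series is label-based, so negative positions fail with the default integer index (use `.iloc` or `.values` for negative positions)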

In [13]:
# Indexing of series: s[3] looks up the label 3 (the default index runs 0..4)
print(s[3])
# or read position 3 of the underlying NumPy array
print(s.values[3])    # Positional access, independent of the index labels

Champa
Champa


In [17]:
# Allowed: position-based access counts negative offsets from the end
s.values[-2]
# Not allowed: s[-2] is a *label* lookup, and -2 is not a label in the default
# 0..4 index, so it raises KeyError instead of returning the second-to-last item
# s[-2]
# Use .iloc for negative positional indexing on the Series itself
s.iloc[-2]


'Champa'

## - Changing the index of a Pandas series

In [31]:
# Rebuild the Series with string labels in place of the default 0..4 index
custom_labels = list('abcde')
s = pd.Series(data=name, index=custom_labels)
# Numeric labels work the same way, e.g. index=[101, 102, 103, 104, 105]
s


a    saksham
b      Bikas
c      kusum
d     Champa
e     Pranil
dtype: object

## - Accessing an element through its new index label

In [32]:
# Label-based lookup: fetch the element whose index label is 'b'
s['b']
# With the numeric index variant (101..105) the equivalent lookup would be s[102]

'Bikas'

## - Accessing by position through the `.values` attribute, i.e. the underlying NumPy array

In [33]:
# Position 0 of the underlying NumPy array; the custom labels play no role here
s.values[0]

'saksham'

## - Providing name to the series

In [37]:
# Give the Series itself a name; it is reported as "Name: Names" in the printed footer
sample = ['Ram', 'Pete', 'Harry', 'Maguire']
s1 = pd.Series(data=sample, index=list('abcd'), name='Names')
print(s1)

a        Ram
b       Pete
c      Harry
d    Maguire
Name: Names, dtype: object


## - Apply function on series

In [39]:
# Prices stored as text with a leading dollar sign -- raw input for the cleaning steps
prices = ['$500', '$20', '$45', '$50']
mrp = pd.Series(data=prices, name='Price in Dollar')
mrp

0    $500
1     $20
2     $45
3     $50
Name: Price in Dollar, dtype: object

In [42]:
# LOGIC: - To remove the '$' (DOLLAR) sign
def clean_price(p):
    """Convert a price string such as '$500' to a float.

    Parameters
    ----------
    p : str
        Price text; may contain '$' signs and surrounding whitespace.

    Returns
    -------
    float
        The numeric value of the price.

    Raises
    ------
    ValueError
        If the text left after removing '$' is not a valid number.
    """
    p = p.replace('$','').strip()
    return float(p)

# Quick check on a single value (the function requires one argument)
clean_price('$500')

500.0

## - Applying the function to remove DOLLAR sign from the above series 'mrp'
## - The `apply()` method is commonly used for data cleaning

In [45]:
# Run clean_price on every element of mrp; the result is a float64 Series
mrp.apply(clean_price)

0    500.0
1     20.0
2     45.0
3     50.0
Name: Price in Dollar, dtype: float64

## - Performing the above operation in a single line

In [48]:
# Strip the '$' with the vectorised .str accessor, then cast the remaining text to float
mrp.str.replace('$','', regex = False).astype(float)    # regex = False: '$' is matched literally, not as a regular expression

0    500.0
1     20.0
2     45.0
3     50.0
Name: Price in Dollar, dtype: float64

## - Addition of the series

In [64]:
# num1 holds 14 values (1..14) and num2 holds 15 (50..64). Addition aligns the
# two Series on their index, so label 14 -- present only in num2 -- gives NaN
# and the result is float64.
num1 = pd.Series(data=range(1, 15), name='First_no')
num2 = pd.Series(data=range(50, 65), name='Second_no')
print(num1.add(num2))

0     51.0
1     53.0
2     55.0
3     57.0
4     59.0
5     61.0
6     63.0
7     65.0
8     67.0
9     69.0
10    71.0
11    73.0
12    75.0
13    77.0
14     NaN
dtype: float64


## - Column-wise concatenation of the 2 series results in a DataFrame

In [65]:
# Place num1 and num2 side by side: along the column axis each Series becomes
# one column (named after the Series) and rows are matched on the index
df = pd.concat(objs=[num1, num2], axis='columns')
df

Unnamed: 0,First_no,Second_no
0,1.0,50
1,2.0,51
2,3.0,52
3,4.0,53
4,5.0,54
5,6.0,55
6,7.0,56
7,8.0,57
8,9.0,58
9,10.0,59


In [60]:
# First five rows of the DataFrame (head() returns 5 rows when no count is given)
df.head()

Unnamed: 0,First_no,Second_no
0,1.0,50
1,2.0,51
2,3.0,52
3,4.0,53
4,5.0,54


In [62]:
# Last five rows of the DataFrame; row 14 has no First_no value because num1 is one element shorter
df.tail()

Unnamed: 0,First_no,Second_no
10,11.0,60
11,12.0,61
12,13.0,62
13,14.0,63
14,,64


In [63]:
# Checking the dimension of the dataframe
# ndim is 2: a DataFrame is two-dimensional (rows x columns)
df.ndim

2

## b) DataFrame

## - Creating dataframe from dictionary

In [81]:
# Each dictionary key becomes a column and its list supplies that column's values;
# the explicit index labels the four rows 1..4 instead of the default 0..3
about = dict(
    name=['Jason', 'Peter', 'Ronaldo', 'Luffy'],
    gender=['F', 'M', 'M', 'M'],
)
dict_df = pd.DataFrame(data=about, index=[1, 2, 3, 4])
dict_df.head()

Unnamed: 0,name,gender
1,Jason,F
2,Peter,M
3,Ronaldo,M
4,Luffy,M


In [75]:
dict_df.shape # Checking the shape: (number of rows, number of columns)

(4, 2)

In [74]:
dict_df.ndim # Number of dimensions (2 for a DataFrame)

2

In [82]:
dict_df['name']   # Selecting a single column by its label returns a Series that keeps the row index


1      Jason
2      Peter
3    Ronaldo
4      Luffy
Name: name, dtype: object

In [83]:
# The gender column, likewise returned as a Series carrying the same row index
dict_df['gender']

1    F
2    M
3    M
4    M
Name: gender, dtype: object